def test_deserialize_from_csv_no_header(self):
        """Header-less CSV bytes should deserialize to the expected frame.

        The expected rows use the integer labels 0, 1 and 2 as column keys.
        The ``nan`` token (row 0) and the empty field (row 2) are left out of
        the expected rows, so pandas fills those cells with missing values.
        """
        # Arrange
        payload = b'1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
        expected_rows = [
            {0: 1.0, 1: 2.0},
            {0: 5.1, 1: 10.1, 2: 20.1},
            {0: 50.2, 2: 50.3},
        ]

        # Act
        frame = deserialize_dataframe(
            BytesIO(payload),
            DataTypeIds.GenericCSVNoHeader,
        )

        # Assert
        self.assertIsNotNone(frame)
        assert_frame_equal(pd.DataFrame(expected_rows), frame)
    def test_deserialize_from_csv_bom(self):
        """CSV bytes prefixed with a UTF-8 BOM should deserialize cleanly.

        The first expected column is plain ``'a'``, i.e. the three BOM bytes
        must not leak into the header. The ``nan`` token and the empty field
        are omitted from the expected rows and therefore compare as missing.
        """
        # Arrange
        payload = b'\xef\xbb\xbfa,b,c\n1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
        expected_rows = [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
            {'a': 50.2, 'c': 50.3},
        ]

        # Act
        frame = deserialize_dataframe(BytesIO(payload), DataTypeIds.GenericCSV)

        # Assert
        self.assertIsNotNone(frame)
        assert_frame_equal(pd.DataFrame(expected_rows), frame)
    def test_deserialize_from_csv_spaces(self):
        """CSV with a space after each comma should deserialize to clean values.

        The expected column names and string cells carry no leading space and
        no surrounding quotes, and the whitespace-only field in the last row
        is expected to be missing (its key is absent from the expected row).
        """
        # Arrange
        payload = b'a, b, c\n1.0, two, nan\n5.1, "ten point one", 20.1\n50.2, , 50.3\n'
        expected_rows = [
            {'a': 1.0, 'b': 'two'},
            {'a': 5.1, 'b': 'ten point one', 'c': 20.1},
            {'a': 50.2, 'c': 50.3},
        ]

        # Act
        frame = deserialize_dataframe(BytesIO(payload), DataTypeIds.GenericCSV)

        # Assert
        self.assertIsNotNone(frame)
        assert_frame_equal(pd.DataFrame(expected_rows), frame)
    # Example #4
    # 0
    def test_deserialize_from_arff(self):
        # Arrange
        data = b"""@RELATION	Unnamed

@ATTRIBUTE	Class	NUMERIC
@ATTRIBUTE	age	NUMERIC
@ATTRIBUTE	menopause	NUMERIC
@ATTRIBUTE	tumor-size	NUMERIC

@DATA
0,5,1,1
0,5,4,4
1,4,8,8

"""

        # Act
        reader = BytesIO(data)
        result = deserialize_dataframe(reader, DataTypeIds.ARFF)
        print(result)

        # Assert
        self.assertIsNotNone(result)
        expected = [
            {'Class': 0., 'age': 5., 'menopause': 1., 'tumor-size':1.},
            {'Class': 0., 'age': 5., 'menopause': 4., 'tumor-size':4.},
            {'Class': 1., 'age': 4., 'menopause': 8., 'tumor-size':8.},
        ]
        assert_frame_equal(pd.DataFrame(expected), result)
    def test_deserialize_from_unsupported_data_type_id(self):
        """An unrecognised data type id should raise UnsupportedDatasetTypeError."""
        # Arrange
        data = b'1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
        reader = BytesIO(data)

        # Act / Assert
        with self.assertRaises(UnsupportedDatasetTypeError):
            # The call is expected to raise, so its return value is not bound.
            deserialize_dataframe(reader, 'Unsupported')
    def test_deserialize_from_unsupported_data_type_id(self):
        """Passing the id ``'Unsupported'`` should raise UnsupportedDatasetTypeError."""
        # Arrange
        data = b'1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
        reader = BytesIO(data)

        # Act / Assert
        with self.assertRaises(UnsupportedDatasetTypeError):
            # Nothing is assigned here: the context manager only checks that
            # the call raises.
            deserialize_dataframe(reader, 'Unsupported')
    def test_deserialize_from_csv_no_header(self):
        """Header-less CSV bytes should deserialize to the expected frame.

        Expected columns are keyed by the integers 0, 1 and 2. Cells holding
        the ``nan`` token or an empty field are omitted from the expected
        rows, so they compare as missing values.
        """
        # Arrange
        raw = b"1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n"
        wanted = pd.DataFrame(
            [
                {0: 1.0, 1: 2.0},
                {0: 5.1, 1: 10.1, 2: 20.1},
                {0: 50.2, 2: 50.3},
            ]
        )

        # Act
        stream = BytesIO(raw)
        actual = deserialize_dataframe(stream, DataTypeIds.GenericCSVNoHeader)

        # Assert
        self.assertIsNotNone(actual)
        assert_frame_equal(wanted, actual)
    def test_deserialize_from_csv_spaces(self):
        """CSV with a space after each comma should deserialize to clean values.

        Expected headers and string cells have no leading space or quotes;
        the whitespace-only field in the last row is expected to be missing.
        """
        # Arrange
        raw = b'a, b, c\n1.0, two, nan\n5.1, "ten point one", 20.1\n50.2, , 50.3\n'
        wanted = pd.DataFrame(
            [
                {"a": 1.0, "b": "two"},
                {"a": 5.1, "b": "ten point one", "c": 20.1},
                {"a": 50.2, "c": 50.3},
            ]
        )

        # Act
        stream = BytesIO(raw)
        actual = deserialize_dataframe(stream, DataTypeIds.GenericCSV)

        # Assert
        self.assertIsNotNone(actual)
        assert_frame_equal(wanted, actual)
    def test_deserialize_from_csv_bom(self):
        """CSV bytes that start with a UTF-8 BOM should deserialize cleanly.

        The first expected column is plain ``"a"``, so the BOM bytes must not
        end up in the header name.
        """
        # Arrange
        raw = b"\xef\xbb\xbfa,b,c\n1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n"
        wanted = pd.DataFrame(
            [
                {"a": 1.0, "b": 2.0},
                {"a": 5.1, "b": 10.1, "c": 20.1},
                {"a": 50.2, "c": 50.3},
            ]
        )

        # Act
        stream = BytesIO(raw)
        actual = deserialize_dataframe(stream, DataTypeIds.GenericCSV)

        # Assert
        self.assertIsNotNone(actual)
        assert_frame_equal(wanted, actual)
    # Example #10
    # 0
    def test_deserialize_from_plain_text_bom(self):
        """Plain text with a UTF-8 BOM should yield one row per input line.

        The payload holds two lines separated by ``\\r\\n``. The expected frame
        has a single column labelled 0 with one row per line, and the first
        row does not contain the BOM.
        """
        # Arrange
        payload = b'\xef\xbb\xbfJohn enjoyed his vacation in California. His personal favorite on the trip was Los Angeles.\r\nMicrosoft announced upgrades to their line of products for information workers. The announcement was made at a partner conference at Boston.'
        first_line = {0: 'John enjoyed his vacation in California. His personal favorite on the trip was Los Angeles.'}
        second_line = {0: 'Microsoft announced upgrades to their line of products for information workers. The announcement was made at a partner conference at Boston.'}

        # Act
        frame = deserialize_dataframe(BytesIO(payload), DataTypeIds.PlainText)

        # Assert
        self.assertIsNotNone(frame)
        assert_frame_equal(pd.DataFrame([first_line, second_line]), frame)
    def test_deserialize_from_csv(self):
        """CSV bytes with an ``a,b,c`` header should deserialize to the expected frame.

        The ``nan`` token and the empty field are omitted from the expected
        rows, so those cells compare as missing values.
        """
        # Arrange
        payload = b'a,b,c\n1.0,2.0,nan\n5.1,10.1,20.1\n50.2,,50.3\n'
        expected_frame = pd.DataFrame([
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
            {'a': 50.2, 'c': 50.3},
        ])

        # Act
        frame = deserialize_dataframe(BytesIO(payload), DataTypeIds.GenericCSV)

        # Assert
        self.assertIsNotNone(frame)
        assert_frame_equal(expected_frame, frame)