コード例 #1
0
    def test_serialize_40mb_dataframe(self):
        # Round-trips the blob named by settings.storage.medium_size_blob:
        # download -> parse -> serialize as GenericCSV -> parse again. The two
        # frames must be equal and serialization must finish in under 10 s.

        # Arrange
        blob_name = settings.storage.medium_size_blob
        source_bytes = self.blob.get_blob_to_bytes(
            settings.storage.container, blob_name)
        # The same parser options are applied to the source and to the result,
        # so the frame comparison below is like-for-like.
        csv_options = dict(header=0, sep=",", encoding='utf-8-sig')
        expected_frame = pd.read_csv(BytesIO(source_bytes), **csv_options)

        self._write_blob_contents(blob_name, source_bytes)

        # Act -- only buffer creation and the serialize call are timed.
        started_at = datetime.now()
        buffer = BytesIO()
        serialize_dataframe(buffer, DataTypeIds.GenericCSV, expected_frame)
        duration = datetime.now() - started_at
        serialized_bytes = buffer.getvalue()

        self._write_serialized_frame(blob_name, serialized_bytes)

        # Assert
        actual_frame = pd.read_csv(BytesIO(serialized_bytes), **csv_options)
        assert_frame_equal(expected_frame, actual_frame)
        self.assertLess(duration.total_seconds(), 10)
コード例 #2
0
    def test_serialize_to_csv_no_header(self):
        # Checks the exact bytes written for a two-row frame when the
        # GenericCSVNoHeader type is requested.

        # Arrange: the first record has no 'c' key; the expected output
        # leaves that trailing field empty.
        records = [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
        ]
        frame = pd.DataFrame(records)

        # Act
        buffer = BytesIO()
        serialize_dataframe(buffer, DataTypeIds.GenericCSVNoHeader, frame)
        payload = buffer.getvalue()

        # Assert
        self.assertGreater(len(payload), 0)
        expected = b'1.0,2.0,\n5.1,10.1,20.1\n'
        self.assertEqual(payload, expected)
コード例 #3
0
    def test_serialize_to_plain_text(self):
        # Checks that a single-column frame of strings serialized as
        # PlainText yields one newline-terminated line per row.

        # Arrange
        lines = ['This is the first', 'This is second line']
        frame = pd.DataFrame(lines)

        # Act
        buffer = BytesIO()
        serialize_dataframe(buffer, DataTypeIds.PlainText, frame)
        output = buffer.getvalue()

        # Assert
        self.assertGreater(len(output), 0)
        expected = b'This is the first\nThis is second line\n'
        self.assertEqual(output, expected)
コード例 #4
0
    def test_serialize_to_tsv(self):
        # Checks the exact bytes written for a two-row frame when the
        # GenericTSV type is requested (header row plus tab-separated values).

        # Arrange: the first record has no 'c' key; the expected output
        # leaves that trailing field empty.
        first_row = {'a': 1.0, 'b': 2.0}
        second_row = {'a': 5.1, 'b': 10.1, 'c': 20.1}
        frame = pd.DataFrame([first_row, second_row])

        # Act
        sink = BytesIO()
        serialize_dataframe(sink, DataTypeIds.GenericTSV, frame)
        serialized = sink.getvalue()

        # Assert
        self.assertGreater(len(serialized), 0)
        self.assertEqual(
            serialized,
            b'a\tb\tc\n1.0\t2.0\t\n5.1\t10.1\t20.1\n',
        )
コード例 #5
0
    def test_serialize_to_plain_text(self):
        # PlainText serialization of a two-row, single-column frame is
        # expected to emit each row's text followed by a newline.

        # Arrange
        frame = pd.DataFrame([
            'This is the first',
            'This is second line',
        ])

        # Act
        stream = BytesIO()
        serialize_dataframe(stream, DataTypeIds.PlainText, frame)
        written = stream.getvalue()

        # Assert
        self.assertGreater(len(written), 0)
        self.assertEqual(
            written,
            b'This is the first\nThis is second line\n',
        )
コード例 #6
0
    def test_serialize_to_tsv_no_header(self):
        # Checks the exact bytes written for a two-row frame when the
        # GenericTSVNoHeader type is requested.

        # Arrange: the first record has no 'c' key; the expected output
        # leaves that trailing field empty.
        records = [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
        ]
        frame = pd.DataFrame(records)

        # Act
        buffer = BytesIO()
        serialize_dataframe(buffer, DataTypeIds.GenericTSVNoHeader, frame)
        payload = buffer.getvalue()

        # Assert
        self.assertGreater(len(payload), 0)
        expected = b'1.0\t2.0\t\n5.1\t10.1\t20.1\n'
        self.assertEqual(payload, expected)
コード例 #7
0
    def test_serialize_to_csv(self):
        # Checks the exact bytes written for a two-row frame when the
        # GenericCSV type is requested (header row plus comma-separated values).

        # Arrange: the first record has no 'c' key; the expected output
        # leaves that trailing field empty.
        first_row = {'a': 1.0, 'b': 2.0}
        second_row = {'a': 5.1, 'b': 10.1, 'c': 20.1}
        frame = pd.DataFrame([first_row, second_row])

        # Act
        sink = BytesIO()
        serialize_dataframe(sink, DataTypeIds.GenericCSV, frame)
        serialized = sink.getvalue()

        # Assert
        self.assertGreater(len(serialized), 0)
        self.assertEqual(serialized, b'a,b,c\n1.0,2.0,\n5.1,10.1,20.1\n')
コード例 #8
0
    def test_serialize_to_tsv(self):
        # GenericTSV serialization of a two-row frame is compared against
        # the exact expected bytes, header line included.

        # Arrange: only the second record supplies "c"; the expected output
        # shows an empty last field for the first row.
        records = [
            {"a": 1.0, "b": 2.0},
            {"a": 5.1, "b": 10.1, "c": 20.1},
        ]
        frame = pd.DataFrame(records)

        # Act
        stream = BytesIO()
        serialize_dataframe(stream, DataTypeIds.GenericTSV, frame)
        written = stream.getvalue()

        # Assert
        self.assertGreater(len(written), 0)
        expected = b"a\tb\tc\n1.0\t2.0\t\n5.1\t10.1\t20.1\n"
        self.assertEqual(written, expected)
コード例 #9
0
    def test_serialize_to_csv_no_header(self):
        # GenericCSVNoHeader serialization of a two-row frame is compared
        # against the exact expected bytes (data rows only).

        # Arrange: only the second record supplies "c"; the expected output
        # shows an empty last field for the first row.
        frame = pd.DataFrame([
            {"a": 1.0, "b": 2.0},
            {"a": 5.1, "b": 10.1, "c": 20.1},
        ])

        # Act
        sink = BytesIO()
        serialize_dataframe(sink, DataTypeIds.GenericCSVNoHeader, frame)
        output = sink.getvalue()

        # Assert
        self.assertGreater(len(output), 0)
        self.assertEqual(
            output,
            b"1.0,2.0,\n5.1,10.1,20.1\n",
        )
    def test_serialize_40mb_dataframe(self):
        # Downloads the blob named by settings.storage.medium_size_blob,
        # serializes its DataFrame form as GenericCSV, and verifies both that
        # re-parsing gives an equal frame and that serialization took < 10 s.

        def parse_csv(raw_bytes):
            # One parser configuration for both the source and the result.
            return pd.read_csv(
                BytesIO(raw_bytes),
                header=0,
                sep=",",
                encoding='utf-8-sig',
            )

        # Arrange
        blob_name = settings.storage.medium_size_blob
        downloaded = self.blob.get_blob_to_bytes(
            settings.storage.container,
            blob_name,
        )
        source_frame = parse_csv(downloaded)

        self._write_blob_contents(blob_name, downloaded)

        # Act -- the timed region covers buffer creation and serialization.
        t_start = datetime.now()
        sink = BytesIO()
        serialize_dataframe(sink, DataTypeIds.GenericCSV, source_frame)
        t_elapsed = datetime.now() - t_start
        serialized = sink.getvalue()

        self._write_serialized_frame(blob_name, serialized)

        # Assert
        round_tripped = parse_csv(serialized)
        assert_frame_equal(source_frame, round_tripped)
        self.assertLess(t_elapsed.total_seconds(), 10)