def test_serialize_40mb_dataframe(self):
    """Round-trip a stored CSV blob through ``serialize_dataframe``.

    Downloads the blob named by ``settings.storage.medium_size_blob``,
    parses it as CSV, serializes the resulting frame as ``GenericCSV`` and
    checks that (a) re-parsing the serialized bytes yields an equal frame
    and (b) the serialization call finished in under 10 seconds.

    The original bytes and the serialized bytes are both handed to the
    ``_write_blob_contents`` / ``_write_serialized_frame`` helpers on the
    test case.
    """
    # Function-scope import so the block does not depend on a file-level
    # ``import time`` being present.
    import time

    # Arrange
    blob_name = settings.storage.medium_size_blob
    original_data = self.blob.get_blob_to_bytes(settings.storage.container, blob_name)
    original_dataframe = pd.read_csv(BytesIO(original_data), header=0, sep=",", encoding='utf-8-sig')
    self._write_blob_contents(blob_name, original_data)

    # Act
    # Use a monotonic high-resolution clock: wall-clock time can jump
    # (NTP sync, DST), which would make the timing assertion unreliable.
    start_time = time.perf_counter()
    writer = BytesIO()
    serialize_dataframe(writer, DataTypeIds.GenericCSV, original_dataframe)
    elapsed_seconds = time.perf_counter() - start_time
    result_data = writer.getvalue()
    self._write_serialized_frame(blob_name, result_data)

    # Assert
    result_dataframe = pd.read_csv(BytesIO(result_data), header=0, sep=",", encoding='utf-8-sig')
    assert_frame_equal(original_dataframe, result_dataframe)
    self.assertLess(elapsed_seconds, 10)
def test_serialize_to_csv_no_header(self):
    """Serialize a two-row frame as header-less CSV and check the bytes.

    The first record has no ``'c'`` key, so its third field is expected
    to be empty in the output.

    NOTE(review): a test with this same name appears again later in the
    file; if both live in the same class the later one shadows this one.
    """
    # Arrange
    frame = pd.DataFrame(
        [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
        ]
    )
    expected = b'1.0,2.0,\n5.1,10.1,20.1\n'

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.GenericCSVNoHeader, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)
def test_serialize_to_plain_text(self):
    """Serialize a single-column frame of strings as plain text.

    Each input string is expected on its own newline-terminated line.
    """
    # Arrange
    lines = ['This is the first', 'This is second line']
    frame = pd.DataFrame(lines)
    expected = b'This is the first\nThis is second line\n'

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.PlainText, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)
def test_serialize_to_tsv(self):
    """Serialize a two-row frame as TSV with a header row.

    The first record has no ``'c'`` key, so its third field is expected
    to be empty in the output.

    NOTE(review): a test with this same name appears again later in the
    file; if both live in the same class the later one shadows this one.
    """
    # Arrange
    frame = pd.DataFrame(
        [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
        ]
    )
    expected = b'a\tb\tc\n1.0\t2.0\t\n5.1\t10.1\t20.1\n'

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.GenericTSV, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)
def test_serialize_to_tsv_no_header(self):
    """Serialize a two-row frame as header-less TSV and check the bytes.

    The first record has no ``'c'`` key, so its third field is expected
    to be empty in the output.
    """
    # Arrange
    frame = pd.DataFrame(
        [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
        ]
    )
    expected = b'1.0\t2.0\t\n5.1\t10.1\t20.1\n'

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.GenericTSVNoHeader, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)
def test_serialize_to_csv(self):
    """Serialize a two-row frame as CSV with a header row.

    The first record has no ``'c'`` key, so its third field is expected
    to be empty in the output.
    """
    # Arrange
    frame = pd.DataFrame(
        [
            {'a': 1.0, 'b': 2.0},
            {'a': 5.1, 'b': 10.1, 'c': 20.1},
        ]
    )
    expected = b'a,b,c\n1.0,2.0,\n5.1,10.1,20.1\n'

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.GenericCSV, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)
def test_serialize_to_tsv(self):
    """Serialize a two-row frame as TSV with a header row.

    The first record has no ``"c"`` key, so its third field is expected
    to be empty in the output.

    NOTE(review): a test with this same name is also defined earlier in
    the file; if both live in the same class this definition shadows it.
    """
    # Arrange
    frame = pd.DataFrame(
        [
            {"a": 1.0, "b": 2.0},
            {"a": 5.1, "b": 10.1, "c": 20.1},
        ]
    )
    expected = b"a\tb\tc\n1.0\t2.0\t\n5.1\t10.1\t20.1\n"

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.GenericTSV, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)
def test_serialize_to_csv_no_header(self):
    """Serialize a two-row frame as header-less CSV and check the bytes.

    The first record has no ``"c"`` key, so its third field is expected
    to be empty in the output.

    NOTE(review): a test with this same name is also defined earlier in
    the file; if both live in the same class this definition shadows it.
    """
    # Arrange
    frame = pd.DataFrame(
        [
            {"a": 1.0, "b": 2.0},
            {"a": 5.1, "b": 10.1, "c": 20.1},
        ]
    )
    expected = b"1.0,2.0,\n5.1,10.1,20.1\n"

    # Act
    buffer = BytesIO()
    serialize_dataframe(buffer, DataTypeIds.GenericCSVNoHeader, frame)
    payload = buffer.getvalue()

    # Assert
    self.assertGreater(len(payload), 0)
    self.assertEqual(payload, expected)