def _file_on_s3(self, relpath):
    """
    Upload a ``test_data`` fixture to minio around a single ``yield``.

    ``relpath`` is resolved relative to the ``test_data`` directory that
    sits next to this source file. The object is removed again once the
    generator is resumed or closed.

    ``bucket`` and ``key`` are neither parameters nor locals: they are
    looked up in an enclosing scope that is not part of this block.

    NOTE(review): this is a bare generator as written here; presumably it
    is driven through ``contextlib.contextmanager`` -- confirm at the
    definition site.
    """
    fixture_dir = Path(__file__).parent / 'test_data'
    fixture = fixture_dir / relpath
    try:
        # The upload is inside the ``try`` on purpose: ``remove`` below is
        # attempted even when the upload itself raises.
        minio.fput_file(bucket, key, fixture)
        yield
    finally:
        minio.remove(bucket, key)
def test_upload_xlsx(self):
    """
    Run ``_test_upload`` against the mock .xlsx fixture.

    The fixture is first stored in ``minio.UserFilesBucket`` under
    ``<uuid>.xlsx``; ``_test_upload`` then receives the same uuid, the
    fixture's on-disk size and the table we expect back.
    """
    upload_uuid = 'eb785452-f0f2-4ebe-97ce-e225e346148e'
    fixture = Path(mock_xlsx_path)
    minio.fput_file(
        minio.UserFilesBucket,
        'eb785452-f0f2-4ebe-97ce-e225e346148e.xlsx',
        fixture,
    )

    table = pd.DataFrame({
        'Month': ['Jan', 'Feb'],
        'Amount': [10, 20]
    })
    # Size is read from the local fixture, not from the uploaded object.
    size_on_disk = fixture.stat().st_size
    expected = ProcessResult(table)

    self._test_upload(
        uuid=upload_uuid,
        filename='test.xlsx',
        ext='xlsx',
        size=size_on_disk,
        expected_result=expected,
    )
def test_upload_xls(self):
    """
    Run ``_test_upload`` against the ``example.xls`` fixture.

    The fixture lives in the ``test_data`` directory one level above this
    file's directory. It is stored in ``minio.UserFilesBucket`` under
    ``<uuid>.xls`` before ``_test_upload`` is called with the same uuid,
    the fixture's on-disk size and the table we expect back.
    """
    upload_uuid = 'eb785452-f0f2-4ebe-97ce-e225e346148e'
    test_data_dir = Path(__file__).parent.parent / 'test_data'
    fixture = test_data_dir / 'example.xls'
    minio.fput_file(
        minio.UserFilesBucket,
        'eb785452-f0f2-4ebe-97ce-e225e346148e.xls',
        fixture,
    )

    table = pd.DataFrame({
        'foo': [1, 2],
        'bar': [2, 3],
    })
    # Size is read from the local fixture, not from the uploaded object.
    size_on_disk = fixture.stat().st_size
    expected = ProcessResult(table)

    self._test_upload(
        uuid=upload_uuid,
        filename='test.xls',
        ext='xls',
        size=size_on_disk,
        expected_result=expected,
    )
def test_read_file_fastparquet_issue_375(self):
    """
    ``StoredObject.get_table()`` yields an empty DataFrame for this file.

    The ``fastparquet-issue-375-snappy.par`` fixture is uploaded to
    ``minio.StoredObjectsBucket`` and read back through a ``StoredObject``
    pointing at the same bucket and key.
    """
    filename = 'fastparquet-issue-375-snappy.par'
    fixture = Path(__file__).parent.parent / 'test_data' / filename
    minio.fput_file(minio.StoredObjectsBucket, filename, fixture)

    # ``size`` is a fixed literal here; it is not derived from the fixture.
    stored = StoredObject(
        size=10,
        bucket=minio.StoredObjectsBucket,
        key=filename,
    )

    actual = stored.get_table()
    assert_frame_equal(actual, pd.DataFrame())
def write(bucket: str, key: str, table: pandas.DataFrame) -> int:
    """
    Store ``table`` as a parquet file at ``bucket``/``key`` in minio.

    The table is written to a named temporary file with fastparquet
    (SNAPPY compression, utf8 object encoding), that file is uploaded with
    ``minio.fput_file``, and the temporary file's size in bytes is
    returned. An existing object at the same key is overwritten if needed.

    Format stability matters: every future version of parquet.read()
    should be able to read any file produced by today's version of this
    function.
    """
    with tempfile.NamedTemporaryFile() as scratch:
        scratch_path = Path(scratch.name)
        # fastparquet writes by file name, not through our open handle.
        fastparquet.write(scratch.name, table, compression='SNAPPY',
                          object_encoding='utf8')
        minio.fput_file(bucket, key, scratch_path)
        # seek() returns the new absolute position, so seeking to the end
        # of the file gives its length in bytes.
        return scratch.seek(0, io.SEEK_END)