def _file_on_s3(self, relpath):
    """
    Upload a test-data file to minio, yield once, then remove it again.

    ``relpath`` is resolved against the ``test_data`` directory that sits
    next to this module. ``bucket`` and ``key`` are not parameters of this
    function; they are read from an enclosing scope.

    NOTE(review): the single ``yield`` inside ``try``/``finally`` suggests
    this generator is meant to be used as a context manager (e.g. wrapped
    by ``contextlib.contextmanager``) -- confirm at the definition site.
    """
    test_data_dir = Path(__file__).parent / 'test_data'
    local_file = test_data_dir / relpath
    try:
        # The upload lives inside the ``try`` so that the removal below
        # runs even when the upload itself raises.
        minio.fput_file(bucket, key, local_file)
        yield
    finally:
        minio.remove(bucket, key)
Exemple #2
0
 def test_upload_xlsx(self):
     """
     Upload the mock .xlsx file to the user-files bucket and check that
     ``_test_upload`` produces the expected two-row Month/Amount table.
     """
     file_uuid = 'eb785452-f0f2-4ebe-97ce-e225e346148e'
     xlsx_path = Path(mock_xlsx_path)

     # The object key is the upload's UUID plus the file extension.
     minio.fput_file(minio.UserFilesBucket, file_uuid + '.xlsx', xlsx_path)

     table = pd.DataFrame({
         'Month': ['Jan', 'Feb'],
         'Amount': [10, 20]
     })
     self._test_upload(
         uuid=file_uuid,
         filename='test.xlsx',
         ext='xlsx',
         size=xlsx_path.stat().st_size,
         expected_result=ProcessResult(table),
     )
Exemple #3
0
    def test_upload_xls(self):
        """
        Upload ``test_data/example.xls`` to the user-files bucket and check
        that ``_test_upload`` produces the expected two-row foo/bar table.
        """
        file_uuid = 'eb785452-f0f2-4ebe-97ce-e225e346148e'
        test_data_dir = Path(__file__).parent.parent / 'test_data'
        xls_path = test_data_dir / 'example.xls'

        # The object key is the upload's UUID plus the file extension.
        minio.fput_file(minio.UserFilesBucket, file_uuid + '.xls', xls_path)

        table = pd.DataFrame({
            'foo': [1, 2],
            'bar': [2, 3],
        })
        self._test_upload(
            uuid=file_uuid,
            filename='test.xls',
            ext='xls',
            size=xls_path.stat().st_size,
            expected_result=ProcessResult(table),
        )
Exemple #4
0
    def test_read_file_fastparquet_issue_375(self):
        """
        Upload the ``fastparquet-issue-375-snappy.par`` fixture to the
        stored-objects bucket and assert that ``StoredObject.get_table()``
        returns an empty DataFrame for it.
        """
        filename = 'fastparquet-issue-375-snappy.par'
        test_data_dir = Path(__file__).parent.parent / 'test_data'
        minio.fput_file(minio.StoredObjectsBucket, filename,
                        test_data_dir / filename)

        stored_object = StoredObject(
            size=10,
            bucket=minio.StoredObjectsBucket,
            key=filename,
        )
        actual = stored_object.get_table()
        assert_frame_equal(actual, pd.DataFrame())
Exemple #5
0
def write(bucket: str, key: str, table: pandas.DataFrame) -> int:
    """
    Serialize a Pandas DataFrame and upload it to minio at ``bucket``/``key``,
    overwriting if needed.

    Return the number of bytes written.

    The on-disk format is meant to stay "stable": every future version of
    parquet.read() should be able to load any file produced by today's
    version of this function.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        tmp_path = Path(tmp.name)
        # fastparquet writes to the temp file by name; the handle held
        # here is only used afterwards to measure the result.
        fastparquet.write(
            tmp.name,
            table,
            compression='SNAPPY',
            object_encoding='utf8',
        )
        minio.fput_file(bucket, key, tmp_path)
        # seek() returns the new absolute position, so seeking to the end
        # of the file gives its size in bytes.
        return tmp.seek(0, io.SEEK_END)