예제 #1
0
 def test_head_early_stopping(self):
     """Load './tests/*' with ``head=2`` and check the resulting frame.

     NOTE(review): the expected length is 3 even though ``head=2`` is
     passed; presumably loading stops after the first file that satisfies
     the limit instead of truncating rows -- confirm against the loader.
     """
     sources = [LocalParquetFile(path='./tests/*')]
     with get_datafame_from_objs(sources, head=2) as df:
         assert isinstance(df, pd.DataFrame)
         row_count = len(df)
         assert row_count == 3
예제 #2
0
 def test_s3_single_file(self, aws_session, parquet_file_s3_1):
     """Load one S3 parquet object and check the frame type and length.

     ``parquet_file_s3_1`` is unpacked as a ``(bucket, key)`` pair.
     """
     bucket_name, object_key = parquet_file_s3_1
     s3_source = S3ParquetFile(aws_session=aws_session,
                               bucket=bucket_name,
                               key=object_key)
     expected_rows = 3 * 1  # presumably 3 rows per file, 1 file -- confirm
     with get_datafame_from_objs([s3_source]) as df:
         assert isinstance(df, pd.DataFrame)
         assert len(df) == expected_rows
예제 #3
0
    def test_local_wildcard(self):
        """Load the local glob './tests/*' and check frame type and length."""
        sources = [LocalParquetFile(path='./tests/*')]
        # presumably 3 rows per file across 3 matched files -- confirm
        expected_rows = 3 * 3
        with get_datafame_from_objs(sources) as df:
            assert isinstance(df, pd.DataFrame)
            assert len(df) == expected_rows
예제 #4
0
 def test_local_and_s3_files(self, aws_session, parquet_file_s3_1):
     """Load one local file plus one S3 object into a single frame.

     ``parquet_file_s3_1`` is unpacked as a ``(bucket, key)`` pair.
     """
     bucket_name, object_key = parquet_file_s3_1
     local_source = LocalParquetFile(path='./tests/test1.parquet')
     s3_source = S3ParquetFile(aws_session=aws_session,
                               bucket=bucket_name,
                               key=object_key)
     expected_rows = 3 * 2  # presumably 3 rows per file, 2 files -- confirm
     with get_datafame_from_objs([local_source, s3_source]) as df:
         assert isinstance(df, pd.DataFrame)
         assert len(df) == expected_rows
예제 #5
0
 def test_s3_wildcard_file(self, aws_session, parquet_file_s3_1,
                           parquet_file_s3_2):
     """Load every object matching key '*' in the fixture bucket.

     NOTE(review): ``parquet_file_s3_2`` is not referenced in the body;
     presumably it is requested so a second object exists in the same
     bucket -- confirm against the fixture definitions.
     """
     bucket_name, _unused_key = parquet_file_s3_1
     wildcard_source = S3ParquetFile(aws_session=aws_session,
                                     bucket=bucket_name,
                                     key='*')
     expected_rows = 3 * 2  # presumably 3 rows per file, 2 files -- confirm
     with get_datafame_from_objs([wildcard_source]) as df:
         assert isinstance(df, pd.DataFrame)
         assert len(df) == expected_rows
예제 #6
0
    def test_local_double_file(self):
        """Load two explicitly named local parquet files into one frame."""
        first_source = LocalParquetFile(path='./tests/test1.parquet')
        second_source = LocalParquetFile(path='./tests/test2.parquet')
        # presumably 3 rows per file, 2 files -- confirm
        expected_rows = 3 * 2
        with get_datafame_from_objs([first_source, second_source]) as df:
            assert isinstance(df, pd.DataFrame)
            assert len(df) == expected_rows
예제 #7
0
 def test_local_and_s3_wildcard_files(self, aws_session, parquet_file_s3_1,
                                      parquet_file_s3_2):
     """Combine a local glob with an S3 wildcard key in a single load.

     NOTE(review): ``parquet_file_s3_2`` is not referenced in the body;
     presumably it is requested so a second object exists in the same
     bucket -- confirm against the fixture definitions.
     """
     bucket_name, _unused_key = parquet_file_s3_1
     # The local glob is expected to match 3 files.
     local_glob = LocalParquetFile(path='./tests/*')
     # The S3 wildcard is expected to match 2 objects.
     s3_glob = S3ParquetFile(aws_session=aws_session,
                             bucket=bucket_name,
                             key='*')
     expected_rows = 3 * (3 + 2)
     with get_datafame_from_objs([local_glob, s3_glob]) as df:
         assert isinstance(df, pd.DataFrame)
         assert len(df) == expected_rows
예제 #8
0
 def test_s3_double_file(self, aws_session, parquet_file_s3_1,
                         parquet_file_s3_2):
     """Load two explicitly addressed S3 objects into one frame.

     Each fixture is unpacked as a ``(bucket, key)`` pair.
     """
     first_bucket, first_key = parquet_file_s3_1
     second_bucket, second_key = parquet_file_s3_2
     first_source = S3ParquetFile(aws_session=aws_session,
                                  bucket=first_bucket,
                                  key=first_key)
     second_source = S3ParquetFile(aws_session=aws_session,
                                   bucket=second_bucket,
                                   key=second_key)
     expected_rows = 3 * 2  # presumably 3 rows per file, 2 files -- confirm
     with get_datafame_from_objs([first_source, second_source]) as df:
         assert isinstance(df, pd.DataFrame)
         assert len(df) == expected_rows