def test_head_early_stopping(self):
    """Load the local wildcard with ``head=2`` and check the resulting frame.

    The expected length is 3 even though ``head`` is 2.
    NOTE(review): presumably ``head`` stops reading further files instead of
    truncating rows -- confirm against ``get_datafame_from_objs``.
    """
    sources = [LocalParquetFile(path='./tests/*')]
    expected_rows = 3
    with get_datafame_from_objs(sources, head=2) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_s3_single_file(self, aws_session, parquet_file_s3_1):
    """Load one S3 object and expect ``3 * 1`` rows in the frame."""
    s3_bucket, s3_key = parquet_file_s3_1
    remote_file = S3ParquetFile(
        aws_session=aws_session,
        bucket=s3_bucket,
        key=s3_key,
    )
    expected_rows = 3 * 1
    with get_datafame_from_objs([remote_file]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_local_wildcard(self):
    """Load the ``./tests/*`` wildcard and expect ``3 * 3`` rows in the frame."""
    sources = [LocalParquetFile(path='./tests/*')]
    expected_rows = 3 * 3
    with get_datafame_from_objs(sources) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_local_and_s3_files(self, aws_session, parquet_file_s3_1):
    """Combine one local file with one S3 object and expect ``3 * 2`` rows."""
    s3_bucket, s3_key = parquet_file_s3_1
    local_file = LocalParquetFile(path='./tests/test1.parquet')
    remote_file = S3ParquetFile(
        aws_session=aws_session,
        bucket=s3_bucket,
        key=s3_key,
    )
    expected_rows = 3 * 2
    # Order matters for the call: the local source first, then the S3 one.
    with get_datafame_from_objs([local_file, remote_file]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_s3_wildcard_file(self, aws_session, parquet_file_s3_1, parquet_file_s3_2):
    """Load the ``*`` key from the fixture bucket and expect ``3 * 2`` rows.

    ``parquet_file_s3_2`` is not read in the body; it is only requested as an
    argument.
    """
    s3_bucket, _unused_key = parquet_file_s3_1
    wildcard_source = S3ParquetFile(
        aws_session=aws_session,
        bucket=s3_bucket,
        key='*',
    )
    expected_rows = 3 * 2
    with get_datafame_from_objs([wildcard_source]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_local_double_file(self):
    """Load two explicitly named local files and expect ``3 * 2`` rows."""
    first_file = LocalParquetFile(path='./tests/test1.parquet')
    second_file = LocalParquetFile(path='./tests/test2.parquet')
    expected_rows = 3 * 2
    with get_datafame_from_objs([first_file, second_file]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_local_and_s3_wildcard_files(self, aws_session, parquet_file_s3_1, parquet_file_s3_2):
    """Combine a local wildcard with an S3 wildcard and expect ``3 * (3 + 2)`` rows.

    ``parquet_file_s3_2`` is not read in the body; it is only requested as an
    argument.
    """
    s3_bucket, _unused_key = parquet_file_s3_1
    # Expected to hit 3 local files.
    local_wildcard = LocalParquetFile(path='./tests/*')
    # Expected to hit 2 files on S3.
    remote_wildcard = S3ParquetFile(
        aws_session=aws_session,
        bucket=s3_bucket,
        key='*',
    )
    expected_rows = 3 * (3 + 2)
    with get_datafame_from_objs([local_wildcard, remote_wildcard]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows
def test_s3_double_file(self, aws_session, parquet_file_s3_1, parquet_file_s3_2):
    """Load two explicitly named S3 objects and expect ``3 * 2`` rows."""
    first_bucket, first_key = parquet_file_s3_1
    second_bucket, second_key = parquet_file_s3_2
    first_file = S3ParquetFile(
        aws_session=aws_session,
        bucket=first_bucket,
        key=first_key,
    )
    second_file = S3ParquetFile(
        aws_session=aws_session,
        bucket=second_bucket,
        key=second_key,
    )
    expected_rows = 3 * 2
    with get_datafame_from_objs([first_file, second_file]) as frame:
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == expected_rows