def test_cat(run_test):
    """Round-trip a directory of CSV files through ``api.add`` / ``api.cat``.

    Three small CSV files are written to a scratch directory, the directory is
    added to ``TEST_BUNDLE_NAME`` in ``TEST_CONTEXT``, and every path yielded
    by ``api.cat`` is hashed with ``get_hash`` and compared against the scratch
    file that shares its leading ``<index>_`` name prefix.

    Args:
        run_test: Not referenced in the body; presumably a pytest fixture that
            prepares the test context -- confirm against its definition.
    """
    import tempfile

    # tempfile.TemporaryDirectory() is Python 3 only, so the scratch directory
    # is created here and removed by hand in the ``finally`` clause.
    scratch_dir = tempfile.mkdtemp()

    def source_csv(tag):
        # Path of the scratch CSV whose file name starts with ``tag``.
        return os.path.join(scratch_dir, '{}_test.csv'.format(tag))

    try:
        for index in range(3):
            csv_path = source_csv(index)
            # NOTE(review): the three-argument randint suggests ``random`` is
            # numpy.random rather than the stdlib module -- confirm the import.
            columns = {'a': random.randint(0, 10, 10), 'b': random.randint(10)}
            pd.DataFrame(columns).to_csv(csv_path)
            assert os.path.exists(csv_path)

        # The whole directory, not the individual files, is handed to the bundle.
        api.add(TEST_CONTEXT, TEST_BUNDLE_NAME, scratch_dir)

        # Each item read back is treated as a file path and matched to its
        # source file by the numeric prefix of its base name.
        for bundled_path in api.cat(TEST_CONTEXT, TEST_BUNDLE_NAME):
            tag = os.path.basename(bundled_path).split('_')[0]
            bundle_hash = get_hash(bundled_path)
            file_hash = get_hash(source_csv(tag))
            assert bundle_hash == file_hash, 'Hashes do not match'
    finally:
        shutil.rmtree(scratch_dir)
def test_remote_no_push_non_managed_s3():
    """Apply ``NonManagedS3`` with a remote bound and no incremental push.

    After the apply, the first ``file`` entry of bundle ``b2`` must not exist
    as a local path and must start with ``s3://``.
    """
    # Start from a freshly created context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles; presumably backed by a moto mock set up outside this
    # function -- confirm against the surrounding decorators/fixtures.
    client = boto3.client('s3')
    resource = boto3.resource('s3')

    buckets = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in buckets:
        resource.create_bucket(Bucket=bucket_name)

    # A listing without a 'Contents' key means the bucket holds no objects.
    for bucket_name in buckets:
        listing = client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind the remote before running the pipeline.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # With a remote bound, the bundle should reference S3 rather than a
    # file on the local filesystem.
    bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    file_uri = bundle.data['file'][0]
    assert not os.path.exists(file_uri), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert file_uri.startswith("s3://")
def test_non_managed_local():
    """Apply ``NonManagedLocal`` and check that bundle ``b1`` points at a local file.

    The context is recreated and verified empty, the pipeline is applied, and
    the test then asserts that exactly one bundle exists and that the first
    ``file`` entry of ``b1`` exists on the local filesystem.
    """
    # Start from a freshly created context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

    print(api.cat(TEST_CONTEXT, 'b1'))

    bundle = api.search(TEST_CONTEXT, human_name='b1')[0]
    local_path = bundle.data['file'][0]
    assert os.path.exists(local_path), 'Local file should be present in bundle'
def test_remote_push_non_managed_s3():
    """Apply ``NonManagedS3`` with a remote bound and ``incremental_push=True``.

    Checks three things in order: the first ``file`` entry of bundle ``b2`` is
    not a local path right after the apply; it is a local path after
    ``pull(localize=True)``; and ``TEST_BUCKET_OTHER`` contains an object whose
    key ends in ``test.parquet``.
    """
    # Start from a freshly created context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles; presumably backed by a moto mock set up outside this
    # function -- confirm against the surrounding decorators/fixtures.
    client = boto3.client('s3')
    resource = boto3.resource('s3')

    buckets = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in buckets:
        resource.create_bucket(Bucket=bucket_name)

    # A listing without a 'Contents' key means the bucket holds no objects.
    for bucket_name in buckets:
        listing = client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind the remote before running the pipeline.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))

    bundle = api.search(TEST_CONTEXT, human_name='b2')[0]

    # Before the pull the entry must not resolve to a local file.
    assert not os.path.exists(bundle.data['file'][0]), \
        'Non Managed S3 file w/ remote should be copied to remote'

    bundle.pull(localize=True)

    # The entry is read again on purpose: the assertion is about its value
    # after the pull.
    assert os.path.exists(bundle.data['file'][0]), \
        'Non Managed S3 file after pull should be copied to local'

    # Compare on the last path component of each key in the other bucket.
    remote_listing = client.list_objects(Bucket=TEST_BUCKET_OTHER)
    pushed_names = [
        entry['Key'].split('/')[-1] for entry in remote_listing['Contents']
    ]

    expected_files = ['test.parquet']
    for output_file in expected_files:
        assert output_file in pushed_names, 'Pipeline should have pushed file'
def test_no_remote_push_non_managed_s3():
    """Apply ``NonManagedS3`` with ``incremental_push=True`` but no remote bound.

    No ``api.remote`` call is made. After the apply, the context must hold
    exactly one bundle and the first ``file`` entry of ``b2`` must exist on the
    local filesystem.
    """
    # Start from a freshly created context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles; presumably backed by a moto mock set up outside this
    # function -- confirm against the surrounding decorators/fixtures.
    client = boto3.client('s3')
    resource = boto3.resource('s3')

    buckets = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in buckets:
        resource.create_bucket(Bucket=bucket_name)

    # A listing without a 'Contents' key means the bucket holds no objects.
    for bucket_name in buckets:
        listing = client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))

    all_bundles = api.search(TEST_CONTEXT)
    assert len(all_bundles) == 1, 'One bundle should be present'

    bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    local_path = bundle.data['file'][0]
    assert os.path.exists(local_path), \
        'Non Managed S3 file should be copied to local'