def test_add_remote_fail():
    """Binding a remote must raise RuntimeError for both URL forms.

    NOTE(review): unlike test_add_remote, no bucket is created here, so
    presumably the failure comes from the missing bucket — confirm.

    Fixes over the original:
      * `error` was never reset between the two cases, so a stale
        RuntimeError from case 1 made case 2 pass even if it did not raise.
      * `type(error) == RuntimeError` replaced by `pytest.raises`.
      * Unused `s3_client` / `s3_resource` locals removed.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Bind remote context with just the bucket URL — must fail.
    with pytest.raises(RuntimeError):
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Bind remote with bucket-and-key URL — must fail as well.
    with pytest.raises(RuntimeError):
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_KEY_URL)

    api.delete_context(TEST_CONTEXT)
def test_add_with_treat_as_bundle():
    """Run CPush with incremental_push=True and verify its output files
    land in the (moto) S3 bucket even though the pipeline run itself is
    expected to fail."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 handles.
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Attach the remote to the context.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # The pipeline should fail; incremental pushes happen anyway.
    try:
        api.apply(TEST_CONTEXT, CPush, incremental_push=True)
    except Exception:
        pass

    # Collect the basenames of every object now in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    basenames = [obj['Key'].split('/')[-1] for obj in listing['Contents']]

    # The incrementally pushed files must be present.
    for output_file in ['a.txt', 'b.txt']:
        assert output_file in basenames, 'Pipeline should have pushed file'

    api.delete_context(TEST_CONTEXT)
def manual_test_run_aws_batch(run_test, build_container_setup_only):
    """Manual-only exercise of api.run() against AWS Batch.

    Incomplete: the container code itself still needs its S3 access
    mocked out, so this is driven by hand rather than by CI.
    """
    # Moto setup intentionally disabled until container S3 access is mocked:
    # s3_resource = boto3.resource('s3')
    # s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Add a remote, then run with both pull and push enabled.
    manual_s3_url = 's3://'
    api.remote(TEST_CONTEXT, TEST_CONTEXT, manual_s3_url)
    retval = api.run(SETUP_DIR,
                     TEST_CONTEXT,
                     PIPELINE_CLS,
                     remote_context=TEST_CONTEXT,
                     remote_s3_url=manual_s3_url,
                     pull=True,
                     push=True)

    # Wipe every local bundle, pull from the remote, and verify the result.
    api.rm(TEST_CONTEXT, bundle_name='.*', rm_all=True)
    api.pull(TEST_CONTEXT)
    b = api.get(TEST_CONTEXT, 'A')
    assert b.data == sum(COMMON_DEFAULT_ARGS)
def test_add_remote():
    """Bind a remote twice (bucket-only URL, then bucket+key URL) and
    push a bundle through each binding."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 handles.
    client = boto3.client('s3')
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Remote bound with a bucket-only URL.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    with api.Bundle(TEST_CONTEXT) as b:
        b.name = 'output'
        b.add_data([1, 3, 5])
    b.commit()
    b.push()

    # Re-bind with a bucket-and-key URL and push again.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_KEY_URL)
    with api.Bundle(TEST_CONTEXT) as b:
        b.name = 'output'
        b.add_data([1, 3, 5])
    b.commit()
    b.push()

    api.delete_context(TEST_CONTEXT)
def test():
    """Round-trip an external-dependency bundle: create files, commit
    and push, remove locally, pull without localizing, then run
    ConsumeExtDep with incremental_pull."""
    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, REMOTE_URL, force=True)

    # Create a bundle with three small text files.
    with api.Bundle(TEST_CONTEXT, TEST_NAME, owner=getpass.getuser()) as b:
        for i in range(3):
            with b.add_file('output_{}'.format(i)).open('w') as of:
                of.write("some text for the {} file".format(i))

    b.commit().push()
    b.rm()
    b.pull(localize=False)

    # Consume the (non-localized) bundle as an external dependency.
    api.apply(TEST_CONTEXT, '-', 'test_output', 'ConsumeExtDep', incremental_pull=True)

    api.delete_context(TEST_CONTEXT, remote=True)
def test_pull(run_test):
    """Push a bundle to the remote, wipe the local context, and pull it
    back down intact."""
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    # Produce a bundle and push it through the remote.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, RemoteTest)
    bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert bundle.data == 'Hello'
    bundle.commit()
    bundle.push()

    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Recreate an empty local context and pull from the remote.
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    pulled_bundles = api.search(TEST_CONTEXT)
    assert len(pulled_bundles) > 0, 'Pulled bundles down'
    assert pulled_bundles[0].data == 'Hello', 'Bundle contains correct data'

    # Clean up the moto bucket.
    bucket.objects.all().delete()
    bucket.delete()
def test_push(run_test):
    """Create a bundle via the Bundle constructor, commit, and push it;
    the remote bucket must then contain objects."""
    client = boto3.client('s3')
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    _ = api.Bundle(TEST_CONTEXT, name='remote_test', data='Hello')
    bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert bundle.data == 'Hello'

    bundle.commit()
    bundle.push()

    # The push must have produced at least one object in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Clean up the moto bucket.
    bucket.objects.all().delete()
    bucket.delete()
def test_push():
    """Run the RemoteTest pipeline (by name), commit/push its bundle,
    and verify objects appear in the (moto) bucket."""
    api.context(context_name=TEST_CONTEXT)

    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    api.apply(TEST_CONTEXT, 'RemoteTest')
    bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert bundle.data == 'Hello'

    bundle.commit()
    bundle.push()

    # The push must have produced at least one object in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Clean up the moto bucket and the context.
    bucket.objects.all().delete()
    bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
def test_remote_no_push_non_managed_s3():
    """With a remote bound, a non-managed S3 file output should end up
    at an s3:// path rather than as a local file."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3: create both buckets used by the pipeline.
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Both buckets must start out empty.
    for bucket_name in (TEST_BUCKET, TEST_BUCKET_OTHER):
        listing = client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind remote context and run the pipeline.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # Local context should not contain the file when a remote exists.
    b2 = api.search(TEST_CONTEXT, human_name='b2')[0]
    assert not os.path.exists(b2.data['file'][0]), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert b2.data['file'][0].startswith("s3://")
def test_remote_push_managed_s3():
    """ManagedS3 with incremental_push=True: the output lives on S3 and
    is never copied to the local context."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 handles.
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind remote context and run with incremental push enabled.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    local_path = api.search(TEST_CONTEXT, human_name='b4')[0].data['file'][0]
    assert not os.path.exists(local_path), \
        'Managed S3 file should not be copied to local'

    # The pushed file(s) must now exist in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    basenames = [obj['Key'].split('/')[-1] for obj in listing['Contents']]
    for output_file in ['test.parquet']:
        assert output_file in basenames, 'Pipeline should have pushed file'
def _setup(remote=True):
    """Reset TEST_CONTEXT, create the moto bucket, optionally bind the
    remote, and return the boto3 S3 client.

    :param remote: when True, bind TEST_REMOTE to the fresh context.
    :return: the boto3 S3 client for further assertions.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 handles.
    client = boto3.client('s3')
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    if remote:
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    return client
def test_remote_no_push_managed_s3():
    """Applying ManagedS3 without incremental_push must raise.

    (Contrast with test_remote_push_managed_s3, where the same pipeline
    succeeds when incremental_push=True.)

    Fix over the original: the exception-info binding (`as e`) was never
    used, so it is dropped.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Without incremental_push, the managed-S3 pipeline is expected to fail.
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
def test_zero_copy_s3_file(run_test):
    """Write a file straight to a bundle's managed S3 path, then pull
    with localization and check the content survived (md5 match)."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Use this source file as the payload; remember its checksum.
    saved_md5 = md5_file(__file__)

    with api.Bundle(TEST_CONTEXT, name=TEST_BUNDLE) as bundle:
        s3_target = bundle.get_remote_file('test_s3_file.txt')
        aws_s3.cp_local_to_s3_file(__file__, s3_target.path)
        bundle.add_data(s3_target)
        bundle.add_tags({'info': 'added an s3 file'})
    saved_uuid = bundle.uuid

    # Fetch by uuid, localize, and compare checksums.
    bundle = api.get(TEST_CONTEXT, None, uuid=saved_uuid)
    bundle.pull(localize=True)
    md5 = md5_file(bundle.data)
    print(md5)
    print(saved_md5)
    assert md5 == saved_md5
def test_copy_in_s3_file_with_remote(run_test):
    """Copy an existing S3 file into a bundle; with a remote bound the
    bundle's data should remain at an s3:// path (copied into the
    remote context) rather than localized."""
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Stage this source file in the moto bucket.
    saved_md5 = md5_file(__file__)
    aws_s3.put_s3_file(__file__, TEST_BUCKET_URL)
    s3_file = os.path.join(TEST_BUCKET_URL, os.path.basename(__file__))

    with api.Bundle(TEST_CONTEXT, name=TEST_BUNDLE) as bundle:
        bundle.add_data(s3_file)
        bundle.add_tags({'info': 'added an s3 file'})
    saved_uuid = bundle.uuid

    # Fetch by uuid: the data path must still point at S3.
    bundle = api.get(TEST_CONTEXT, None, uuid=saved_uuid)
    assert bundle.data.startswith("s3://")
def test():
    """ Test the api.run() function.

    1.) Create the container via the api
    2.) Create a test context
    3.) Call run locally
    4.) Call run on AWS Batch (need to add MonkeyPatch)
    """
    test_arg = [1000, 2000, 8000]

    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, TEST_REMOTE, force=True)

    print("--0: Create docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS)

    # Phase 1: local run, local storage only.
    print("--1: Running container locally and storing results locally...")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     no_pull=True,
                     no_push=True)
    print("--1: 100 chars of RETVAL {}".format(retval[:100]))
    out_bundle = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    assert (out_bundle is not None)
    print("--1: Pipeline tried to store {} and we found {}".format(
        test_arg, out_bundle.cat()))
    assert (np.array_equal(out_bundle.cat(), test_arg))
    out_bundle.rm()

    # Phase 2: local run, push results, then pull and verify.
    print("--2: Running container locally and pushing results ...")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     no_pull=True,
                     no_push=False)
    print("--2: 100 chars of RETVAL {}".format(retval[:100]))
    print("--2B: Removing local output bundle...")
    api.get(TEST_CONTEXT, OUTPUT_BUNDLE).rm()
    print("--2C: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    out_bundle = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--2C: Pipeline tried to store {} and we found {}".format(
        test_arg, out_bundle.cat()))
    assert (np.array_equal(out_bundle.cat(), test_arg))
    out_bundle.rm()

    # Phase 3: AWS Batch run with pull and push.
    print("--3: Running container on AWS pulling and pushing results ...")
    print("--3B: Push docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS, push=True)
    print("--3C: Run docker container on AWS Batch")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     backend='AWSBatch')
    print("--3C: RETVAL {}".format(retval))
    print("--3D: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    out_bundle = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--3D: Pipeline tried to store {} and we found {}".format(
        test_arg, out_bundle.cat()))
    assert (np.array_equal(out_bundle.cat(), test_arg))
    out_bundle.rm()

    # Phase 4: AWS Batch with no_submit (job prepared but not submitted).
    print("--4: Running with no submit ...")
    print("--4B: Reusing docker container")
    print("--4C: Submit Job on AWS Batch")
    retval = api.run(TEST_CONTEXT, TEST_CONTEXT, OUTPUT_BUNDLE, PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     backend='AWSBatch',
                     no_submit=True)
    print("--4C: RETVAL {}".format(retval))

    # api.apply(TEST_CONTEXT, '-', '-', 'Root')
    # b = api.get(TEST_CONTEXT, 'PreMaker_auf_root')
    # assert(b is not None)

    api.delete_context(TEST_CONTEXT)
def test_add_with_treat_as_bundle():
    """Round-trip bundles through a remote: push a/b/c, recreate the
    context, pull (payloads stay on S3), then rerun BIP with
    incremental_pull and check which bundles got localized."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Moto-backed S3 handles.
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind remote context and run the producing pipeline.
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    api.apply(TEST_CONTEXT, CIP)

    # Commit and push each produced bundle.
    for bundle_name in ['a', 'b', 'c']:
        assert api.get(TEST_CONTEXT, bundle_name) is not None, 'Bundle should exist'
        api.commit(TEST_CONTEXT, bundle_name)
        api.push(TEST_CONTEXT, bundle_name)

    # Blow away the context and recreate it empty.
    api.delete_context(TEST_CONTEXT)
    assert TEST_CONTEXT not in api.ls_contexts()
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    assert api.search(TEST_CONTEXT) == [], 'Context should be empty'

    # Pull brings metadata down; payloads remain on S3.
    api.pull(TEST_CONTEXT)
    for bundle_name in ['a', 'b', 'c']:
        bundle = api.get(TEST_CONTEXT, bundle_name)
        assert bundle is not None, 'Bundle should exist'
        data_path = bundle.data['file'][0]
        assert data_path.startswith('s3://'), 'Data should be in S3'

    # Rerun with incremental_pull: the test expects a and b to be
    # localized while c stays on S3.
    api.apply(TEST_CONTEXT, BIP, params={'n': 100}, incremental_pull=True)
    for bundle_name in ['a', 'b', 'c']:
        bundle = api.get(TEST_CONTEXT, bundle_name)
        assert bundle is not None, 'Bundle should exist'
        data_path = bundle.data['file'][0]
        if bundle_name in ['a', 'b']:
            assert not data_path.startswith('s3://'), 'Data should be local'
        else:
            assert data_path.startswith('s3://'), 'Data should be in S3'

    api.delete_context(TEST_CONTEXT)