Example No. 1
def test_add_remote_fail():
    error = None
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3', region_name='us-east-1')

    # Bind remote context with just bucket
    try:
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    except Exception as e:
        error = e
    finally:
        assert (type(error) == RuntimeError)

    # Bind remote to new context with bucket and key
    error = None  # reset so a non-raising call cannot reuse the error from the block above
    try:
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_KEY_URL)
    except Exception as e:
        error = e
    finally:
        assert (type(error) == RuntimeError)

    api.delete_context(TEST_CONTEXT)
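
Note: every snippet on this page assumes module-level test constants (TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET, TEST_BUCKET_URL, TEST_BUCKET_KEY_URL, ...) and an S3 backend mocked with moto, neither of which the excerpts show. The sketch below reconstructs that scaffolding from how the examples use the names; the constant values, the import path, and the moto decorator are assumptions, not the project's actual test setup.

# Hypothetical scaffolding sketch -- reconstructed from the usage above;
# the real test suite's constants, imports, and fixtures may differ.
import boto3
import disdat.api as api
from moto import mock_s3   # moto < 5; moto >= 5 exposes mock_aws instead

TEST_CONTEXT = 'test_context'
TEST_REMOTE = 'test_remote'
TEST_BUCKET = 'test-bucket'
TEST_BUCKET_OTHER = 'test-bucket-other'
TEST_BUCKET_URL = 's3://{}'.format(TEST_BUCKET)
TEST_BUCKET_KEY_URL = 's3://{}/prefix'.format(TEST_BUCKET)

@mock_s3
def test_remote_round_trip():
    # With the decorator active, boto3 talks to moto's in-memory S3,
    # so api.remote()/push()/pull() never touch real AWS.
    api.context(context_name=TEST_CONTEXT)
    boto3.resource('s3', region_name='us-east-1').create_bucket(Bucket=TEST_BUCKET)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.delete_context(TEST_CONTEXT)
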
Example No. 2
def test_add_with_treat_as_bundle():
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Run the test pipeline; the run itself is expected to fail, but
    # incremental_push should already have pushed the task outputs to the remote
    try:
        api.apply(TEST_CONTEXT, CPush, incremental_push=True)
    except Exception:
        pass

    # Get objects from remote
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    keys = [o['Key'] for o in objects['Contents']]
    keys = [key.split('/')[-1] for key in keys]

    # Make sure files exist in S3
    for output_file in ['a.txt', 'b.txt']:
        assert output_file in keys, 'Pipeline should have pushed file'

    api.delete_context(TEST_CONTEXT)
Example No. 3
def manual_test_run_aws_batch(run_test, build_container_setup_only):
    """ Incomplete test.   The container code itself needs to have
    its S3 access mocked out.  Here we are testing manually
    """

    # Setup moto s3 resources
    #s3_resource = boto3.resource('s3')
    #s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Add a remote.   Pull and Push!
    manual_s3_url = 's3://'
    api.remote(TEST_CONTEXT, TEST_CONTEXT, manual_s3_url)

    retval = api.run(SETUP_DIR,
                     TEST_CONTEXT,
                     PIPELINE_CLS,
                     remote_context=TEST_CONTEXT,
                     remote_s3_url=manual_s3_url,
                     pull=True,
                     push=True)

    # Blow away everything and pull
    api.rm(TEST_CONTEXT, bundle_name='.*', rm_all=True)
    api.pull(TEST_CONTEXT)
    b = api.get(TEST_CONTEXT, 'A')
    assert b.data == sum(COMMON_DEFAULT_ARGS)
Example No. 4
def test_add_remote():
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3', region_name='us-east-1')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context with just bucket
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    with api.Bundle(TEST_CONTEXT) as b:
        b.name = 'output'
        b.add_data([1, 3, 5])

    b.commit()
    b.push()

    # Bind remote to new context with bucket and key
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_KEY_URL)

    with api.Bundle(TEST_CONTEXT) as b:
        b.name = 'output'
        b.add_data([1, 3, 5])

    b.commit()
    b.push()

    api.delete_context(TEST_CONTEXT)
Example No. 5
def test():
    """

    Returns:

    """

    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, REMOTE_URL, force=True)

    with api.Bundle(TEST_CONTEXT, TEST_NAME, owner=getpass.getuser()) as b:
        for i in range(3):
            with b.add_file('output_{}'.format(i)).open('w') as of:
                of.write("some text for the {} file".format(i))

    b.commit().push()

    b.rm()

    b.pull(localize=False)

    api.apply(TEST_CONTEXT,
              '-',
              'test_output',
              'ConsumeExtDep',
              incremental_pull=True)

    api.delete_context(TEST_CONTEXT, remote=True)
Example No. 6
def test_pull(run_test):
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = s3_resource.Bucket(TEST_BUCKET)

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    api.apply(TEST_CONTEXT, RemoteTest)
    bundle = api.get(TEST_CONTEXT, 'remote_test')

    assert bundle.data == 'Hello'

    bundle.commit()
    bundle.push()

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    pulled_bundles = api.search(TEST_CONTEXT)
    assert len(pulled_bundles) > 0, 'Should have pulled bundles down'
    assert pulled_bundles[0].data == 'Hello', 'Bundle should contain the original data'

    bucket.objects.all().delete()
    bucket.delete()
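
The pipelines these tests run (RemoteTest, ManagedS3, NonManagedS3, CIP, BIP, CPush, ...) are defined elsewhere in the suite. As a rough guide, a RemoteTest task consistent with the assertion above that the 'remote_test' bundle contains 'Hello' might look like the following; treat the class body as an assumption about disdat's PipeTask API rather than the suite's actual source.

# Hypothetical RemoteTest task, inferred from the assertions above.
from disdat.pipe import PipeTask

class RemoteTest(PipeTask):
    def pipe_requires(self):
        # Name the output bundle so api.get(TEST_CONTEXT, 'remote_test') can find it.
        self.set_bundle_name('remote_test')

    def pipe_run(self):
        # The returned value becomes the bundle's data.
        return 'Hello'
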
Example No. 7
def test_push(run_test):
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3', region_name='us-east-1')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = s3_resource.Bucket(TEST_BUCKET)

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    _ = api.Bundle(TEST_CONTEXT, name='remote_test', data='Hello')
    bundle = api.get(TEST_CONTEXT, 'remote_test')

    assert bundle.data == 'Hello'

    bundle.commit()
    bundle.push()

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    bucket.objects.all().delete()
    bucket.delete()
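
Several tests (test_pull, test_push, test_zero_copy_s3_file, ...) take a run_test argument, a pytest fixture defined in the suite's conftest. A minimal sketch of such a fixture, assuming it simply recreates the context around each test inside a moto mock (the real fixture may do more):

# Hypothetical run_test fixture; the suite's actual conftest.py may differ.
import pytest
import disdat.api as api
from moto import mock_s3

@pytest.fixture
def run_test():
    # Mock S3 and give each test a fresh, empty context.
    with mock_s3():
        api.delete_context(TEST_CONTEXT)
        api.context(context_name=TEST_CONTEXT)
        yield
        api.delete_context(TEST_CONTEXT)
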
Example No. 8
def test_push():
    api.context(context_name=TEST_CONTEXT)

    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    bucket = s3_resource.Bucket(TEST_BUCKET)

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    api.apply(TEST_CONTEXT, 'RemoteTest')
    bundle = api.get(TEST_CONTEXT, 'remote_test')

    assert bundle.data == 'Hello'

    bundle.commit()
    bundle.push()

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    bucket.objects.all().delete()
    bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
Example No. 9
def test_remote_no_push_non_managed_s3():
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    s3_resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'
    objects = s3_client.list_objects(Bucket=TEST_BUCKET_OTHER)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Apply
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # Local context should not contain file if a remote exists.
    b = api.search(TEST_CONTEXT, human_name='b2')[0]
    assert not os.path.exists(b.data['file'][0]), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert b.data['file'][0].startswith("s3://")
Example No. 10
def test_remote_push_managed_s3():
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Apply
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    assert not os.path.exists(api.search(TEST_CONTEXT, human_name='b4')[0].data['file'][0]), \
        'Managed S3 file should not be copied to local'

    # Get objects from remote
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    keys = [o['Key'] for o in objects['Contents']]
    keys = [key.split('/')[-1] for key in keys]

    # Make sure files exist in S3
    for output_file in ['test.parquet']:
        assert output_file in keys, 'Pipeline should have pushed file'
Example No. 11
def _setup(remote=True):
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3', region_name='us-east-1')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    if remote:
        api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    return s3_client
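
Example No. 11 is a helper rather than a test. A hypothetical caller, reusing the RemoteTest pipeline and 'remote_test' bundle name seen in Examples No. 6 and 8 (the function name here is made up):

# Hypothetical usage of the _setup() helper above.
def test_push_after_setup():
    s3_client = _setup()                     # fresh context, mocked bucket, remote bound
    api.apply(TEST_CONTEXT, RemoteTest)      # produce the 'remote_test' bundle locally
    bundle = api.get(TEST_CONTEXT, 'remote_test')
    bundle.commit()
    bundle.push()                            # objects should now appear in TEST_BUCKET
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Push should have uploaded objects'
    api.delete_context(TEST_CONTEXT)
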
Example No. 12
def test_remote_no_push_managed_s3():
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    with pytest.raises(Exception) as e:
        api.apply(TEST_CONTEXT, ManagedS3)
Example No. 13
def test_zero_copy_s3_file(run_test):
    """ Test managed path in local file """
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    saved_md5 = md5_file(__file__)

    with api.Bundle(TEST_CONTEXT, name=TEST_BUNDLE) as b:
        s3_target = b.get_remote_file('test_s3_file.txt')
        aws_s3.cp_local_to_s3_file(__file__, s3_target.path)
        b.add_data(s3_target)
        b.add_tags({'info': 'added an s3 file'})
    saved_uuid = b.uuid

    b = api.get(TEST_CONTEXT, None, uuid=saved_uuid)
    b.pull(localize=True)
    md5 = md5_file(b.data)
    print(md5)
    print(saved_md5)
    assert md5 == saved_md5
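
Examples No. 13 and 14 verify the round trip with an md5_file() helper that is not shown. An assumed equivalent (the suite's actual helper may be implemented differently):

# Assumed equivalent of the md5_file() helper used in Examples No. 13-14.
import hashlib

def md5_file(path):
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            h.update(chunk)
    return h.hexdigest()
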
Example No. 14
def test_copy_in_s3_file_with_remote(run_test):
    """ Test copying in s3 file
    The file should be copied into the remote context
    """

    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Copy a local file to moto s3 bucket
    saved_md5 = md5_file(__file__)
    aws_s3.put_s3_file(__file__, TEST_BUCKET_URL)

    s3_file = os.path.join(TEST_BUCKET_URL, os.path.basename(__file__))

    with api.Bundle(TEST_CONTEXT, name=TEST_BUNDLE) as b:
        b.add_data(s3_file)
        b.add_tags({'info': 'added an s3 file'})
    saved_uuid = b.uuid

    b = api.get(TEST_CONTEXT, None, uuid=saved_uuid)
    assert b.data.startswith("s3://")
Example No. 15
def test():
    """ Test the api.run() function.

    1.) Create the container via the api

    2.) Create a test context

    3.) Call run locally

    4.) Call run on AWS Batch (need to add MonkeyPatch)

    """

    test_arg = [1000, 2000, 8000]

    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, TEST_REMOTE, force=True)

    print("--0: Create docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS)

    print("--1: Running container locally and storing results locally...")
    retval = api.run(TEST_CONTEXT,
                     TEST_CONTEXT,
                     OUTPUT_BUNDLE,
                     PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     no_pull=True,
                     no_push=True)
    print("--1: 100 chars of RETVAL {}".format(retval[:100]))
    b = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    assert (b is not None)
    print("--1: Pipeline tried to store {} and we found {}".format(
        test_arg, b.cat()))
    assert (np.array_equal(b.cat(), test_arg))
    b.rm()

    print("--2: Running container locally and pushing results ...")
    retval = api.run(TEST_CONTEXT,
                     TEST_CONTEXT,
                     OUTPUT_BUNDLE,
                     PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     no_pull=True,
                     no_push=False)
    print("--2: 100 chars of RETVAL {}".format(retval[:100]))
    print("--2B: Removing local output bundle...")
    api.get(TEST_CONTEXT, OUTPUT_BUNDLE).rm()
    print("--2C: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    b = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--2C: Pipeline tried to store {} and we found {}".format(
        test_arg, b.cat()))
    assert (np.array_equal(b.cat(), test_arg))
    b.rm()

    print("--3: Running container on AWS pulling and pushing results ...")
    print("--3B: Push docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS, push=True)
    print("--3C: Run docker container on AWS Batch")
    retval = api.run(TEST_CONTEXT,
                     TEST_CONTEXT,
                     OUTPUT_BUNDLE,
                     PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     backend='AWSBatch')
    print("--3C: RETVAL {}".format(retval))
    print("--3D: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    b = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--3D: Pipeline tried to store {} and we found {}".format(
        test_arg, b.cat()))
    assert (np.array_equal(b.cat(), test_arg))
    b.rm()

    print("--4: Running with no submit ...")
    print("--4B: Reusing docker container")
    print("--4C: Submit Job on AWS Batch")
    retval = api.run(TEST_CONTEXT,
                     TEST_CONTEXT,
                     OUTPUT_BUNDLE,
                     PIPELINE_CLS,
                     pipeline_args={'int_array': test_arg},
                     remote=TEST_REMOTE,
                     backend='AWSBatch',
                     no_submit=True)
    print("--4C: RETVAL {}".format(retval))

    #api.apply(TEST_CONTEXT, '-', '-', 'Root'
    # )

    #b = api.get(TEST_CONTEXT, 'PreMaker_auf_root')

    #assert(b is not None)

    api.delete_context(TEST_CONTEXT)
Example No. 16
def test_add_with_treat_as_bundle():
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Run test pipeline
    api.apply(TEST_CONTEXT, CIP)

    # Push bundles to remote
    for bundle_name in ['a', 'b', 'c']:
        assert api.get(TEST_CONTEXT,
                       bundle_name) is not None, 'Bundle should exist'

        api.commit(TEST_CONTEXT, bundle_name)
        api.push(TEST_CONTEXT, bundle_name)

    # Blow away context and recreate
    api.delete_context(TEST_CONTEXT)
    assert TEST_CONTEXT not in api.ls_contexts()

    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    assert api.search(TEST_CONTEXT) == [], 'Context should be empty'

    # Pull bundles from remote
    api.pull(TEST_CONTEXT)

    # Make sure all bundle meta data comes down but data remains in S3
    for bundle_name in ['a', 'b', 'c']:
        bundle = api.get(TEST_CONTEXT, bundle_name)
        assert bundle is not None, 'Bundle should exist'

        data_path = bundle.data['file'][0]
        assert data_path.startswith('s3://'), 'Data should be in S3'

    # Rerun pipeline
    api.apply(TEST_CONTEXT, BIP, params={'n': 100}, incremental_pull=True)

    # Make sure all bundles exist. Bundles a and b should have local paths
    for bundle_name in ['a', 'b', 'c']:
        bundle = api.get(TEST_CONTEXT, bundle_name)
        assert bundle is not None, 'Bundle should exist'

        data_path = bundle.data['file'][0]
        if bundle_name in ['a', 'b']:
            assert not data_path.startswith('s3://'), 'Data should be local'
        else:
            assert data_path.startswith('s3://'), 'Data should be in S3'

    api.delete_context(TEST_CONTEXT)