Ejemplo n.º 1
0
def test_pull(run_test):
    """Push a bundle to the S3 remote, recreate the context, and pull it back.

    Creates ``TEST_BUCKET``, applies ``RemoteTest`` in ``TEST_CONTEXT``,
    commits and pushes the ``remote_test`` bundle, then deletes and
    recreates the context and checks that ``api.pull`` brings back a bundle
    whose data is ``'Hello'``. The bucket is emptied and removed at the end.

    Args:
        run_test: fixture requested by the test; it is not referenced in the
            body (presumably it prepares ``TEST_CONTEXT`` -- confirm against
            its definition).
    """
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: empty bucket, empty local context.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    initial_bundles = api.search(TEST_CONTEXT)
    assert len(initial_bundles) == 0, 'Context should be empty'
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Produce the bundle locally and verify its payload.
    api.apply(TEST_CONTEXT, RemoteTest)
    remote_bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert remote_bundle.data == 'Hello'

    remote_bundle.commit()
    remote_bundle.push()

    # The push must have created at least one object in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    pushed_objects = listing['Contents']
    assert len(pushed_objects) > 0, 'Bucket should not be empty'

    # Start over with a fresh context bound to the same remote, then pull.
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    restored = api.search(TEST_CONTEXT)
    assert len(restored) > 0, 'Pulled bundles down'
    first_restored = restored[0]
    assert first_restored.data == 'Hello', 'Bundle contains correct data'

    # Clean up the bucket.
    test_bucket.objects.all().delete()
    test_bucket.delete()
Ejemplo n.º 2
0
def test_string_task():
    """Store a string in a bundle and read it back unchanged.

    Expects ``TEST_CONTEXT`` to be empty on entry and to contain exactly
    one bundle afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    _created = api.Bundle(TEST_CONTEXT, name='string_task', data='output')
    fetched = api.get(TEST_CONTEXT, 'string_task')
    value = fetched.data

    assert value == 'output', 'Data did not match output'
    # Exact type match on purpose: subclasses of the text type do not count.
    assert type(value) is six.text_type, 'Data is not string'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 3
0
def test_float_task():
    """Store a float in a bundle and read it back unchanged.

    Expects ``TEST_CONTEXT`` to be empty on entry and to contain exactly
    one bundle afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    _created = api.Bundle(TEST_CONTEXT, name='float_task', data=2.5)
    fetched = api.get(TEST_CONTEXT, 'float_task')
    value = fetched.data

    assert value == 2.5, 'Data did not match output'
    # Exact type match on purpose: the value must be a plain float.
    assert type(value) is float, 'Data is not float'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 4
0
def test_list_task():
    """Store a list in a bundle and check it is returned as a NumPy array.

    The bundle is created with the list ``[1, 2, 3]``; the test asserts that
    the data read back is element-wise equal to it and is an ``np.ndarray``.
    Expects ``TEST_CONTEXT`` to be empty on entry and to contain exactly one
    bundle afterwards.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    _ = api.Bundle(TEST_CONTEXT, name='list_task', data=[1, 2, 3])
    data = api.get(TEST_CONTEXT, 'list_task').data

    assert np.array_equal(data, [1, 2, 3]), 'Data did not match output'
    # The check is against np.ndarray, so the failure message names that
    # type (the previous message said "list", contradicting the check).
    assert type(data) is np.ndarray, 'Data is not ndarray'
    assert len(
        api.search(TEST_CONTEXT)) == 1, 'Only one bundle should be present'
Ejemplo n.º 5
0
def test_dependant_tasks():
    """Apply task ``'C'`` and check its output and the number of bundles.

    The test expects the ``c`` bundle to hold the int ``6`` and the context
    to contain three bundles afterwards (presumably ``C`` plus its upstream
    tasks -- confirm against the task definitions).
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'C')
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 6, 'Data did not match output'
    # Message corrected: the check is for int, not for a path.
    assert type(data) is int, 'Data is not int'
    assert len(
        api.search(TEST_CONTEXT)) == 3, 'Three bundles should be present'
Ejemplo n.º 6
0
def test_int_task():
    """Apply ``IntTask`` and check that its bundle holds the int ``1``.

    Expects ``TEST_CONTEXT`` to be empty on entry and to contain exactly
    one bundle afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, IntTask)
    fetched = api.get(TEST_CONTEXT, 'int_task')
    value = fetched.data

    assert value == 1, 'Data did not match output'
    # Exact type match on purpose: e.g. a bool would not be accepted.
    assert type(value) is int, 'Data is not int'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 7
0
def test_dict_task():
    """Apply ``DictTask`` and check that its bundle holds the expected dict.

    Calls ``setup()`` first, then expects ``TEST_CONTEXT`` to be empty and,
    after the task runs, to contain exactly one bundle.
    """
    setup()
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, DictTask)
    fetched = api.get(TEST_CONTEXT, 'dict_task')
    value = fetched.data

    expected = {'hello': ['world']}
    assert value == expected, 'Data did not match output'
    # Exact type match on purpose: dict subclasses do not count.
    assert type(value) is dict, 'Data is not dict'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 8
0
def test_dict_task():
    """Store a dict in a bundle and read it back unchanged.

    Calls ``setup()`` first, then expects ``TEST_CONTEXT`` to be empty and,
    after the bundle is created, to contain exactly one bundle.
    """
    setup()
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    payload = {'hello': ['world']}
    _created = api.Bundle(TEST_CONTEXT, name='dict_task', data=payload)
    fetched = api.get(TEST_CONTEXT, 'dict_task')
    value = fetched.data

    # Compare against a fresh literal rather than the object handed in.
    assert value == {'hello': ['world']}, 'Data did not match output'
    assert type(value) is dict, 'Data is not dict'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 9
0
def test_non_managed_local():
    """Apply ``NonManagedLocal`` and check the bundle's file exists locally.

    The context is deleted and recreated first. After the task runs the
    context must hold exactly one bundle, and the first ``'file'`` entry of
    bundle ``b1`` must be an existing local path.
    """
    # Start from a fresh, empty context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)
    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'

    # Dump the bundle contents to stdout to help debugging.
    print(api.cat(TEST_CONTEXT, 'b1'))

    b1_bundle = api.search(TEST_CONTEXT, human_name='b1')[0]
    local_file = b1_bundle.data['file'][0]
    assert os.path.exists(local_file), \
        'Local file should be present in bundle'
Ejemplo n.º 10
0
def test_df_task():
    """Store a pandas DataFrame in a bundle and read it back unchanged.

    Expects ``TEST_CONTEXT`` to be empty on entry and to contain exactly
    one bundle afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    # Single-column frame used as the bundle payload.
    frame = pd.DataFrame()
    frame['a'] = [1, 2, 3]

    _created = api.Bundle(TEST_CONTEXT, name='df_task', data=frame)
    fetched = api.get(TEST_CONTEXT, 'df_task')
    value = fetched.data

    assert frame.equals(value), 'Data did not match output'
    # Exact type match on purpose: DataFrame subclasses do not count.
    assert type(value) is pd.DataFrame, 'Data is not df'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 11
0
def test_file_task():
    """Apply ``FileTask`` and check the file its bundle points to.

    The bundle data is expected to be a ``str`` path to a file whose content
    is ``'5'``. Expects ``TEST_CONTEXT`` to be empty on entry and to contain
    exactly one bundle afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, FileTask)
    fetched = api.get(TEST_CONTEXT, 'file_task')
    path = fetched.data

    with open(path) as handle:
        content = handle.read()

    assert content == '5', 'Data did not match output'
    # Exact type match on purpose: the path must be a plain str.
    assert type(path) is str, 'Data is not path'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 12
0
def test_single_task():
    """Apply task ``'A'`` twice and check that no second bundle is created.

    After the first apply the ``a`` bundle must hold the int ``2`` and the
    context must contain one bundle; applying ``'A'`` again must leave the
    bundle count at one.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'A')
    data = api.get(TEST_CONTEXT, 'a').data

    assert data == 2, 'Data did not match output'
    # Message corrected: the check is for int, not for a path.
    assert type(data) is int, 'Data is not int'
    assert len(
        api.search(TEST_CONTEXT)) == 1, 'Only one bundle should be present'

    # Re-applying the same task must not add another bundle.
    api.apply(TEST_CONTEXT, 'A')
    assert len(
        api.search(TEST_CONTEXT)) == 1, 'Only one bundle should be present'
Ejemplo n.º 13
0
def test_push(run_test):
    """Create a bundle, commit it, and push it to the S3 remote.

    Creates ``TEST_BUCKET``, binds it as the remote of ``TEST_CONTEXT``,
    stores ``'Hello'`` in a bundle named ``remote_test``, then commits and
    pushes it and checks that the bucket is no longer empty. The bucket is
    emptied and removed at the end.

    Args:
        run_test: fixture requested by the test; it is not referenced in the
            body (presumably it prepares ``TEST_CONTEXT`` -- confirm against
            its definition).
    """
    client = boto3.client('s3')
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: empty bucket, empty local context.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    initial_bundles = api.search(TEST_CONTEXT)
    assert len(initial_bundles) == 0, 'Context should be empty'
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    _created = api.Bundle(TEST_CONTEXT, name='remote_test', data='Hello')
    remote_bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert remote_bundle.data == 'Hello'

    remote_bundle.commit()
    remote_bundle.push()

    # The push must have created at least one object in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    pushed_objects = listing['Contents']
    assert len(pushed_objects) > 0, 'Bucket should not be empty'

    # Clean up the bucket.
    test_bucket.objects.all().delete()
    test_bucket.delete()
Ejemplo n.º 14
0
def test_remote_push_managed_s3():
    """Apply ``ManagedS3`` with incremental push and check the file is in S3.

    The context is recreated and bound to the S3 remote. After the task runs
    with ``incremental_push=True``, the first ``'file'`` entry of bundle
    ``b4`` must not exist on the local filesystem, and an object whose key
    ends in ``test.parquet`` must be present in ``TEST_BUCKET``.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Apply
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    managed_path = api.search(TEST_CONTEXT, human_name='b4')[0].data['file'][0]
    assert not os.path.exists(managed_path), \
        'Managed S3 file should not be copied to local'

    # Get objects from remote. An empty bucket has no 'Contents' key (see the
    # emptiness check above), so assert on it first: a failed push then shows
    # up as a readable assertion failure rather than a KeyError.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Pipeline should have pushed file'
    keys = [o['Key'].split('/')[-1] for o in objects['Contents']]

    # Make sure files exist in S3
    for output_file in ['test.parquet']:
        assert output_file in keys, 'Pipeline should have pushed file'
Ejemplo n.º 15
0
def test_bundle_push_delocalize():
    """Test ``Bundle.push(delocalize=True)``.

    Pushes three local-file bundles one at a time with ``delocalize=True``
    and checks that every entry of each bundle's data then starts with
    ``s3://``. Afterwards the local bundles are removed, the context is
    pulled with ``localize=True``, and each pulled file's md5 is compared
    with the matching ``f<i>`` tag on its bundle.
    """
    s3_client = _setup()

    bundles = {}
    for i in range(3):
        name = "shark{}".format(i)
        bundles[name] = create_local_file_bundle(name)
        bundles[name].push(delocalize=True)

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    # Check for delocalization: every data entry must now be an S3 path.
    for pushed in bundles.values():
        for path in pushed.data:
            assert path.startswith("s3://")

    # Might as well test the push and pull worked.
    api.rm(TEST_CONTEXT, rm_all=True)
    api.pull(TEST_CONTEXT, localize=True)
    # Keyed by name so that bundles sharing a name are checked only once.
    found_bundles = {b.name: b for b in api.search(TEST_CONTEXT)}
    for pulled in found_bundles.values():
        for i, path in enumerate(pulled.data):
            assert md5_file(path) == pulled.tags["f{}".format(i)]

    api.delete_context(TEST_CONTEXT)
Ejemplo n.º 16
0
def test_fast_push():
    """Push a whole context with ``api.push`` and verify a localized pull.

    Three local-file bundles are created and pushed individually, then the
    whole context is pushed with ``api.push``. The local bundles are removed,
    the context is pulled with ``localize=True``, and each pulled file's md5
    is compared with the matching ``f<i>`` tag on its bundle.
    """
    s3_client = _setup()

    bundles = {}
    for i in range(3):
        name = "shark{}".format(i)
        bundles[name] = create_local_file_bundle(name)
        # Need this to put the bundles in the remote b/c of moto mp issues.
        bundles[name].push()

    # Moto keeps s3 xfers in memory, so multiprocessing will succeed
    # But when the subprocess exits, the files will disappear
    # This is basically useless in a test.
    api.push(TEST_CONTEXT)  # push the whole context to the remote

    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Bucket should not be empty'
    assert len(objects['Contents']) > 0, 'Bucket should not be empty'

    # Drop the local bundles, then pull everything back and localize it.
    api.rm(TEST_CONTEXT, rm_all=True)
    api.pull(TEST_CONTEXT, localize=True)
    # Keyed by name so that bundles sharing a name are checked only once.
    found_bundles = {b.name: b for b in api.search(TEST_CONTEXT)}
    for pulled in found_bundles.values():
        for i, path in enumerate(pulled.data):
            assert md5_file(path) == pulled.tags["f{}".format(i)]

    api.delete_context(TEST_CONTEXT)
Ejemplo n.º 17
0
def test_push():
    """Apply ``'RemoteTest'``, commit its bundle, and push it to S3.

    Creates ``TEST_CONTEXT`` and ``TEST_BUCKET``, binds the bucket as the
    context's remote (``force=True``), runs the task, and checks that
    pushing the ``remote_test`` bundle leaves the bucket non-empty. The
    bucket and the context are removed at the end.
    """
    api.context(context_name=TEST_CONTEXT)

    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: empty bucket, empty local context.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    initial_bundles = api.search(TEST_CONTEXT)
    assert len(initial_bundles) == 0, 'Context should be empty'
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    api.apply(TEST_CONTEXT, 'RemoteTest')
    remote_bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert remote_bundle.data == 'Hello'

    remote_bundle.commit()
    remote_bundle.push()

    # The push must have created at least one object in the bucket.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    pushed_objects = listing['Contents']
    assert len(pushed_objects) > 0, 'Bucket should not be empty'

    # Clean up the bucket and the context.
    test_bucket.objects.all().delete()
    test_bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
Ejemplo n.º 18
0
def test_task_with_parameter():
    """Apply task ``'B'`` with two values of ``n`` and check both results.

    With ``n=10`` the ``b`` bundle must hold the int ``20`` and the context
    one bundle; with ``n=20`` it must hold ``40`` and the context must then
    contain two bundles.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'B', params={'n': 10})
    data = api.get(TEST_CONTEXT, 'b').data

    assert data == 20, 'Data did not match output'
    # Message corrected: the check is for int, not for a path.
    assert type(data) is int, 'Data is not int'
    assert len(api.search(TEST_CONTEXT)) == 1, 'One bundle should be present'

    # A different parameter value must produce a second bundle.
    api.apply(TEST_CONTEXT, 'B', params={'n': 20})
    data = api.get(TEST_CONTEXT, 'b').data

    assert data == 40, 'Data did not match output'
    assert type(data) is int, 'Data is not int'
    assert len(api.search(TEST_CONTEXT)) == 2, 'Two bundles should be present'
Ejemplo n.º 19
0
def test_remote_no_push_non_managed_s3():
    """Apply ``NonManagedS3`` with a remote bound and no incremental push.

    The context is recreated, both test buckets are created and checked to
    be empty, and the remote is bound. After the task runs, the first
    ``'file'`` entry of bundle ``b2`` must not exist on the local filesystem
    and must start with ``s3://``.
    """
    # Start from a fresh context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Both buckets must start out empty.
    main_listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in main_listing, 'Bucket should be empty'
    other_listing = client.list_objects(Bucket=TEST_BUCKET_OTHER)
    assert 'Contents' not in other_listing, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # Run the task and dump the resulting bundle to stdout for debugging.
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # Local context should not contain file if a remote exists.
    b2_bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    file_path = b2_bundle.data['file'][0]
    assert not os.path.exists(file_path), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert file_path.startswith("s3://")
Ejemplo n.º 20
0
def test_independent_context():
    """Check that applying a task in one context leaves another untouched.

    Creates two contexts, applies ``ContextTest`` in the first only, and
    asserts the first holds one bundle while the second stays empty. Both
    contexts are then deleted and must no longer be listed.
    """
    first_ctx = '__test_context_1__'
    second_ctx = '__test_context_2__'

    for ctx in (first_ctx, second_ctx):
        api.context(ctx)

    api.apply(first_ctx, ContextTest)

    first_count = len(api.search(first_ctx))
    assert first_count == 1, 'Only one bundle should be in context one'
    second_count = len(api.search(second_ctx))
    assert second_count == 0, 'Context two should be empty'

    for ctx in (first_ctx, second_ctx):
        api.delete_context(context_name=ctx)

    # Neither context may be listed once deleted.
    for ctx in (first_ctx, second_ctx):
        assert ctx not in api.ls_contexts(), 'Contexts should be removed'
Ejemplo n.º 21
0
def test_child_task_with_parameter():
    """Apply task ``'C'`` with two values of ``n`` and check both results.

    With ``n=10`` the ``c`` bundle must hold the int ``22`` and the context
    three bundles; with ``n=20`` it must hold ``42`` and the context must
    then contain five bundles (presumably one upstream bundle is re-used --
    confirm against the task definitions).
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'C', params={'n': 10})
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 22, 'Data did not match output'
    # Message corrected: the check is for int, not for a path.
    assert type(data) is int, 'Data is not int'
    assert len(
        api.search(TEST_CONTEXT)) == 3, 'Three bundles should be present'

    # Re-run with a different parameter value.
    api.apply(TEST_CONTEXT, 'C', params={'n': 20})
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 42, 'Data did not match output'
    assert type(data) is int, 'Data is not int'
    assert len(api.search(TEST_CONTEXT)) == 5, 'Five bundles should be present'
Ejemplo n.º 22
0
def test_no_remote_no_push_managed_s3():
    """Check that applying ``ManagedS3`` without a bound remote raises.

    The context is deleted and recreated so that no remote is attached to
    it in this test, and must be empty before the task is applied.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    # The exception object is never inspected, so it is not bound to a name.
    # NOTE(review): Exception is very broad; narrow it if the API documents
    # a specific error type for this case.
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
Ejemplo n.º 23
0
def test_file_task():
    """Write a file inside a bundle context manager and read it back.

    A bundle named ``file_task`` is opened with ``api.Bundle`` as a context
    manager; a file obtained from ``get_file("test.txt")`` is written with
    ``'5'`` and registered via ``add_data``. The bundle data read back must
    be a ``str`` path whose content is ``'5'``, and the context must then
    contain exactly one bundle.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    with api.Bundle(TEST_CONTEXT, name='file_task') as bundle:
        target = bundle.get_file("test.txt")
        with open(target, mode='w') as writer:
            writer.write('5')
        bundle.add_data(target)

    fetched = api.get(TEST_CONTEXT, 'file_task')
    path = fetched.data

    with open(path) as reader:
        content = reader.read()

    assert content == '5', 'Data did not match output'
    # Exact type match on purpose: the path must be a plain str.
    assert type(path) is str, 'Data is not path'

    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'Only one bundle should be present'
Ejemplo n.º 24
0
def test_A2_A3(run_test):
    """Check bundle re-use versus re-run for task ``A``.

    2.) Run A, Run A, should re-use
    3.) Run A, Run A*, should re-run

    Applying ``A`` twice with the same arguments must report
    ``did_work`` as ``False`` the second time and keep the same bundle
    uuid; applying it with different parameters must do work and produce a
    second bundle with a new uuid.

    Args:
        run_test: fixture requested by the test; it is not referenced in the
            body (presumably it prepares ``TEST_CONTEXT`` -- confirm against
            its definition).
    """

    result = api.apply(TEST_CONTEXT, A)
    assert result['did_work'] is True
    first_A_uuid = api.get(TEST_CONTEXT, 'A').uuid
    result = api.apply(TEST_CONTEXT, A)
    assert result['did_work'] is False
    second_A_uuid = api.get(TEST_CONTEXT, 'A').uuid
    assert first_A_uuid == second_A_uuid
    # Compare counts with ==; `is` on an int literal tests object identity,
    # which only works by accident of small-int caching.
    assert len(api.search(TEST_CONTEXT, 'A')) == 1

    # Mod args, should re-run
    result = api.apply(TEST_CONTEXT, A, params={'a': 2, 'b': 3})
    assert result['did_work'] is True
    next_A_uuid = api.get(TEST_CONTEXT, 'A').uuid
    assert next_A_uuid != second_A_uuid
    assert len(api.search(TEST_CONTEXT, 'A')) == 2
Ejemplo n.º 25
0
def test_no_remote_push_non_managed_s3():
    """Apply ``NonManagedS3`` with incremental push but no remote bound.

    The context is recreated and both test buckets are created and checked
    to be empty; no remote is bound in this test. After the task runs with
    ``incremental_push=True`` the context must hold one bundle and the first
    ``'file'`` entry of bundle ``b2`` must exist on the local filesystem.
    """
    # Start from a fresh context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Both buckets must start out empty.
    main_listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in main_listing, 'Bucket should be empty'
    other_listing = client.list_objects(Bucket=TEST_BUCKET_OTHER)
    assert 'Contents' not in other_listing, 'Bucket should be empty'

    # Run the task and dump the resulting bundle to stdout for debugging.
    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))
    bundle_count = len(api.search(TEST_CONTEXT))
    assert bundle_count == 1, 'One bundle should be present'

    b2_bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    local_file = b2_bundle.data['file'][0]
    assert os.path.exists(local_file), \
        'Non Managed S3 file should be copied to local'
Ejemplo n.º 26
0
def test_add_with_treat_as_bundle():
    """Round-trip pipeline bundles through S3, then re-run with incremental pull.

    Steps:
      1. Recreate the context, create the bucket, and bind the remote.
      2. Apply ``CIP`` and commit + push bundles ``a``, ``b`` and ``c``.
      3. Delete and recreate the context, re-bind the remote, and pull;
         every bundle's first ``'file'`` entry must still be an ``s3://``
         path.
      4. Apply ``BIP`` with ``n=100`` and ``incremental_pull=True``;
         afterwards ``a`` and ``b`` must have non-S3 paths while ``c`` still
         points to S3.
    """
    all_names = ('a', 'b', 'c')
    expected_local = ('a', 'b')

    # Start from a fresh context.
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Run test pipeline
    api.apply(TEST_CONTEXT, CIP)

    # Push bundles to remote
    for name in all_names:
        produced = api.get(TEST_CONTEXT, name)
        assert produced is not None, 'Bundle should exist'

        api.commit(TEST_CONTEXT, name)
        api.push(TEST_CONTEXT, name)

    # Blow away context and recreate
    api.delete_context(TEST_CONTEXT)
    assert TEST_CONTEXT not in api.ls_contexts()

    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    remaining = api.search(TEST_CONTEXT)
    assert remaining == [], 'Context should be empty'

    # Pull bundles from remote
    api.pull(TEST_CONTEXT)

    # Make sure all bundle meta data comes down but data remains in S3
    for name in all_names:
        pulled = api.get(TEST_CONTEXT, name)
        assert pulled is not None, 'Bundle should exist'

        remote_path = pulled.data['file'][0]
        assert remote_path.startswith('s3://'), 'Data should be in S3'

    # Rerun pipeline
    api.apply(TEST_CONTEXT, BIP, params={'n': 100}, incremental_pull=True)

    # Make sure all bundles exist. Bundles a and b should have local paths
    for name in all_names:
        current = api.get(TEST_CONTEXT, name)
        assert current is not None, 'Bundle should exist'

        on_s3 = current.data['file'][0].startswith('s3://')
        if name in expected_local:
            assert not on_s3, 'Data should be local'
        else:
            assert on_s3, 'Data should be in S3'

    api.delete_context(TEST_CONTEXT)