Exemple #1
0
def deprecated_data_as_bundle_not_csv(tmpdir):
    """Check that a ``.txt`` file is rejected when added as a bundle file.

    Writes a text file under ``tmpdir``, calls ``api.add`` with
    ``treat_file_as_bundle=True`` and expects an ``AssertionError``.
    Afterwards the bundle name that was passed to ``api.add`` must not
    resolve to a bundle.

    Args:
        tmpdir: directory to write the test file into (converted with
            ``str``); presumably pytest's ``tmpdir`` fixture.
    """
    # The same name is used for the add and for the follow-up lookup; the
    # previous version looked up a different, never-created name, which made
    # the "bundle does not exist" check pass unconditionally.
    bundle_name = 'bad_path'

    # Create Context
    api.context(TEST_CONTEXT)
    try:
        # Create test .txt file
        test_txt_path = os.path.join(str(tmpdir), 'test.txt')
        with open(test_txt_path, 'w') as f:
            f.write('this should not create a bundle')

        # Assert the txt file exists
        assert os.path.exists(test_txt_path)

        # Try to add file to the bundle
        with pytest.raises(AssertionError) as ex:
            api.add(TEST_CONTEXT,
                    bundle_name,
                    test_txt_path,
                    treat_file_as_bundle=True)

        # The add must have failed with exactly an AssertionError
        assert ex.type == AssertionError

        # Make sure the rejected add did not leave a bundle behind
        assert api.get(TEST_CONTEXT,
                       bundle_name) is None, 'Bundle should not exist'
    finally:
        # Remove the context even when an assertion above fails
        api.delete_context(TEST_CONTEXT)
Exemple #2
0
def test_add_remote_fail():
    """Binding a remote must raise ``RuntimeError`` for both test URLs.

    No bucket is created in this test before ``api.remote`` is called with
    ``TEST_BUCKET_URL`` and then ``TEST_BUCKET_KEY_URL``; each call is
    expected to raise exactly ``RuntimeError``.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources.  The handles are not used below; the calls are
    # kept so the test still constructs the client/resource as before.
    boto3.client('s3')
    boto3.resource('s3', region_name='us-east-1')

    for remote_url in (TEST_BUCKET_URL, TEST_BUCKET_KEY_URL):
        # Reset per attempt: previously the exception captured for the first
        # URL was still in ``error`` when the second URL was checked, so the
        # second check could pass even if the second call did not raise.
        error = None
        try:
            api.remote(TEST_CONTEXT, TEST_REMOTE, remote_url)
        except Exception as e:
            error = e

        # Asserted after the try block (not in ``finally``) so a
        # non-``Exception`` error is not masked by an AssertionError.
        assert type(error) is RuntimeError, \
            'Binding {} should raise RuntimeError, got {!r}'.format(
                remote_url, error)

    api.delete_context(TEST_CONTEXT)
def test():
    """Apply ``DataMaker``, drop its pre-maker bundle, then apply ``Root``.

    The steps mirror this CLI session:

    1.) dsdt apply - - test_external_bundle.DataMaker --int_array '[1000,2000,3000]'
        (a ``PreMaker_auf_datamaker`` bundle must exist afterwards)

    2.) dsdt rm PreMaker_auf_datamaker

    3.) Apply ``Root`` and require that a ``PreMaker_auf_root`` bundle exists.
    """
    api.context(TEST_CONTEXT)

    # Step 1: run the task that produces the external dependency
    datamaker_params = {'int_array': '[1000,2000,3000]'}
    api.apply(TEST_CONTEXT, '-', '-', 'DataMaker', params=datamaker_params)

    # Step 2: the upstream bundle must exist before it is removed
    datamaker_pre = api.get(TEST_CONTEXT, 'PreMaker_auf_datamaker')
    assert datamaker_pre is not None
    datamaker_pre.rm()

    # Step 3: run the consumer and check its own pre-maker bundle
    api.apply(TEST_CONTEXT, '-', '-', 'Root')
    root_pre = api.get(TEST_CONTEXT, 'PreMaker_auf_root')
    assert root_pre is not None

    api.delete_context(TEST_CONTEXT)
Exemple #4
0
def test_remote_push_managed_s3():
    """Apply ``ManagedS3`` with incremental push and check the bucket.

    After the apply, the first ``file`` entry of bundle ``b4`` must not be a
    path that exists locally, and ``test.parquet`` must be among the object
    names listed in ``TEST_BUCKET``.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket starts out with no objects
    listing_before = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing_before, 'Bucket should be empty'

    # Bind the remote, then run the pipeline
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    b4_bundle = api.search(TEST_CONTEXT, human_name='b4')[0]
    managed_file = b4_bundle.data['file'][0]
    assert not os.path.exists(managed_file), \
        'Managed S3 file should not be copied to local'

    # Reduce every remote key to its final path component
    listing_after = client.list_objects(Bucket=TEST_BUCKET)
    pushed_names = [
        entry['Key'].split('/')[-1] for entry in listing_after['Contents']
    ]

    assert 'test.parquet' in pushed_names, 'Pipeline should have pushed file'
Exemple #5
0
def test_remote_no_push_non_managed_s3():
    """Apply ``NonManagedS3`` with a remote bound and no incremental push.

    The first ``file`` entry of bundle ``b2`` must not exist as a local path
    and must start with ``s3://``.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Both buckets start out with no objects
    primary_listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in primary_listing, 'Bucket should be empty'
    other_listing = client.list_objects(Bucket=TEST_BUCKET_OTHER)
    assert 'Contents' not in other_listing, 'Bucket should be empty'

    # Bind the remote, run the pipeline, and show the result
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # With a remote bound, the bundle's file link should point at S3
    b2_bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    assert not os.path.exists(b2_bundle.data['file'][0]), \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert b2_bundle.data['file'][0].startswith("s3://")
Exemple #6
0
def test():
    """Exercise ``mark_force`` across three applies of ``A_2``.

    Per the original description there are two tasks, one depending on the
    other, and the upstream one is marked ``mark_force`` so it should always
    run.  The test compares the uuid of bundle ``B`` between runs: it must
    change between the first and second run, and stay the same on the third
    run, which passes ``set_ext_dep=True``.
    """
    def apply_and_fetch(label, use_ext_dep=False):
        # Apply A_2, then report and return the current bundle 'B'
        api.apply(TEST_CONTEXT, 'A_2', params={'set_ext_dep': use_ext_dep})
        current = api.get(TEST_CONTEXT, 'B')
        print("Run {}: b.creation_date {} b.uuid {}".format(
            label, current.creation_date, current.uuid))
        return current

    api.delete_context(TEST_CONTEXT)
    api.context(TEST_CONTEXT)

    uuid_after_first = apply_and_fetch("One").uuid

    second_run = apply_and_fetch("Two")
    assert uuid_after_first != second_run.uuid
    uuid_after_second = second_run.uuid

    third_run = apply_and_fetch("Three", use_ext_dep=True)
    assert uuid_after_second == third_run.uuid

    api.delete_context(TEST_CONTEXT)
Exemple #7
0
def test():
    """Push a three-file bundle, remove and re-pull it, then apply a consumer.

    Builds bundle ``TEST_NAME`` with files ``output_0`` .. ``output_2``,
    commits and pushes it, removes it locally, pulls it back with
    ``localize=False`` and finally applies ``ConsumeExtDep`` with
    ``incremental_pull=True``.

    Returns:
        None
    """
    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, REMOTE_URL, force=True)

    with api.Bundle(TEST_CONTEXT, TEST_NAME, owner=getpass.getuser()) as bundle:
        for index in range(3):
            target = bundle.add_file('output_{}'.format(index))
            with target.open('w') as out_file:
                out_file.write("some text for the {} file".format(index))

    # Publish, drop the local copy, then fetch it back without localizing
    committed = bundle.commit()
    committed.push()
    bundle.rm()
    bundle.pull(localize=False)

    api.apply(TEST_CONTEXT,
              '-',
              'test_output',
              'ConsumeExtDep',
              incremental_pull=True)

    api.delete_context(TEST_CONTEXT, remote=True)
Exemple #8
0
def test_add_with_treat_as_bundle():
    """Apply ``CPush`` with incremental push and check what reached S3.

    Any ``Exception`` raised by the apply is ignored on purpose (the original
    notes the pipeline "should fail"); the test then requires ``a.txt`` and
    ``b.txt`` among the object names listed in ``TEST_BUCKET``.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket starts out with no objects
    listing_before = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing_before, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # The pipeline run is expected to fail; only its pushed output matters
    try:
        api.apply(TEST_CONTEXT, CPush, incremental_push=True)
    except Exception:
        pass

    # Reduce every remote key to its final path component
    listing_after = client.list_objects(Bucket=TEST_BUCKET)
    pushed_names = [
        entry['Key'].split('/')[-1] for entry in listing_after['Contents']
    ]

    for expected_name in ('a.txt', 'b.txt'):
        assert expected_name in pushed_names, 'Pipeline should have pushed file'

    api.delete_context(TEST_CONTEXT)
Exemple #9
0
def test_push():
    """Apply ``RemoteTest``, commit and push its bundle, and check the bucket.

    The bucket must be empty before the push and contain at least one object
    afterwards.  The bucket and the context are removed at the end.
    """
    api.context(context_name=TEST_CONTEXT)

    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: nothing in the bucket, nothing in the context
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    api.apply(TEST_CONTEXT, 'RemoteTest')
    remote_bundle = api.get(TEST_CONTEXT, 'remote_test')
    assert remote_bundle.data == 'Hello'

    remote_bundle.commit()
    remote_bundle.push()

    # The push must have produced at least one object
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Tear down the bucket, then the context
    test_bucket.objects.all().delete()
    test_bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
def test(run_test):
    """One task produces a bundle and another task requires it.

    1.) Apply DataMaker (which, per the original notes, runs PreMaker)
    2.) Require the PreMaker bundle, remember its uuid, and remove it
    3.) Apply Root_1, which needs DataMaker (external dep) and PreMaker
    4.) Require a PreMaker bundle with a different uuid and a Root_1 bundle

    Args:
        run_test: not used in the body; presumably a fixture requested for
            its setup/teardown side effects.
    """
    api.context(TEST_CONTEXT)

    api.apply(TEST_CONTEXT,
              DataMaker,
              params={'int_array': [1000, 2000, 3000]})

    first_pre_maker = api.get(TEST_CONTEXT, 'PreMaker')
    assert first_pre_maker is not None
    original_uuid = first_pre_maker.uuid
    first_pre_maker.rm()

    api.apply(TEST_CONTEXT, Root_1)

    # A new PreMaker bundle must have been produced
    second_pre_maker = api.get(TEST_CONTEXT, 'PreMaker')
    assert second_pre_maker is not None
    assert second_pre_maker.uuid != original_uuid

    root_bundle = api.get(TEST_CONTEXT, 'Root_1')
    assert root_bundle is not None

    api.delete_context(TEST_CONTEXT)
Exemple #11
0
def test():
    """Apply ``Root2`` and inspect the result attached to a raised exception.

    ``Root2`` is applied with ``force=True`` and two workers into output
    bundle ``test_api_exit``.  If the apply raises, the exception is expected
    to carry a ``result`` mapping whose ``did_work`` entry is truthy and
    whose ``success`` entry is falsy.  The value returned by the apply
    (``None`` when it raised) is printed in either case.
    """
    api.context(TEST_CONTEXT)

    apply_options = {
        'output_bundle': 'test_api_exit',
        'params': {},
        'force': True,
        'workers': 2,
    }

    outcome = None
    try:
        outcome = api.apply(TEST_CONTEXT, Root2, **apply_options)
    except Exception as exc:
        print("Got exception {} result {} ".format(exc, exc.result))
        assert exc.result['did_work']
        assert not exc.result['success']
    finally:
        print("API apply returned {}".format(outcome))
Exemple #12
0
def test_add_remote():
    """Bind a remote by bucket URL, then by bucket+key URL, pushing each time.

    After each ``api.remote`` call a bundle named ``output`` holding
    ``[1, 3, 5]`` is created, committed and pushed.
    """
    def build_and_push_output():
        # Create the 'output' bundle, then commit and push it
        with api.Bundle(TEST_CONTEXT) as bundle:
            bundle.name = 'output'
            bundle.add_data([1, 3, 5])
        bundle.commit()
        bundle.push()

    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket starts out with no objects
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Remote bound with just the bucket
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    build_and_push_output()

    # Remote re-bound with bucket and key
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_KEY_URL)
    build_and_push_output()

    api.delete_context(TEST_CONTEXT)
Exemple #13
0
def test_pull(run_test):
    """Push a bundle, recreate the context, and pull the bundle back.

    Applies ``RemoteTest``, commits and pushes ``remote_test``, deletes and
    recreates the context, pulls from the remote and requires that the first
    bundle found holds ``'Hello'``.  The bucket is emptied and deleted at the
    end.

    Args:
        run_test: not used in the body; presumably a fixture requested for
            its setup/teardown side effects.
    """
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Preconditions: nothing in the bucket, nothing in the context
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    api.apply(TEST_CONTEXT, RemoteTest)
    produced = api.get(TEST_CONTEXT, 'remote_test')
    assert produced.data == 'Hello'

    produced.commit()
    produced.push()

    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Start over with a fresh context bound to the same remote
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    found = api.search(TEST_CONTEXT)
    assert len(found) > 0, 'Pulled bundles down'
    assert found[0].data == 'Hello', 'Bundle contains correct data'

    test_bucket.objects.all().delete()
    test_bucket.delete()
Exemple #14
0
def test_no_remote_no_push_managed_s3():
    """Applying ``ManagedS3`` in a fresh context with no remote must raise."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    bundles_present = api.search(TEST_CONTEXT)
    assert len(bundles_present) == 0, 'Context should be empty'

    # No remote is bound, so the apply is expected to fail
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
Exemple #15
0
def test_create_context():
    """Create and delete a context, checking ``api.ls_contexts`` each time.

    The context ``__test__`` must be absent before creation, listed after
    ``api.context`` and absent again after ``api.delete_context``.
    """
    context_name = '__test__'
    assert context_name not in api.ls_contexts(), 'Context exists'

    api.context(context_name)
    # This message is shown when the context is missing; the previous text
    # ('Test context does exists') stated the opposite of the failure.
    assert context_name in api.ls_contexts(), 'Test context should exist'

    api.delete_context(context_name=context_name)
    assert context_name not in api.ls_contexts(), 'Test context exists'
Exemple #16
0
def test_non_managed_local():
    """Apply ``NonManagedLocal`` and check its single bundle's local file.

    Exactly one bundle must be present afterwards, and the first ``file``
    entry of bundle ``b1`` must exist on the local filesystem.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'
    print(api.cat(TEST_CONTEXT, 'b1'))

    b1_bundle = api.search(TEST_CONTEXT, human_name='b1')[0]
    local_file = b1_bundle.data['file'][0]
    assert os.path.exists(local_file), \
        'Local file should be present in bundle'
Exemple #17
0
def context():
    """Yield the name of a temporary, uniquely named local context.

    Generator intended to be driven by a wrapper that resumes it after use
    (presumably a pytest fixture decorator outside the visible source).
    The context is created before the ``yield`` and deleted afterwards.

    Yields:
        str: hex form of a ``uuid.uuid1()`` value, used as the context name.
    """
    try:
        print('ensuring disdat is initialized')
        common.DisdatConfig.init()
    except (Exception, SystemExit):
        # Replaces a bare ``except:``.  ``SystemExit`` is still caught because
        # the init routine may exit when already initialized (TODO confirm);
        # ``KeyboardInterrupt`` is now allowed to propagate.
        print('disdat already initialized, no worries...')

    print('creating temporary local context')
    # Named differently from the function to avoid shadowing it
    context_name = uuid.uuid1().hex
    api.context(context_name)

    try:
        yield context_name
    finally:
        # Runs even if an exception is thrown into the generator or the
        # generator is closed early, so the context is not left behind.
        print('deleting temporary local context')
        api.delete_context(context_name)
Exemple #18
0
def test_independent_context():
    """Applying a pipeline in one context must not add bundles to another.

    ``ContextTest`` is applied in the first of two contexts; the first must
    then hold exactly one bundle and the second none.  Both contexts are
    deleted and must no longer be listed.
    """
    first_context = '__test_context_1__'
    second_context = '__test_context_2__'
    both_contexts = (first_context, second_context)

    for name in both_contexts:
        api.context(name)

    api.apply(first_context, ContextTest)

    assert len(api.search(first_context)) == 1, 'Only one bundle should be in context one'
    assert len(api.search(second_context)) == 0, 'Context two should be empty'

    for name in both_contexts:
        api.delete_context(context_name=name)

    for name in both_contexts:
        assert name not in api.ls_contexts(), 'Contexts should be removed'
Exemple #19
0
def test_remote_no_push_managed_s3():
    """Applying ``ManagedS3`` with a remote bound but no push must raise."""
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket starts out with no objects
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    # The apply is called without incremental_push and is expected to fail
    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
def _setup(remote=True):
    """Recreate the test context and an empty test bucket.

    Args:
        remote: when truthy, bind ``TEST_REMOTE`` at ``TEST_BUCKET_URL`` to
            the test context.

    Returns:
        The boto3 S3 client created here.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    boto3.resource('s3', region_name='us-east-1').create_bucket(
        Bucket=TEST_BUCKET)

    # The freshly created bucket must hold no objects
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    if not remote:
        return client

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    return client
Exemple #21
0
def test_single_file(tmpdir):
    """Add one CSV file as a bundle, with and without tags.

    After each ``api.add`` the hash of the bundle's data must equal the hash
    of the source file; the tagged add must also report the tags.  The bundle
    must still be retrievable after the source file is removed.

    Args:
        tmpdir: directory to write the CSV into (converted with ``str``);
            presumably pytest's ``tmpdir`` fixture.
    """
    bundle_name = 'test_single_file'

    api.context(TEST_CONTEXT)

    # Write a small CSV to add
    csv_path = os.path.join(str(tmpdir), 'test.csv')
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    frame.to_csv(csv_path)
    assert os.path.exists(csv_path)

    # Plain add: bundle content must hash the same as the file
    api.add(TEST_CONTEXT, bundle_name, csv_path)
    stored = api.get(TEST_CONTEXT, bundle_name)
    stored_hash = get_hash(stored.data)
    source_hash = get_hash(csv_path)
    assert stored_hash == source_hash, 'Hashes do not match'

    # Tagged add: same content check, plus the tags
    expected_tags = {'test': 'tag'}
    api.add(TEST_CONTEXT, bundle_name, csv_path, tags=expected_tags)
    stored = api.get(TEST_CONTEXT, bundle_name)
    stored_hash = get_hash(stored.data)
    source_hash = get_hash(csv_path)
    assert stored_hash == source_hash, 'Hashes do not match'
    assert stored.tags == expected_tags, 'Tags do not match'

    # The bundle must outlive the source file
    os.remove(csv_path)
    assert api.get(TEST_CONTEXT, bundle_name) is not None, 'Bundle should exist'

    api.delete_context(TEST_CONTEXT)
Exemple #22
0
def test_add_bad_path(tmpdir):
    """Check that ``api.add`` rejects a path that does not exist.

    Builds a CSV path under ``tmpdir`` without creating the file, expects
    ``api.add`` to raise ``AssertionError``, and then requires that the
    bundle name passed to ``api.add`` does not resolve to a bundle.

    Args:
        tmpdir: directory the missing path is built under (converted with
            ``str``); presumably pytest's ``tmpdir`` fixture.
    """
    # The same name is used for the add and for the follow-up lookup; the
    # previous version looked up a different, never-created name, which made
    # the "bundle does not exist" check pass unconditionally.
    bundle_name = 'bad_path'

    # Create Context
    api.context(TEST_CONTEXT)
    try:
        # Create path to csv file but don't create file
        test_csv_path = os.path.join(str(tmpdir), 'test.csv')

        # Assert csv file does not exist
        assert not os.path.exists(test_csv_path)

        # Try to add file to the bundle
        with pytest.raises(AssertionError) as ex:
            api.add(TEST_CONTEXT, bundle_name, test_csv_path)

        # The add must have failed with exactly an AssertionError
        assert ex.type == AssertionError

        # Make sure the rejected add did not leave a bundle behind
        assert api.get(TEST_CONTEXT, bundle_name) is None, 'Bundle should not exist'
    finally:
        # Remove the context even when an assertion above fails
        api.delete_context(TEST_CONTEXT)
Exemple #23
0
def test_no_remote_push_non_managed_s3():
    """Apply ``NonManagedS3`` with incremental push but no remote bound.

    Exactly one bundle must be present afterwards, and the first ``file``
    entry of bundle ``b2`` must exist on the local filesystem.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    resource.create_bucket(Bucket=TEST_BUCKET_OTHER)

    # Both buckets start out with no objects
    primary_listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in primary_listing, 'Bucket should be empty'
    other_listing = client.list_objects(Bucket=TEST_BUCKET_OTHER)
    assert 'Contents' not in other_listing, 'Bucket should be empty'

    api.apply(TEST_CONTEXT, NonManagedS3, incremental_push=True)
    print(api.cat(TEST_CONTEXT, 'b2'))

    all_bundles = api.search(TEST_CONTEXT)
    assert len(all_bundles) == 1, 'One bundle should be present'

    b2_bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    local_file = b2_bundle.data['file'][0]
    assert os.path.exists(local_file), \
        'Non Managed S3 file should be copied to local'
Exemple #24
0
def setup():
    """Start from a fresh ``TEST_CONTEXT``, deleting it first if it is listed."""
    already_present = TEST_CONTEXT in api.ls_contexts()
    if already_present:
        api.delete_context(context_name=TEST_CONTEXT)

    # (Re)create the context unconditionally
    api.context(context_name=TEST_CONTEXT)
Exemple #25
0
    """

    uuid = create_bundle_from_pipeline()
    try:
        result = api.apply(TEST_CONTEXT,
                           PipelineC,
                           params={'ext_name': 'not a bundle name'})
    except AssertionError as ae:
        print("ERROR: {}".format(ae))
        return


if __name__ == '__main__':
    # Run this module's tests through pytest.  The former ``if False:`` branch
    # (manual invocation of individual tests) could never execute and has
    # been removed.  ``pytest.main`` returns an exit code; raising SystemExit
    # with it lets the calling shell see test failures.
    raise SystemExit(pytest.main([__file__]))
Exemple #26
0
def test_add_with_treat_as_bundle(tmpdir):
    """Add a CSV describing a bundle with ``treat_file_as_bundle=True``.

    Two files (``test.csv`` and ``text.txt``) are written under ``tmpdir``
    and uploaded to ``TEST_BUCKET``.  A second CSV with local paths, S3
    paths and bool/string/float/int columns is then added as a bundle, once
    without and once with tags; each time the bundle's data must be a
    ``pandas.DataFrame``.

    Args:
        tmpdir: directory to write the files into (converted with ``str``);
            presumably pytest's ``tmpdir`` fixture.
    """
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket starts out with no objects
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    base_dir = str(tmpdir)

    def upload(local_file, object_key):
        # Upload one file and return the s3:// URL it was stored under
        resource.meta.client.upload_file(local_file, TEST_BUCKET, object_key)
        return "s3://{}/{}".format(TEST_BUCKET, object_key)

    # Create and upload the test.csv file
    csv_key = 'test.csv'
    csv_local = os.path.join(base_dir, csv_key)
    pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}).to_csv(csv_local)
    csv_remote = upload(csv_local, csv_key)

    # Create and upload the text.txt file
    txt_key = 'text.txt'
    txt_local = os.path.join(base_dir, txt_key)
    with open(txt_local, 'w') as handle:
        handle.write('Test')
    txt_remote = upload(txt_local, txt_key)

    # One row per uploaded file, with a column of each scalar kind
    manifest = pd.DataFrame({
        'local_paths': [csv_local, txt_local],
        's3_paths': [csv_remote, txt_remote],
        'bools': [True, False],
        'strings': ['a', 'b'],
        'floats': [1.3, 3.5],
        'ints': [4, 5]
    })
    manifest_path = os.path.join(base_dir, 'bundle.csv')
    manifest.to_csv(manifest_path)

    # Plain add: the bundle's data must come back as a dataframe
    api.add(TEST_CONTEXT, 'test_add_bundle', manifest_path, treat_file_as_bundle=True)
    stored = api.get(TEST_CONTEXT, 'test_add_bundle')
    assert isinstance(stored.data, pd.DataFrame)

    # Tagged add: same check, plus the tags
    expected_tags = {'test': 'tag'}
    api.add(TEST_CONTEXT, 'test_add_bundle', manifest_path, treat_file_as_bundle=True, tags=expected_tags)
    stored = api.get(TEST_CONTEXT, 'test_add_bundle')
    assert isinstance(stored.data, pd.DataFrame)
    assert stored.tags == expected_tags, 'Tags do not match'

    api.delete_context(TEST_CONTEXT)
Exemple #27
0
def test_add_with_treat_as_bundle():
    """Push bundles a, b, c, pull them into a new context, then re-apply.

    ``CIP`` is applied and its three bundles are committed and pushed.  The
    context is deleted and recreated, the bundles are pulled, and each
    bundle's first ``file`` entry must be an ``s3://`` path.  ``BIP`` is then
    applied with ``incremental_pull=True``; afterwards bundles ``a`` and
    ``b`` must have non-S3 paths while ``c`` must still be an ``s3://`` path.
    """
    all_names = ['a', 'b', 'c']

    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # S3 handles (moto-backed, per the original setup comment)
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket starts out with no objects
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # First pipeline run, then publish each bundle it produced
    api.apply(TEST_CONTEXT, CIP)
    for name in all_names:
        assert api.get(TEST_CONTEXT, name) is not None, 'Bundle should exist'
        api.commit(TEST_CONTEXT, name)
        api.push(TEST_CONTEXT, name)

    # Throw the context away and start again with the same remote
    api.delete_context(TEST_CONTEXT)
    assert TEST_CONTEXT not in api.ls_contexts()

    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)
    assert api.search(TEST_CONTEXT) == [], 'Context should be empty'

    api.pull(TEST_CONTEXT)

    # After the pull the bundles exist but their file links still point at S3
    for name in all_names:
        pulled = api.get(TEST_CONTEXT, name)
        assert pulled is not None, 'Bundle should exist'

        first_file = pulled.data['file'][0]
        assert first_file.startswith('s3://'), 'Data should be in S3'

    # Second pipeline run, pulling data only as needed
    api.apply(TEST_CONTEXT, BIP, params={'n': 100}, incremental_pull=True)

    # Only a and b are expected to have been localized
    for name in all_names:
        current = api.get(TEST_CONTEXT, name)
        assert current is not None, 'Bundle should exist'

        first_file = current.data['file'][0]
        if name not in ['a', 'b']:
            assert first_file.startswith('s3://'), 'Data should be in S3'
        else:
            assert not first_file.startswith('s3://'), 'Data should be local'

    api.delete_context(TEST_CONTEXT)
Exemple #28
0
def test_add_directory(tmpdir):
    """Add a nested directory as a bundle, with and without tags.

    Layout created under ``tmpdir``:

    - test.csv
    - second/test_1.txt
    - second/test_2.txt
    - second/third/test_3.txt
    - second/third/test_4.txt

    After each ``api.add`` every path in the bundle's data must hash the same
    as its source file and end with the same relative path.  The tagged add
    must also report the tags.

    Args:
        tmpdir: directory to build the layout in (converted with ``str``);
            presumably pytest's ``tmpdir`` fixture.
    """
    api.context(TEST_CONTEXT)

    root = str(tmpdir)

    # Relative directory names for the three levels
    top_dir = ''
    second_dir = os.path.join(top_dir, 'second')
    os.mkdir(os.path.join(root, second_dir))
    third_dir = os.path.join(second_dir, 'third')
    os.mkdir(os.path.join(root, third_dir))

    # file name -> (absolute source path, relative path split on '/')
    expected = {}
    created_paths = []

    def register(file_name, parent_dir):
        # Record where a file will live and return its absolute path
        relative = os.path.join(parent_dir, file_name)
        absolute = os.path.join(root, relative)
        expected[file_name] = (absolute, relative.split('/'))
        created_paths.append(absolute)
        return absolute

    def check_bundle_files(bundle):
        # Compare every file in the bundle against its recorded source
        for stored in bundle.data:
            stored_parts = stored.split('/')
            source_abs, source_parts = expected[stored_parts[-1]]

            assert get_hash(stored) == get_hash(source_abs), 'Hashes do not match'

            # Compare only the trailing components covering the relative path
            stored_rel = os.path.join(*stored_parts[-len(source_parts):])
            source_rel = os.path.join(*source_parts)
            assert source_rel == stored_rel, 'Bundle should have the same directory structure'

    # The CSV at the top level
    csv_abs = register('test.csv', top_dir)
    pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}).to_csv(csv_abs)

    # The four text files, two per nested directory
    text_files = (
        ('test_1.txt', second_dir, 'Hello!'),
        ('test_2.txt', second_dir, 'Hello!'),
        ('test_3.txt', third_dir, 'Third Hello!'),
        ('test_4.txt', third_dir, 'Third World!'),
    )
    for file_name, parent_dir, content in text_files:
        with open(register(file_name, parent_dir), 'w') as handle:
            handle.write(content)

    # Every source file must be on disk before the add
    for path in created_paths:
        assert os.path.exists(path)

    # Plain add of the whole directory
    api.add(TEST_CONTEXT, 'test_directory', root)
    check_bundle_files(api.get(TEST_CONTEXT, 'test_directory'))

    # Tagged add of the same directory
    expected_tags = {'test': 'tag'}
    api.add(TEST_CONTEXT, 'test_directory', root, tags=expected_tags)
    tagged = api.get(TEST_CONTEXT, 'test_directory')
    check_bundle_files(tagged)
    assert tagged.tags == expected_tags, 'Tags do not match'

    api.delete_context(TEST_CONTEXT)
Exemple #29
0
def test():
    """Exercise ``api.run`` locally and on the ``AWSBatch`` backend.

    0.) Build the container with ``api.dockerize``

    1.) Run locally with no pull and no push; check the local output bundle

    2.) Run locally with push; remove the local bundle, pull, and check

    3.) Push the container, run with ``backend='AWSBatch'``, pull, and check

    4.) Run with ``backend='AWSBatch'`` and ``no_submit=True``

    The original notes say the AWS Batch part still needs a MonkeyPatch.
    """
    test_arg = [1000, 2000, 8000]

    def run_pipeline(**options):
        # Every run shares these arguments; only the keyword options vary
        return api.run(TEST_CONTEXT,
                       TEST_CONTEXT,
                       OUTPUT_BUNDLE,
                       PIPELINE_CLS,
                       pipeline_args={'int_array': test_arg},
                       remote=TEST_REMOTE,
                       **options)

    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, TEST_REMOTE, force=True)

    print("--0: Create docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS)

    # Phase 1: local run, results stay local
    print("--1: Running container locally and storing results locally...")
    output = run_pipeline(no_pull=True, no_push=True)
    print("--1: 100 chars of RETVAL {}".format(output[:100]))
    local_bundle = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    assert local_bundle is not None
    print("--1: Pipeline tried to store {} and we found {}".format(
        test_arg, local_bundle.cat()))
    assert np.array_equal(local_bundle.cat(), test_arg)
    local_bundle.rm()

    # Phase 2: local run, results pushed, then pulled back for checking
    print("--2: Running container locally and pushing results ...")
    output = run_pipeline(no_pull=True, no_push=False)
    print("--2: 100 chars of RETVAL {}".format(output[:100]))
    print("--2B: Removing local output bundle...")
    api.get(TEST_CONTEXT, OUTPUT_BUNDLE).rm()
    print("--2C: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    pulled_bundle = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--2C: Pipeline tried to store {} and we found {}".format(
        test_arg, pulled_bundle.cat()))
    assert np.array_equal(pulled_bundle.cat(), test_arg)
    pulled_bundle.rm()

    # Phase 3: run on the AWSBatch backend with the pushed container
    print("--3: Running container on AWS pulling and pushing results ...")
    print("--3B: Push docker container")
    api.dockerize(SETUP_DIR, PIPELINE_CLS, push=True)
    print("--3C: Run docker container on AWS Batch")
    output = run_pipeline(backend='AWSBatch')
    print("--3C: RETVAL {}".format(output))
    print("--3D: Pulling remote bundle and verifying...")
    api.pull(TEST_CONTEXT)
    batch_bundle = api.get(TEST_CONTEXT, OUTPUT_BUNDLE)
    print("--3D: Pipeline tried to store {} and we found {}".format(
        test_arg, batch_bundle.cat()))
    assert np.array_equal(batch_bundle.cat(), test_arg)
    batch_bundle.rm()

    # Phase 4: same backend, but with no_submit=True
    print("--4: Running with no submit ...")
    print("--4B: Reusing docker container")
    print("--4C: Submit Job on AWS Batch")
    output = run_pipeline(backend='AWSBatch', no_submit=True)
    print("--4C: RETVAL {}".format(output))

    api.delete_context(TEST_CONTEXT)