Exemple #1
0
def test_remote_no_push_non_managed_s3():
    """Apply NonManagedS3 in a context bound to a remote and inspect bundle 'b2'.

    The first entry of the bundle's 'file' data must not exist on the local
    filesystem and must be an ``s3://`` link.
    """
    # Recreate the context so the run starts from a clean slate
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Create the two buckets used by the test
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    bucket_names = (TEST_BUCKET, TEST_BUCKET_OTHER)
    for bucket_name in bucket_names:
        s3_resource.create_bucket(Bucket=bucket_name)

    # Neither bucket may contain objects before the pipeline runs
    for bucket_name in bucket_names:
        listing = s3_client.list_objects(Bucket=bucket_name)
        assert 'Contents' not in listing, 'Bucket should be empty'

    # Attach the remote, then run the task
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, NonManagedS3)
    print(api.cat(TEST_CONTEXT, 'b2'))

    # With a remote bound, the file link must not resolve to a local path
    bundle = api.search(TEST_CONTEXT, human_name='b2')[0]
    present_locally = os.path.exists(bundle.data['file'][0])
    assert not present_locally, \
        'Non Managed S3 file w/ remote should be copied to remote'
    assert bundle.data['file'][0].startswith("s3://")
def test_pull(run_test):
    """Push a bundle to the remote, rebuild the context, and pull it back.

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.
    """
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = s3_resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty
    listing = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    local_bundles = api.search(TEST_CONTEXT)
    assert len(local_bundles) == 0, 'Context should be empty'

    # Produce the bundle locally
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, RemoteTest)
    produced = api.get(TEST_CONTEXT, 'remote_test')
    assert produced.data == 'Hello'

    # Commit and push it to the remote
    produced.commit()
    produced.push()

    listing = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Throw the local context away, rebind the remote, and pull
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    pulled_bundles = api.search(TEST_CONTEXT)
    assert len(pulled_bundles) > 0, 'Pulled bundles down'
    first_pulled = pulled_bundles[0]
    assert first_pulled.data == 'Hello', 'Bundle contains correct data'

    # Empty and remove the bucket
    test_bucket.objects.all().delete()
    test_bucket.delete()
Exemple #3
0
def test():
    """Create a three-file bundle, push it, remove it, pull it, then apply a consumer.

    After the bundle round-trips through the remote (pulled with
    ``localize=False``), the 'ConsumeExtDep' task is applied with
    ``incremental_pull=True`` and the context is deleted, remote included.

    Returns:
        None
    """
    api.context(TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_CONTEXT, REMOTE_URL, force=True)

    # Write three small text files into a new bundle
    with api.Bundle(TEST_CONTEXT, TEST_NAME, owner=getpass.getuser()) as b:
        for idx in range(3):
            target = b.add_file('output_{}'.format(idx))
            with target.open('w') as handle:
                handle.write("some text for the {} file".format(idx))

    committed = b.commit()
    committed.push()

    # Drop the local copy and fetch it back
    b.rm()
    b.pull(localize=False)

    api.apply(
        TEST_CONTEXT,
        '-',
        'test_output',
        'ConsumeExtDep',
        incremental_pull=True,
    )

    api.delete_context(TEST_CONTEXT, remote=True)
Exemple #4
0
def test_add_with_treat_as_bundle():
    """Check that an incremental push uploads files even when the pipeline fails.

    CPush is applied with ``incremental_push=True`` against a context bound
    to TEST_BUCKET. Any exception from the apply is tolerated; afterwards the
    bucket must contain objects whose base names are 'a.txt' and 'b.txt'.
    """
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Setup moto s3 resources
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # Make sure bucket is empty
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in objects, 'Bucket should be empty'

    # Bind remote context
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Try to run the pipeline - should fail
    try:
        api.apply(TEST_CONTEXT, CPush, incremental_push=True)
    except Exception:
        # Deliberately ignored: the test is about what reached the remote
        # before the failure, not about the failure itself.
        pass

    # Get objects from remote. list_objects omits 'Contents' for an empty
    # bucket, so check for it explicitly to get a readable failure instead
    # of a bare KeyError.
    objects = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in objects, 'Pipeline should have pushed file'
    pushed_names = {o['Key'].split('/')[-1] for o in objects['Contents']}

    # Make sure files exist in S3
    for output_file in ['a.txt', 'b.txt']:
        assert output_file in pushed_names, 'Pipeline should have pushed file'

    api.delete_context(TEST_CONTEXT)
Exemple #5
0
def test_remote_push_managed_s3():
    """Apply ManagedS3 with incremental push and verify the file lands in S3.

    The first 'file' entry of bundle 'b4' must not exist on the local
    filesystem, and the bucket must contain an object whose base name is
    'test.parquet'.
    """
    # Fresh context
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    # Bucket setup
    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)

    # The bucket must start out empty
    listing = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'

    # Attach the remote and run the task, pushing as it goes
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)

    matches = api.search(TEST_CONTEXT, human_name='b4')
    file_link = matches[0].data['file'][0]
    assert not os.path.exists(file_link), \
        'Managed S3 file should not be copied to local'

    # Reduce every remote key to its final path component
    listing = s3_client.list_objects(Bucket=TEST_BUCKET)
    base_names = [entry['Key'].split('/')[-1] for entry in listing['Contents']]

    # The expected output must be among them
    for expected_name in ['test.parquet']:
        assert expected_name in base_names, 'Pipeline should have pushed file'
def test():
    """Run a producer task, remove one of its bundles, then run a dependent task.

    Steps performed by the body:

    1.) Apply 'DataMaker' with int_array '[1000,2000,3000]' and check that a
        'PreMaker_auf_datamaker' bundle exists. CLI form from the original note:
        dsdt apply - - test_external_bundle.DataMaker --int_array '[1000,2000,3000]'

    2.) Remove 'PreMaker_auf_datamaker' (CLI form: dsdt rm PreMaker_auf_datamaker).

    3.) Apply 'Root' and check that a 'PreMaker_auf_root' bundle exists.

    """

    api.context(TEST_CONTEXT)

    maker_params = {'int_array': '[1000,2000,3000]'}
    api.apply(TEST_CONTEXT, '-', '-', 'DataMaker', params=maker_params)

    # The producer run must have left this bundle behind
    premaker = api.get(TEST_CONTEXT, 'PreMaker_auf_datamaker')
    assert (premaker is not None)
    premaker.rm()

    api.apply(TEST_CONTEXT, '-', '-', 'Root')

    root_premaker = api.get(TEST_CONTEXT, 'PreMaker_auf_root')
    assert (root_premaker is not None)

    api.delete_context(TEST_CONTEXT)
Exemple #7
0
def test_push():
    """Apply 'RemoteTest', commit and push its bundle, and check the bucket fills up."""
    api.context(context_name=TEST_CONTEXT)

    s3_client = boto3.client('s3')
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = s3_resource.Bucket(TEST_BUCKET)

    # Bucket and context both start out empty
    listing = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    existing = api.search(TEST_CONTEXT)
    assert len(existing) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    # Run the task (referenced by name) and fetch its bundle
    api.apply(TEST_CONTEXT, 'RemoteTest')
    produced = api.get(TEST_CONTEXT, 'remote_test')
    assert produced.data == 'Hello'

    produced.commit()
    produced.push()

    # The push must have created at least one object
    listing = s3_client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Tear down the bucket, then the context
    test_bucket.objects.all().delete()
    test_bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)
Exemple #8
0
def test_ABC7(run_test):
    """
    7.) Run A->B->C,  Run A*->B.   Run A->B->C, nothing should run

    B's ``pipe_requires`` is temporarily replaced so that B depends on APrime.
    The original method is restored in a ``finally`` block so that a failing
    assertion cannot leave the patched class behind for other tests.

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.

    Returns:
        None
    """

    result = api.apply(TEST_CONTEXT, C)
    assert result['success'] is True
    assert result['did_work'] is True
    B_uuid = api.get(TEST_CONTEXT, 'B').uuid

    def custom_B_requires(self):
        self.add_dependency('a', APrime, params={})

    old_requires = B.pipe_requires
    B.pipe_requires = custom_B_requires
    try:
        result = api.apply(TEST_CONTEXT, B)
        assert result['success'] is True
        assert result['did_work'] is True
        assert B_uuid != api.get(TEST_CONTEXT, 'B').uuid  # should have a new B
    finally:
        # Always undo the monkey-patch, even when an assertion above fails
        B.pipe_requires = old_requires

    result = api.apply(TEST_CONTEXT, C)
    assert result['success'] is True
    assert result['did_work'] is False
Exemple #9
0
def test_AB6(run_test):
    """
    6.) Run A->B, Re-run A*.  Run A*->B, B should re-run.

    B's ``pipe_requires`` is temporarily replaced so that B depends on APrime.
    The original method is restored in a ``finally`` block so that a failing
    assertion cannot leave the patched class behind for other tests.

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.

    Returns:
        None
    """

    result = api.apply(TEST_CONTEXT, B)
    assert result['success'] is True
    assert result['did_work'] is True
    B_uuid = api.get(TEST_CONTEXT, 'B').uuid

    result = api.apply(TEST_CONTEXT, APrime)
    assert result['success'] is True
    assert result['did_work'] is True
    APrime_uuid = api.get(TEST_CONTEXT, 'APrime').uuid

    def custom_B_requires(self):
        self.add_dependency('a', APrime, params={})

    old_requires = B.pipe_requires
    B.pipe_requires = custom_B_requires
    try:
        result = api.apply(TEST_CONTEXT, B)
        assert result['success'] is True
        assert result['did_work'] is True
        # APrime must be reused as-is while B gets a new bundle
        assert APrime_uuid == api.get(TEST_CONTEXT, 'APrime').uuid
        assert B_uuid != api.get(TEST_CONTEXT, 'B').uuid
    finally:
        # Always undo the monkey-patch, even when an assertion above fails
        B.pipe_requires = old_requires
Exemple #10
0
def test_ord_external_dependency_fail(run_test):
    """ Exercise a failed external-dependency lookup.

    Per the original note: Disdat/Luigi swallows exceptions in tasks, and the
    tasks here assert that their lookup returns a bundle.  The second apply
    omits 'throw_assert'; an AssertionError escaping from it is caught and
    printed.  The body ends the same way whether or not that error is raised.

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.

    Returns:
        None
    """

    # Produce the external bundle; its uuid is not needed here
    create_bundle_from_pipeline()

    first_params = {
        'test_param': 'never run before',
        'throw_assert': False,
    }
    result = api.apply(TEST_CONTEXT, PipelineA, params=first_params)
    assert result['success'] is True

    second_params = {'test_param': 'never run before'}
    try:
        api.apply(TEST_CONTEXT, PipelineA, params=second_params)
    except AssertionError as ae:
        print("ERROR: {}".format(ae))
def test(run_test):
    """ One task produces a bundle and another task requires it.

    1.) Apply DataMaker; a 'PreMaker' bundle must exist afterwards
    2.) Record PreMaker's uuid and remove the bundle
    3.) Apply Root_1
    4.) A 'PreMaker' bundle with a different uuid must exist, and so must 'Root_1'

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.
    """

    api.context(TEST_CONTEXT)

    maker_params = {'int_array': [1000, 2000, 3000]}
    api.apply(TEST_CONTEXT, DataMaker, params=maker_params)

    # Remember which PreMaker bundle the first run produced, then drop it
    premaker = api.get(TEST_CONTEXT, 'PreMaker')
    assert (premaker is not None)
    pm_uuid = premaker.uuid
    premaker.rm()

    api.apply(TEST_CONTEXT, Root_1)

    # PreMaker must be back, as a different bundle
    new_premaker = api.get(TEST_CONTEXT, 'PreMaker')
    assert (new_premaker is not None)
    assert (new_premaker.uuid != pm_uuid)

    root_bundle = api.get(TEST_CONTEXT, 'Root_1')
    assert (root_bundle is not None)

    api.delete_context(TEST_CONTEXT)
def test_name_external_dependency():
    """Apply PipelineC, passing the external bundle's human name as 'ext_name'."""
    bundle_uuid = create_bundle_from_pipeline()
    print("UUID of created bundle is {}".format(bundle_uuid))

    # External dependency resolved by human name
    name_params = {'ext_name': EXT_BUNDLE_NAME}
    api.apply(TEST_CONTEXT, PipelineC, params=name_params)
def test_uuid_external_dependency():
    """Apply PipelineB, passing the external bundle's uuid as 'ext_uuid'."""
    bundle_uuid = create_bundle_from_pipeline()
    print("UUID of created bundle is {}".format(bundle_uuid))

    # External dependency resolved by a specific UUID
    uuid_params = {'ext_uuid': bundle_uuid}
    api.apply(TEST_CONTEXT, PipelineB, params=uuid_params)
def test_ord_external_dependency():
    """Create the external bundle, then apply PipelineA with no parameters."""
    bundle_uuid = create_bundle_from_pipeline()
    print("UUID of created bundle is {}".format(bundle_uuid))

    # Ordinary external dependency
    api.apply(TEST_CONTEXT, PipelineA)
Exemple #15
0
def test_no_remote_no_push_managed_s3():
    """Applying ManagedS3 in a context with no remote bound must raise."""
    # Fresh, empty context with no call to api.remote
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    existing = api.search(TEST_CONTEXT)
    assert len(existing) == 0, 'Context should be empty'

    with pytest.raises(Exception):
        api.apply(TEST_CONTEXT, ManagedS3)
Exemple #16
0
def test_uuid_external_dependency(run_test):
    """Apply PipelineB twice with the same 'ext_uuid'; the second run must do no work.

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.
    """
    ext_uuid = create_bundle_from_pipeline()
    uuid_params = {'ext_uuid': ext_uuid}

    # First run does the work
    api.apply(TEST_CONTEXT, PipelineB, params=uuid_params)

    # Second run with identical parameters
    rerun = api.apply(TEST_CONTEXT, PipelineB, params={'ext_uuid': ext_uuid})
    assert rerun['success'] is True
    assert rerun['did_work'] is False
def create_bundle_from_pipeline():
    """ Apply ExternalPipeline into EXT_BUNDLE_NAME and return that bundle's uuid.

    Returns:
        The ``uuid`` attribute of the bundle fetched by name from TEST_CONTEXT.
    """
    pipeline_params = {'test_param': EXT_TASK_PARAM_VAL}
    api.apply(
        TEST_CONTEXT,
        ExternalPipeline,
        params=pipeline_params,
        output_bundle=EXT_BUNDLE_NAME,
    )
    created = api.get(TEST_CONTEXT, EXT_BUNDLE_NAME)
    return created.uuid
def test_float_task():
    """FloatTask must leave exactly one bundle whose data is the float 2.5."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, FloatTask)
    bundle = api.get(TEST_CONTEXT, 'float_task')
    data = bundle.data

    assert data == 2.5, 'Data did not match output'
    assert type(data) == float, 'Data is not float'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_string_task():
    """StringTask must leave exactly one bundle whose data is the text 'output'."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, StringTask)
    bundle = api.get(TEST_CONTEXT, 'string_task')
    data = bundle.data

    assert data == 'output', 'Data did not match output'
    assert type(data) == six.text_type, 'Data is not string'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_list_task():
    """ListTask must leave exactly one bundle whose data is an ndarray equal to [1, 2, 3]."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, ListTask)
    bundle = api.get(TEST_CONTEXT, 'list_task')
    data = bundle.data

    # The bundle's data is checked to be a numpy array, not a Python list
    assert np.array_equal(data, [1, 2, 3]), 'Data did not match output'
    assert type(data) == np.ndarray, 'Data is not list'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
Exemple #21
0
def test_dependant_tasks():
    """Applying 'C' must yield the int 6 in bundle 'c' and three bundles in total."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    # The task is referenced by name rather than by class
    api.apply(TEST_CONTEXT, 'C')
    bundle = api.get(TEST_CONTEXT, 'c')
    data = bundle.data

    assert data == 6, 'Data did not match output'
    assert type(data) == int, 'Data is not path'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 3, 'Three bundles should be present'
def test_dict_task():
    """DictTask must leave exactly one bundle whose data is {'hello': ['world']}."""
    setup()
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, DictTask)
    bundle = api.get(TEST_CONTEXT, 'dict_task')
    data = bundle.data

    assert data == {'hello': ['world']}, 'Data did not match output'
    assert type(data) == dict, 'Data is not dict'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_file_task():
    """FileTask must leave one bundle whose data is a str path to a file containing '5'."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, FileTask)
    bundle = api.get(TEST_CONTEXT, 'file_task')
    output_path = bundle.data

    # Read back what the task wrote
    with open(output_path) as handle:
        contents = handle.read()

    assert contents == '5', 'Data did not match output'
    assert type(output_path) == str, 'Data is not path'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
def test_df_task():
    """DataFrameTask must leave one bundle whose data is a DataFrame with column a = [1, 2, 3]."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    # Reference frame to compare against
    expected = pd.DataFrame()
    expected['a'] = [1, 2, 3]

    api.apply(TEST_CONTEXT, DataFrameTask)
    bundle = api.get(TEST_CONTEXT, 'df_task')
    data = bundle.data

    assert expected.equals(data), 'Data did not match output'
    assert type(data) == pd.DataFrame, 'Data is not df'

    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
Exemple #25
0
def test_non_managed_local():
    """NonManagedLocal must leave one bundle, 'b1', whose first file exists locally."""
    # Fresh, empty context
    api.delete_context(TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)

    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, NonManagedLocal)
    after = api.search(TEST_CONTEXT)
    assert len(after) == 1, 'Only one bundle should be present'
    print(api.cat(TEST_CONTEXT, 'b1'))

    matches = api.search(TEST_CONTEXT, human_name='b1')
    local_file = matches[0].data['file'][0]
    assert os.path.exists(local_file), \
        'Local file should be present in bundle'
def test(run_test):
    """ Check how ``force`` and ``force_all`` affect which bundles are recreated.

    Bundles 'A' and 'B' are compared by uuid across four applies of A:
      * plain apply                      -> baseline uuids
      * force=True                       -> B unchanged, A new
      * force_all=True                   -> B new, A new
      * force_all=True with set_ext_dep  -> B unchanged, A new

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.
    """

    def current_uuids():
        # Fetch B first, then A, matching the order used throughout the test
        b_uuid = api.get(TEST_CONTEXT, 'B').uuid
        a_uuid = api.get(TEST_CONTEXT, 'A').uuid
        return b_uuid, a_uuid

    # first run there should be no bundles
    #assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'
    api.apply(TEST_CONTEXT, A, params={})
    first_B_uuid, first_A_uuid = current_uuids()

    # force: only A is rebuilt
    api.apply(TEST_CONTEXT, A, force=True, params={})
    one_B_uuid, one_A_uuid = current_uuids()
    assert (first_B_uuid == one_B_uuid)
    assert (first_A_uuid != one_A_uuid)

    # force_all: both are rebuilt
    api.apply(TEST_CONTEXT, A, force_all=True, params={})
    all_B_uuid, all_A_uuid = current_uuids()
    assert (all_B_uuid != one_B_uuid)
    assert (all_A_uuid != one_A_uuid)

    # force_all must not crash when there is an external bundle
    api.apply(TEST_CONTEXT, A, force_all=True, params={'set_ext_dep': True})
    final_B_uuid, final_A_uuid = current_uuids()
    assert (final_B_uuid == all_B_uuid)
    assert (final_A_uuid != all_A_uuid)
Exemple #27
0
def test_single_task():
    """Applying 'A' twice must yield the int 2 in bundle 'a' and still only one bundle."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'A')
    bundle = api.get(TEST_CONTEXT, 'a')
    data = bundle.data

    assert data == 2, 'Data did not match output'
    assert type(data) == int, 'Data is not path'
    after_first = api.search(TEST_CONTEXT)
    assert len(after_first) == 1, 'Only one bundle should be present'

    # A second apply must not add another bundle
    api.apply(TEST_CONTEXT, 'A')
    after_second = api.search(TEST_CONTEXT)
    assert len(after_second) == 1, 'Only one bundle should be present'
Exemple #28
0
def test():
    """ Apply Root2 with two workers and inspect the result if apply raises.

    Root2 is applied with ``force=True``, ``workers=2`` and output bundle
    'test_api_exit'.  If the call raises, the exception is expected to carry
    a ``result`` mapping reporting that work was done ('did_work') and that
    the run did not succeed ('success').  The value returned by apply (None
    when it raised) is printed in either case.

    """

    api.context(TEST_CONTEXT)

    apply_kwargs = dict(
        output_bundle='test_api_exit',
        params={},
        force=True,
        workers=2,
    )

    result = None
    try:
        result = api.apply(TEST_CONTEXT, Root2, **apply_kwargs)
    except Exception as e:
        print("Got exception {} result {} ".format(e, e.result))
        assert (e.result['did_work'])
        assert (not e.result['success'])
    finally:
        print("API apply returned {}".format(result))
Exemple #29
0
def test_AB4(run_test):
    """
    4.) Run A->B, Re-run A*.  Run A->B, nothing should run.

    Args:
        run_test: Not referenced in the body; presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the fixture definition.
    """

    # Initial run of B does work
    first = api.apply(TEST_CONTEXT, B)
    assert first['success'] is True
    assert first['did_work'] is True

    # A applied with explicit parameters also does work
    a_params = {'a': 2, 'b': 3}
    second = api.apply(TEST_CONTEXT, A, params=a_params)
    assert second['success'] is True
    assert second['did_work'] is True

    # Re-applying B succeeds but does no work
    third = api.apply(TEST_CONTEXT, B)
    assert third['success'] is True
    assert third['did_work'] is False
Exemple #30
0
def test_task_with_parameter():
    """Applying 'B' with n=10 then n=20 must yield 20 then 40, one new bundle per run."""
    before = api.search(TEST_CONTEXT)
    assert len(before) == 0, 'Context should be empty'

    # First parameter value
    api.apply(TEST_CONTEXT, 'B', params={'n': 10})
    first_data = api.get(TEST_CONTEXT, 'b').data

    assert first_data == 20, 'Data did not match output'
    assert type(first_data) == int, 'Data is not path'
    after_first = api.search(TEST_CONTEXT)
    assert len(after_first) == 1, 'One bundle should be present'

    # A different parameter value produces a second bundle
    api.apply(TEST_CONTEXT, 'B', params={'n': 20})
    second_data = api.get(TEST_CONTEXT, 'b').data

    assert second_data == 40, 'Data did not match output'
    assert type(second_data) == int, 'Data is not path'
    after_second = api.search(TEST_CONTEXT)
    assert len(after_second) == 2, 'Two bundles should be present'