Python get Examples, disdat.api.get Python Examples

Example #1

0

Show file

def test_ABC7(run_test):
    """
    7.) Run A->B->C,  Run A*->B.   Run A->B->C, nothing should run

    The test applies C, then temporarily replaces ``B.pipe_requires`` with a
    version that depends on ``APrime`` and applies B (a new B bundle is
    expected), then restores the original requires and applies C once more,
    expecting that no work is done.

    Args:
        run_test: not referenced in the body (presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the test module).

    Returns:
        None
    """

    result = api.apply(TEST_CONTEXT, C)
    assert result['success'] is True
    assert result['did_work'] is True
    B_uuid = api.get(TEST_CONTEXT, 'B').uuid

    def custom_B_requires(self):
        self.add_dependency('a', APrime, params={})

    old_requires = B.pipe_requires
    B.pipe_requires = custom_B_requires
    try:
        result = api.apply(TEST_CONTEXT, B)
        assert result['success'] is True
        assert result['did_work'] is True
        assert B_uuid != api.get(TEST_CONTEXT, 'B').uuid  # should have a new B
    finally:
        # Always undo the class-level patch, even when an assertion above
        # fails, so the modified B does not leak into other tests.
        B.pipe_requires = old_requires

    result = api.apply(TEST_CONTEXT, C)
    assert result['success'] is True
    assert result['did_work'] is False

Example #2

0

Show file

File: test_external_bundle.py Project: sayantansatpati/disdat

def test(run_test):
    """ One task produces a bundle and another task requires it.

    Steps:
      1.) Apply DataMaker (which, per the original notes, also runs PreMaker).
      2.) Check a PreMaker bundle exists, remember its uuid and remove it.
      3.) Apply Root_1, which needs DataMaker (external dep) and PreMaker.
      4.) Check PreMaker exists again under a different uuid and that a
          Root_1 bundle was produced.

    Args:
        run_test: not referenced in the body.
    """
    api.context(TEST_CONTEXT)

    maker_params = {'int_array': [1000, 2000, 3000]}
    api.apply(TEST_CONTEXT, DataMaker, params=maker_params)

    premaker = api.get(TEST_CONTEXT, 'PreMaker')
    assert premaker is not None
    original_uuid = premaker.uuid
    premaker.rm()

    api.apply(TEST_CONTEXT, Root_1)

    # PreMaker must have been produced again, as a different bundle.
    rebuilt = api.get(TEST_CONTEXT, 'PreMaker')
    assert rebuilt is not None
    assert rebuilt.uuid != original_uuid

    root_bundle = api.get(TEST_CONTEXT, 'Root_1')
    assert root_bundle is not None

    api.delete_context(TEST_CONTEXT)

Example #3

0

Show file

File: test_external_bundle.py Project: jonathanlunt/disdat

def test():
    """ One task produces a bundle and another task requires it.

    1.) Create the external dep -- this also creates PreMaker_auf_datamaker.
        CLI equivalent:
        dsdt apply - - test_external_bundle.DataMaker --int_array '[1000,2000,3000]'

    2.) Remove PreMaker_auf_datamaker.
        CLI equivalent:
        dsdt rm PreMaker_auf_datamaker

    3.) Run Root -- it should find DataMaker but not re-create it or
        PreMaker_auf_datamaker.  The test then checks that a
        PreMaker_auf_root bundle exists.
    """
    api.context(TEST_CONTEXT)

    datamaker_params = {'int_array': '[1000,2000,3000]'}
    api.apply(TEST_CONTEXT, '-', '-', 'DataMaker', params=datamaker_params)

    premaker_bundle = api.get(TEST_CONTEXT, 'PreMaker_auf_datamaker')
    assert premaker_bundle is not None
    premaker_bundle.rm()

    api.apply(TEST_CONTEXT, '-', '-', 'Root')

    root_premaker = api.get(TEST_CONTEXT, 'PreMaker_auf_root')
    assert root_premaker is not None

    api.delete_context(TEST_CONTEXT)

Example #4

0

Show file

def test_AB6(run_test):
    """
    6.) Run A->B, Re-run A*.  Run A*->B, B should re-run.

    The test applies B, applies APrime on its own, then temporarily replaces
    ``B.pipe_requires`` with a version that depends on ``APrime`` and applies
    B again.  The APrime bundle must be re-used (same uuid) while B must be
    a new bundle.

    Args:
        run_test: not referenced in the body (presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the test module).

    Returns:
        None
    """

    result = api.apply(TEST_CONTEXT, B)
    assert result['success'] is True
    assert result['did_work'] is True
    B_uuid = api.get(TEST_CONTEXT, 'B').uuid

    result = api.apply(TEST_CONTEXT, APrime)
    assert result['success'] is True
    assert result['did_work'] is True
    APrime_uuid = api.get(TEST_CONTEXT, 'APrime').uuid

    def custom_B_requires(self):
        self.add_dependency('a', APrime, params={})

    old_requires = B.pipe_requires
    B.pipe_requires = custom_B_requires
    try:
        result = api.apply(TEST_CONTEXT, B)
        assert result['success'] is True
        assert result['did_work'] is True
        assert APrime_uuid == api.get(TEST_CONTEXT, 'APrime').uuid
        assert B_uuid != api.get(TEST_CONTEXT, 'B').uuid
    finally:
        # Restore the class attribute even when an assertion fails so the
        # patched requires does not affect tests that run afterwards.
        B.pipe_requires = old_requires

Example #5

0

Show file

File: test_force_one_and_all.py Project: sayantansatpati/disdat

def test(run_test):
    """ Check that apply with force=True and force_all=True re-run tasks.

    Two tasks are involved and one depends on the other.  force should
    re-run only the last task, force_all should re-run both.

    Args:
        run_test: not referenced in the body.
    """

    def current_uuids():
        # Look up B first, then A, and hand back both uuids.
        b_uuid = api.get(TEST_CONTEXT, 'B').uuid
        a_uuid = api.get(TEST_CONTEXT, 'A').uuid
        return b_uuid, a_uuid

    # Initial run.  (A check that the context starts empty is left disabled.)
    api.apply(TEST_CONTEXT, A, params={})
    first_B, first_A = current_uuids()

    # Force only the last task: B is kept, A is new.
    api.apply(TEST_CONTEXT, A, force=True, params={})
    forced_B, forced_A = current_uuids()
    assert first_B == forced_B
    assert first_A != forced_A

    # Force everything: both B and A are new.
    api.apply(TEST_CONTEXT, A, force_all=True, params={})
    all_B, all_A = current_uuids()
    assert all_B != forced_B
    assert all_A != forced_A

    # force_all must not crash when there is an external bundle; B is
    # expected to stay the same while A is new.
    api.apply(TEST_CONTEXT, A, force_all=True, params={'set_ext_dep': True})
    last_B, last_A = current_uuids()
    assert last_B == all_B
    assert last_A != all_A

Example #6

0

Show file

    def bundle_inputs(self):
        """
        Collect the bundles that this pipe's upstream tasks provide as input.

        For an ExternalDepTask the bundle is fetched with api.get by the
        task's uuid from this pipe's local context; for any other task it is
        read from that task's PathCache entry.

        NOTE: Calls task.deps which calls task._requires which calls task.requires()

        Args:
            self (disdat.PipeTask):  The pipe task in question

        Returns:
            (dict(str:`disdat.api.Bundle`)):  {arg_name: bundle, ...}
        """

        bundles_by_arg = {}
        for upstream in self.deps():
            if not isinstance(upstream, ExternalDepTask):
                bundle = PathCache.get_path_cache(upstream).bundle
            else:
                bundle = api.get(self.data_context.get_local_name(),
                                 None,
                                 uuid=upstream.uuid)
            # Every dependency is required to resolve to a bundle.
            assert bundle is not None
            bundles_by_arg[upstream.user_arg_name] = bundle
        return bundles_by_arg

Example #7

0

Show file

File: test_remote.py Project: kyocum/disdat

def test_push(run_test):
    """ Push a committed bundle to an S3 remote and check the bucket fills up.

    Creates the test bucket, binds it as the remote of TEST_CONTEXT, creates
    a 'remote_test' bundle holding 'Hello', commits and pushes it, verifies
    that the bucket now lists objects, and finally empties and deletes the
    bucket.

    Args:
        run_test: not referenced in the body.
    """
    client = boto3.client('s3')
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    _ = api.Bundle(TEST_CONTEXT, name='remote_test', data='Hello')
    pushed = api.get(TEST_CONTEXT, 'remote_test')
    assert pushed.data == 'Hello'

    pushed.commit()
    pushed.push()

    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Clean up the bucket.
    test_bucket.objects.all().delete()
    test_bucket.delete()

Example #8

0

Show file

File: test_remote.py Project: penguinkang/disdat

def test_push():
    """ Run the 'RemoteTest' task, push its bundle and check the bucket fills up.

    Creates the context and the test bucket, binds the bucket as remote
    (force=True), applies 'RemoteTest', commits and pushes the resulting
    'remote_test' bundle, verifies the bucket lists objects, then removes
    the bucket and the context.
    """
    api.context(context_name=TEST_CONTEXT)

    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL, force=True)

    api.apply(TEST_CONTEXT, 'RemoteTest')
    pushed = api.get(TEST_CONTEXT, 'remote_test')
    assert pushed.data == 'Hello'

    pushed.commit()
    pushed.push()

    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Clean up the bucket and the context.
    test_bucket.objects.all().delete()
    test_bucket.delete()
    api.delete_context(context_name=TEST_CONTEXT)

Example #9

0

Show file

File: test_remote.py Project: sayantansatpati/disdat

def test_pull(run_test):
    """ Push a bundle, recreate the context, and pull the bundle back.

    After pushing the 'remote_test' bundle to the bucket, the context is
    deleted and created again, re-bound to the same remote, and pulled.  The
    first bundle found afterwards must carry the data 'Hello'.

    Args:
        run_test: not referenced in the body.
    """
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    resource.create_bucket(Bucket=TEST_BUCKET)
    test_bucket = resource.Bucket(TEST_BUCKET)

    # Both the bucket and the context must start out empty.
    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' not in listing, 'Bucket should be empty'
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)

    api.apply(TEST_CONTEXT, RemoteTest)
    pushed = api.get(TEST_CONTEXT, 'remote_test')
    assert pushed.data == 'Hello'

    pushed.commit()
    pushed.push()

    listing = client.list_objects(Bucket=TEST_BUCKET)
    assert 'Contents' in listing, 'Bucket should not be empty'
    assert len(listing['Contents']) > 0, 'Bucket should not be empty'

    # Start over with a fresh context bound to the same remote, then pull.
    api.delete_context(context_name=TEST_CONTEXT)
    api.context(context_name=TEST_CONTEXT)
    api.remote(TEST_CONTEXT, TEST_REMOTE, TEST_BUCKET_URL)
    api.pull(TEST_CONTEXT)

    found = api.search(TEST_CONTEXT)
    assert len(found) > 0, 'Pulled bundles down'
    assert found[0].data == 'Hello', 'Bundle contains correct data'

    # Clean up the bucket.
    test_bucket.objects.all().delete()
    test_bucket.delete()

Example #10

0

Show file

def manual_test_run_aws_batch(run_test, build_container_setup_only):
    """ Incomplete test, meant to be run manually.

    The container code itself needs to have its S3 access mocked out, so no
    mocked S3 bucket is created here (that setup is disabled).  The test
    binds a remote, runs the pipeline with pull and push enabled, removes
    all local bundles, pulls, and checks bundle 'A' against
    sum(COMMON_DEFAULT_ARGS).

    Args:
        run_test: not referenced in the body.
        build_container_setup_only: not referenced in the body.
    """

    # Add a remote; the run below both pulls and pushes.
    manual_s3_url = 's3://'
    api.remote(TEST_CONTEXT, TEST_CONTEXT, manual_s3_url)

    run_options = dict(remote_context=TEST_CONTEXT,
                       remote_s3_url=manual_s3_url,
                       pull=True,
                       push=True)
    _ = api.run(SETUP_DIR, TEST_CONTEXT, PIPELINE_CLS, **run_options)

    # Remove every local bundle, then fetch them back from the remote.
    api.rm(TEST_CONTEXT, bundle_name='.*', rm_all=True)
    api.pull(TEST_CONTEXT)

    pulled = api.get(TEST_CONTEXT, 'A')
    assert pulled.data == sum(COMMON_DEFAULT_ARGS)

Example #11

0

Show file

def deprecated_data_as_bundle_not_csv(tmpdir):
    """ Adding a .txt file with treat_file_as_bundle=True must fail.

    Writes a small text file under ``tmpdir``, expects api.add to raise an
    AssertionError for it, and checks that no bundle named
    'test_file_as_bundle_txt_file' exists afterwards.

    Args:
        tmpdir: directory in which the text file is created (converted with
            str() before use).
    """
    api.context(TEST_CONTEXT)

    # Write the text file and make sure it is really there.
    txt_path = os.path.join(str(tmpdir), 'test.txt')
    with open(txt_path, 'w') as handle:
        handle.write('this should not create a bundle')
    assert os.path.exists(txt_path)

    # The add call is expected to fail with an AssertionError.
    with pytest.raises(AssertionError) as raised:
        api.add(TEST_CONTEXT, 'bad_path', txt_path, treat_file_as_bundle=True)
    assert raised.type == AssertionError

    leftover = api.get(TEST_CONTEXT, 'test_file_as_bundle_txt_file')
    assert leftover is None, 'Bundle should not exist'

    api.delete_context(TEST_CONTEXT)

Example #12

0

Show file

File: test_pipeline.py Project: penguinkang/disdat

def test_task_with_parameter():
    """ Apply task 'B' with two values of ``n`` and check each output bundle.

    Starting from an empty context, applying 'B' with n=10 must give an int
    bundle 'b' equal to 20 and one bundle in the context; applying it with
    n=20 must give 40 and bring the context to two bundles.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'B', params={'n': 10})
    data = api.get(TEST_CONTEXT, 'b').data

    assert data == 20, 'Data did not match output'
    # Exact-type check; the failure message now names the type being checked.
    assert type(data) is int, 'Data is not int'
    assert len(api.search(TEST_CONTEXT)) == 1, 'One bundle should be present'

    api.apply(TEST_CONTEXT, 'B', params={'n': 20})
    data = api.get(TEST_CONTEXT, 'b').data

    assert data == 40, 'Data did not match output'
    assert type(data) is int, 'Data is not int'
    assert len(api.search(TEST_CONTEXT)) == 2, 'Two bundles should be present'

Example #13

0

Show file

File: test_link_localization.py Project: kyocum/disdat

def create_remote_file_bundle(name):
    """ Create a bundle whose data is a list of three S3 paths:
     a.) an unmanaged s3 path
     b.) a managed s3 path
     c.) a managed s3 path with a directory

    This source file is used as the payload for all three.  After the bundle
    is closed it is fetched again by uuid and committed, and the md5 of its
    first data entry is compared with the md5 of this file.

    Args:
        name: name given to the new bundle.
    """
    resource = boto3.resource('s3', region_name='us-east-1')
    resource.create_bucket(Bucket=TEST_BUCKET)

    # Upload this file to the test bucket (an unmanaged location).
    expected_md5 = md5_file(__file__)
    aws_s3.put_s3_file(__file__, TEST_BUCKET_URL)
    unmanaged_path = os.path.join(TEST_BUCKET_URL, os.path.basename(__file__))

    with api.Bundle(TEST_CONTEXT, name=name) as bundle:
        managed_path = bundle.get_remote_file('test_s3_file.txt')
        aws_s3.cp_local_to_s3_file(__file__, managed_path)

        managed_dir = bundle.get_remote_directory('vince/klartho')
        managed_dir_path = os.path.join(managed_dir, 'test_s3_file.txt')
        aws_s3.cp_local_to_s3_file(__file__, managed_dir_path)

        bundle.add_data([unmanaged_path, managed_path, managed_dir_path])
        bundle.add_tags({'info': 'added an s3 file'})

    # Re-read the bundle through the API before committing it.
    fetched = api.get(TEST_CONTEXT, None, uuid=bundle.uuid)
    fetched.commit()

    actual_md5 = md5_file(fetched.data[0])
    print(actual_md5)
    print(expected_md5)
    assert actual_md5 == expected_md5

Example #14

0

Show file

File: test_pipeline.py Project: penguinkang/disdat

def test_child_task_with_parameter():
    """ Apply task 'C' with two values of ``n`` and check output and bundle counts.

    Starting from an empty context, applying 'C' with n=10 must give an int
    bundle 'c' equal to 22 with three bundles present; applying it with n=20
    must give 42 with five bundles present.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'C', params={'n': 10})
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 22, 'Data did not match output'
    # Exact-type check; the failure message now names the type being checked.
    assert type(data) is int, 'Data is not int'
    assert len(
        api.search(TEST_CONTEXT)) == 3, 'Three bundles should be present'

    api.apply(TEST_CONTEXT, 'C', params={'n': 20})
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 42, 'Data did not match output'
    assert type(data) is int, 'Data is not int'
    assert len(api.search(TEST_CONTEXT)) == 5, 'Five bundles should be present'

Example #15

0

Show file

def test_list_task():
    """ Store a Python list in a bundle and read it back.

    The data read back must equal [1, 2, 3] element-wise and be a
    numpy ndarray, and exactly one bundle must exist afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    _ = api.Bundle(TEST_CONTEXT, name='list_task', data=[1, 2, 3])
    stored = api.get(TEST_CONTEXT, 'list_task')
    data = stored.data

    assert np.array_equal(data, [1, 2, 3]), 'Data did not match output'
    assert type(data) == np.ndarray, 'Data is not list'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #16

0

Show file

def test_float_task():
    """ Store a float in a bundle and read it back.

    The data read back must equal 2.5 and be a float, and exactly one
    bundle must exist afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    _ = api.Bundle(TEST_CONTEXT, name='float_task', data=2.5)
    stored = api.get(TEST_CONTEXT, 'float_task')
    data = stored.data

    assert data == 2.5, 'Data did not match output'
    assert type(data) == float, 'Data is not float'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #17

0

Show file

File: test_pipeline.py Project: penguinkang/disdat

def test_dependant_tasks():
    """ Apply task 'C' with default parameters and check its output.

    Starting from an empty context, the 'c' bundle must hold the int 6 and
    three bundles must be present after the apply.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'C')
    data = api.get(TEST_CONTEXT, 'c').data

    assert data == 6, 'Data did not match output'
    # Exact-type check; the failure message now names the type being checked.
    assert type(data) is int, 'Data is not int'
    assert len(
        api.search(TEST_CONTEXT)) == 3, 'Three bundles should be present'

Example #18

0

Show file

File: test_add.py Project: kyocum/disdat

def test_single_file(tmpdir):
    """ Add a single csv file as a bundle, with and without tags.

    The bundle's data must hash to the same value as the source csv in both
    cases, the tags must round-trip, and the bundle must still exist after
    the source file has been removed.

    Args:
        tmpdir: directory in which the csv file is created (converted with
            str() before use).
    """
    bundle_name = 'test_single_file'

    api.context(TEST_CONTEXT)

    # Write a small csv file and make sure it is really there.
    csv_path = os.path.join(str(tmpdir), 'test.csv')
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    frame.to_csv(csv_path)
    assert os.path.exists(csv_path)

    # Plain add: bundle content must match the file.
    api.add(TEST_CONTEXT, bundle_name, csv_path)
    plain = api.get(TEST_CONTEXT, bundle_name)
    bundle_hash = get_hash(plain.data)
    file_hash = get_hash(csv_path)
    assert bundle_hash == file_hash, 'Hashes do not match'

    # Add again with tags: content and tags must both match.
    tag = {'test': 'tag'}
    api.add(TEST_CONTEXT, bundle_name, csv_path, tags=tag)
    tagged = api.get(TEST_CONTEXT, bundle_name)
    bundle_hash = get_hash(tagged.data)
    file_hash = get_hash(csv_path)
    assert bundle_hash == file_hash, 'Hashes do not match'
    assert tagged.tags == tag, 'Tags do not match'

    # Removing the source file must not remove the bundle.
    os.remove(csv_path)
    survivor = api.get(TEST_CONTEXT, bundle_name)
    assert survivor is not None, 'Bundle should exist'

    api.delete_context(TEST_CONTEXT)

Example #19

0

Show file

File: test_external_dep.py Project: pombredanne/disdat

def create_bundle_from_pipeline():
    """ Apply ExternalPipeline so it writes EXT_BUNDLE_NAME, then look that
    bundle up.

    Returns:
        The uuid of the bundle found under EXT_BUNDLE_NAME.
    """
    pipeline_params = {'test_param': EXT_TASK_PARAM_VAL}
    api.apply(TEST_CONTEXT,
              ExternalPipeline,
              params=pipeline_params,
              output_bundle=EXT_BUNDLE_NAME)

    produced = api.get(TEST_CONTEXT, EXT_BUNDLE_NAME)
    return produced.uuid

Example #20

0

Show file

File: test_output_types.py Project: sayantansatpati/disdat

def test_int_task():
    """ Apply IntTask and check that its 'int_task' bundle holds the int 1.

    The context must be empty beforehand and hold exactly one bundle
    afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, IntTask)
    produced = api.get(TEST_CONTEXT, 'int_task')
    data = produced.data

    assert data == 1, 'Data did not match output'
    assert type(data) == int, 'Data is not int'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #21

0

Show file

def test_string_task():
    """ Store a string in a bundle and read it back.

    The data read back must equal 'output' and be of type six.text_type,
    and exactly one bundle must exist afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    _ = api.Bundle(TEST_CONTEXT, name='string_task', data='output')
    stored = api.get(TEST_CONTEXT, 'string_task')
    data = stored.data

    assert data == 'output', 'Data did not match output'
    assert type(data) == six.text_type, 'Data is not string'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #22

0

Show file

File: test_output_types.py Project: sayantansatpati/disdat

def test_dict_task():
    """ Apply DictTask and check that its 'dict_task' bundle holds the
    expected dict.

    setup() is called first; the context must then be empty, and must hold
    exactly one bundle after the apply.
    """
    setup()
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, DictTask)
    produced = api.get(TEST_CONTEXT, 'dict_task')
    data = produced.data

    assert data == {'hello': ['world']}, 'Data did not match output'
    assert type(data) == dict, 'Data is not dict'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #23

0

Show file

def test_args_bundle():
    """ Create a bundle that stores params, then read the params back.

    The bundle is opened without a name, given serialized_json_args as
    params, and named 'output' before it is closed.  The params of the
    bundle fetched under that name must equal what was stored.
    """
    with api.Bundle(TEST_CONTEXT) as new_bundle:
        new_bundle.add_params(serialized_json_args)
        new_bundle.name = 'output'

    fetched = api.get(TEST_CONTEXT, 'output')
    assert fetched.params == serialized_json_args

Example #24

0

Show file

def test_A2_A3(run_test):
    """
    2.) Run A, Run A, should re-use
    3.) Run A, Run A*, should re-run

    Applying A twice with the same arguments must do work only the first
    time and keep a single 'A' bundle; applying it with different params
    must do work again and result in a second 'A' bundle.

    Args:
        run_test: not referenced in the body (presumably a fixture that
            prepares TEST_CONTEXT -- confirm against the test module).
    """

    result = api.apply(TEST_CONTEXT, A)
    assert result['did_work'] is True
    first_A_uuid = api.get(TEST_CONTEXT, 'A').uuid
    result = api.apply(TEST_CONTEXT, A)
    assert result['did_work'] is False
    second_A_uuid = api.get(TEST_CONTEXT, 'A').uuid
    assert first_A_uuid == second_A_uuid
    # Compare counts by value; `is` on an int literal tests object identity
    # and only works by accident of CPython's small-int caching.
    assert len(api.search(TEST_CONTEXT, 'A')) == 1

    # Mod args, should re-run
    result = api.apply(TEST_CONTEXT, A, params={'a': 2, 'b': 3})
    assert result['did_work'] is True
    next_A_uuid = api.get(TEST_CONTEXT, 'A').uuid
    assert next_A_uuid != second_A_uuid
    assert len(api.search(TEST_CONTEXT, 'A')) == 2

Example #25

0

Show file

def test_dict_task():
    """ Store a dict in a bundle and read it back.

    setup() is called first; the context must then be empty.  The data read
    back must equal {'hello': ['world']} and be a dict, and exactly one
    bundle must exist afterwards.
    """
    setup()
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    payload = {'hello': ['world']}
    _ = api.Bundle(TEST_CONTEXT, name='dict_task', data=payload)
    read_back = api.get(TEST_CONTEXT, 'dict_task').data

    assert read_back == {'hello': ['world']}, 'Data did not match output'
    assert type(read_back) == dict, 'Data is not dict'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #26

0

Show file

File: test_output_types.py Project: sayantansatpati/disdat

def test_file_task():
    """ Apply FileTask and check the file its 'file_task' bundle points to.

    The bundle data is treated as a path: it must be a str, and the file at
    that path must contain '5'.  Exactly one bundle must exist afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, FileTask)
    produced = api.get(TEST_CONTEXT, 'file_task')
    output_path = produced.data

    with open(output_path) as handle:
        contents = handle.read()

    assert contents == '5', 'Data did not match output'
    assert type(output_path) == str, 'Data is not path'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #27

0

Show file

def test_df_task():
    """ Store a pandas DataFrame in a bundle and read it back.

    The data read back must equal the original frame and be a
    pd.DataFrame, and exactly one bundle must exist afterwards.
    """
    bundles_before = api.search(TEST_CONTEXT)
    assert len(bundles_before) == 0, 'Context should be empty'

    # Build the frame by adding column 'a' to an empty DataFrame.
    frame = pd.DataFrame()
    frame['a'] = [1, 2, 3]

    _ = api.Bundle(TEST_CONTEXT, name='df_task', data=frame)
    stored = api.get(TEST_CONTEXT, 'df_task')
    data = stored.data

    assert frame.equals(data), 'Data did not match output'
    assert type(data) == pd.DataFrame, 'Data is not df'

    bundles_after = api.search(TEST_CONTEXT)
    assert len(bundles_after) == 1, 'Only one bundle should be present'

Example #28

0

Show file

File: test_pipeline.py Project: penguinkang/disdat

def test_single_task():
    """ Apply task 'A' twice and check the output and the bundle count.

    Starting from an empty context, the 'a' bundle must hold the int 2 with
    one bundle present; applying 'A' a second time must leave the context
    with a single bundle.
    """
    assert len(api.search(TEST_CONTEXT)) == 0, 'Context should be empty'

    api.apply(TEST_CONTEXT, 'A')
    data = api.get(TEST_CONTEXT, 'a').data

    assert data == 2, 'Data did not match output'
    # Exact-type check; the failure message now names the type being checked.
    assert type(data) is int, 'Data is not int'
    assert len(
        api.search(TEST_CONTEXT)) == 1, 'Only one bundle should be present'

    # A second apply must not add another bundle.
    api.apply(TEST_CONTEXT, 'A')
    assert len(
        api.search(TEST_CONTEXT)) == 1, 'Only one bundle should be present'

Example #29

0

Show file

    def prepare_pipe_kwargs(self, for_run=False):
        """ Build the keyword arguments for the user's pipe_run function from
        the bundles produced by the upstream tasks.

        When ``for_run`` is false an empty dict is returned and no instance
        state is touched.  Otherwise ``self._input_tags`` and
        ``self._input_bundle_uuids`` are reset and then filled, per
        dependency argument name, with the tags and uuid of the bundle whose
        data is placed in the returned dict.

        Args:
            for_run (bool): prepare args for run -- at that point all upstream tasks have completed.

        Returns:
            (dict): A dictionary with the arguments.

        """
        kwargs = {}

        # Upstream outputs are only placed into the kwargs for an actual run,
        # so the user does not have to call self.inputs() (which would give a
        # list of output targets for the bundle).
        if not for_run:
            return kwargs

        # Reset the stored tags, in case this instance is run multiple times.
        self._input_tags = {}
        self._input_bundle_uuids = {}

        # Resolve the path cache entry of every dependency up front.
        upstream_tasks = [(dep.user_arg_name, PathCache.get_path_cache(dep))
                          for dep in self.deps()]

        for arg_name, cache_entry in upstream_tasks:
            # Dependencies without a path cache entry contribute nothing.
            if cache_entry is None:
                continue

            bundle = api.get(self.data_context.get_local_name(),
                             None,
                             uuid=cache_entry.uuid)
            assert bundle.is_presentable

            # Download data that is not local (the linked files are not present).
            # Per the original note, this is the default when running in a container.
            if self.incremental_pull:
                bundle.pull(localize=True)

            # Warn when a dependency name was already used for an earlier input.
            if cache_entry.instance.user_arg_name in kwargs:
                _logger.warning(
                    'Task human name {} reused when naming task dependencies: Dependency hyperframe shadowed'
                    .format(cache_entry.instance.user_arg_name))

            self._input_tags[arg_name] = bundle.tags
            self._input_bundle_uuids[arg_name] = cache_entry.uuid
            kwargs[arg_name] = bundle.data

        return kwargs

Example #30

0

Show file

def test_luigi_args(run_test):
    """ Create a task, store args, retrieve them through the bundle api.

    Python objects are passed as the values for Luigi parameters and are
    stored as serialized json objects.  The bundle presents the parameters
    in serialized form (Disdat isn't aware they were Luigi serialized), so
    each one is parsed with the matching ArgTask attribute before the
    comparison with the original input.

    Args:
        run_test: not referenced in the body.
    """
    api.apply(TEST_CONTEXT,
              ArgTask,
              output_bundle='output',
              params=test_luigi_args_data)

    stored = api.get(TEST_CONTEXT, 'output')

    # Parse every stored value with the parameter object of the same name.
    parsed_params = {
        key: getattr(ArgTask, key).parse(raw)
        for key, raw in stored.params.items()
    }
    assert parsed_params == test_luigi_args_data