def test_dependencies_include_wrapped_artifacts(dbdiskrepo):
    """Check that an artifact nested in a returned dict shows up as a dependency.

    ``calc`` returns a plain dict whose ``'add'`` entry is the result of the
    provenance-decorated ``add``.  The id of the artifact produced by
    ``add(5, 5)`` must be among the ids returned by ``p.dependencies`` for the
    artifact of ``calc(5, 5)``.

    The ``dbdiskrepo`` fixture is not referenced in the body; presumably it
    configures the repository the decorators write to (confirm in conftest).
    """

    @p.provenance()
    def add(a, b):
        return a + b

    sub = add(5, 5).artifact.id

    # Commented-out alternative decorator kept from the original:
    # @p.provenance(returns_composite=True)
    @p.provenance()
    def calc(a, b):
        return {'add': add(a, b), 'mult': a * b}

    result = calc(5, 5)

    # A set comprehension avoids building an intermediate list just to feed
    # it to set().
    deps = {dep.id for dep in p.dependencies(result.artifact.id)}
    assert sub in deps
def test_integration_test(repo):
    """Run a four-step provenance pipeline and inspect the recorded artifacts.

    The pipeline loads ``[1, 2]``, derives two incremented lists from it and
    sums them element-wise.  The assertions cover, in order: the final value
    and its ``value_id``, tags/custom fields, removal of a ``remove``d input,
    ``name``/``version`` metadata, that a step is not re-executed when only
    ignored/removed arguments change, the list returned by
    ``p.dependencies`` and the stored ``input_artifact_ids``.
    """

    @p.provenance(version=0, name='initial_data')
    def load_data(filename):
        return [1, 2]

    @p.provenance(repo=repo, remove=('to_remove', ))
    def process_data_A(data, process_a_inc, to_remove):
        return [i + process_a_inc for i in data]

    # Counts real executions of process_data_B (as opposed to cached results).
    process_b_calls = 0

    @p.provenance(ignore=('to_ignore', ))
    def process_data_B(data, process_b_inc, to_ignore):
        nonlocal process_b_calls
        process_b_calls += 1
        return [i + process_b_inc for i in data]

    @p.provenance(tags=['tag_a'])
    def combine_processed_data(inc_a, inc_b):
        return [a + b for a, b in zip(inc_a, inc_b)]

    def run_pipeline(filename, to_ignore, to_remove):
        loaded = load_data(filename)  # [1, 2]
        shifted_a = process_data_A(loaded, 1, to_remove)  # [2, 3]
        shifted_b = process_data_B(loaded, 5, to_ignore)  # [6, 7]
        return combine_processed_data(shifted_a, shifted_b)  # [8, 10]

    first_run = run_pipeline('foo-bar.csv', 'something', 'removed')

    top_artifact = first_run.artifact
    top_kargs = top_artifact.inputs['kargs']
    inc_a_artifact = top_kargs['inc_a'].artifact
    inc_b_artifact = top_kargs['inc_b'].artifact

    assert first_run == [8, 10]

    # Initial wrapping.
    # NOTE(review): builtin hash() rejects lists, so `hash` is presumably a
    # hashing helper imported elsewhere in this file -- confirm.
    assert top_artifact.value_id == hash([8, 10])

    # Tags are exposed both through custom_fields and through .tags.
    assert top_artifact.custom_fields == {'tags': ['tag_a']}
    assert top_artifact.tags == ['tag_a']

    # 'to_remove' must be absent from the recorded inputs of process_data_A.
    expected_a_inputs = {
        'kargs': {
            'data': [1, 2],
            'process_a_inc': 1
        },
        'varargs': (),
    }
    assert inc_a_artifact.inputs == expected_a_inputs

    # Metadata given to the decorator of load_data.
    data_artifact = inc_a_artifact.inputs['kargs']['data'].artifact
    assert data_artifact.name == 'initial_data'
    assert data_artifact.version == 0

    # Caching: a second run that differs only in the ignored and removed
    # arguments must not execute process_data_B again.
    assert process_b_calls == 1
    second_run = run_pipeline(
        'foo-bar.csv', 'something-different', 'removed-again')
    assert second_run == [8, 10]
    assert process_b_calls == 1

    # Dependencies of the final artifact, in the order they are returned.
    expected_dependencies = [
        data_artifact,
        inc_a_artifact,
        top_artifact.inputs['kargs']['inc_b'].artifact,
        top_artifact,
    ]
    dependencies = p.dependencies(first_run.artifact.id)
    assert dependencies == expected_dependencies

    # The ids of the direct input artifacts were stored on the final artifact.
    expected_input_ids = frozenset((inc_a_artifact.id, inc_b_artifact.id))
    assert first_run.artifact.input_artifact_ids == expected_input_ids