def test_same_models_are_equal(dbdiskrepo):
    """
    Validates that two separately constructed models using the same
    parameters hash to the same artifact in provenance terms.
    """
    fit1 = fit_model()
    fit2 = fit_model()

    assert fit1.artifact.id == fit2.artifact.id
    assert fit1.artifact.value_id == fit2.artifact.value_id
    assert hash(fit1) == hash(fit2)

def test_copied_models_are_equal(dbdiskrepo):
    """
    Validates that a copied model (shallow or deep) hashes to the same
    artifact as the original in provenance terms.
    """
    original = fit_model()

    shallow = copy(original)
    assert original.artifact.id == shallow.artifact.id
    assert original.artifact.value_id == shallow.artifact.value_id
    assert hash(original) == hash(shallow)

    deep = deepcopy(original)
    assert original.artifact.id == deep.artifact.id
    assert original.artifact.value_id == deep.artifact.value_id
    assert hash(original) == hash(deep)

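# `fit_model` and the `dbdiskrepo` fixture are provided elsewhere in the
# suite. A hypothetical sketch (illustrative names only, not the suite's
# actual helper) of the shape the two tests above assume:
#
#     @p.provenance()
#     def fit_model():
#         # deterministic recorded inputs => repeated calls resolve to
#         # the same artifact
#         return train_model(load_training_data(), seed=42)
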
def test_lazy_dict():
    lazy_dict = p.lazy_dict({'foo': lambda: 'bar', 'baz': lambda: 'qux'})
    initial_hash = hash(lazy_dict)

    # check that keys can be fetched
    assert lazy_dict['foo'] == 'bar'
    assert lazy_dict['baz'] == 'qux'

    # check that the hash remains the same as values are realized
    assert hash(lazy_dict) == initial_hash

    # check that a missing key raises KeyError
    with pytest.raises(KeyError):
        lazy_dict['bar']

    # check that a deleted key raises KeyError afterwards
    del lazy_dict['foo']
    with pytest.raises(KeyError):
        lazy_dict['foo']

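# `p.lazy_dict` is the real API; the class below is a hypothetical,
# self-contained sketch of the access semantics the test above exercises
# (values computed once, on first access; missing or deleted keys raise
# KeyError). It does not model how provenance hashes the mapping.
class _LazyDictSketch:
    def __init__(self, thunks):
        self._thunks = dict(thunks)  # key -> zero-arg callable
        self._realized = {}          # key -> value computed on demand

    def __getitem__(self, key):
        if key not in self._thunks:
            raise KeyError(key)
        if key not in self._realized:
            self._realized[key] = self._thunks[key]()
        return self._realized[key]

    def __delitem__(self, key):
        del self._thunks[key]        # KeyError if already deleted
        self._realized.pop(key, None)
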
def test_hashing_of_artifacts_and_proxies(repo):
    @p.provenance()
    def load_data():
        return [1, 2, 3]

    original_proxy = load_data()
    original_artifact = original_proxy.artifact
    loaded_artifact = repo.get_by_id(original_artifact.id)
    loaded_proxy = loaded_artifact.proxy()

    # All artifacts should have the same hash
    assert hash(original_artifact) == hash(loaded_artifact)

    # All proxies should have the same hash
    assert hash(original_proxy) == hash(loaded_proxy)

    # All values should have the same hash
    assert hash(original_artifact.value) == hash(loaded_artifact.value)

    # Artifacts and proxies should not have the same hash
    assert hash(original_artifact) != hash(original_proxy)

    # Proxies and values should have the same hash
    assert hash(original_proxy) == hash(original_artifact.value)

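# The assertions above pin down the hash contract: an artifact hashes the
# same whether freshly created or reloaded from the repo, a proxy hashes
# as the value it wraps, and an artifact therefore never hashes equal to
# its own proxy.
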
def test_hashing_with_artifact_hasher_also_returns_iter_of_artifacts_preserves_hash(
    repo,
):
    @p.provenance()
    def load_data():
        return [1, 2, 3]

    @p.provenance()
    def create_composite(data):
        return {'foo': 'bar', 'data': data}

    data = load_data()
    original_proxy = create_composite(data)
    original_artifact = original_proxy.artifact
    loaded_artifact = repo.get_by_id(original_artifact.id)
    loaded_proxy = loaded_artifact.proxy()

    # Hashing a proxy walks its value, so the nested `data` proxy's
    # artifact is visited too; hashing an artifact visits only itself.
    expected_proxy_ids = frozenset((original_artifact.id, data.artifact.id))
    expected_artifact_ids = frozenset((original_artifact.id,))

    original_proxy_hash, artifacts = hash(original_proxy, hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert original_proxy_hash == hash(original_proxy)
    assert ids == expected_proxy_ids

    original_artifact_hash, artifacts = hash(original_artifact, hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert original_artifact_hash == hash(original_artifact)
    assert ids == expected_artifact_ids

    loaded_artifact_hash, artifacts = hash(loaded_artifact, hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert loaded_artifact_hash == hash(loaded_artifact)
    assert ids == expected_artifact_ids

    loaded_proxy_hash, artifacts = hash(loaded_proxy, hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert loaded_proxy_hash == hash(loaded_proxy)
    assert ids == expected_proxy_ids

# Assumes hypothesis (`from hypothesis import given` and strategies as
# `st`) plus a module-level strategy named `data`, all provided elsewhere
# in this module; the draw-based pattern below requires them.
@given(st.data())
def test_shared_values_hashing(base_data):
    base_data = base_data.draw(data)
    base_copy = lambda: copy.deepcopy(base_data)

    # Sharing one object should hash identically to structurally equal,
    # distinct copies of it.
    shared_dict = {'a': base_data, 'b': base_data}
    without_sharing_dict = {'a': base_copy(), 'b': base_copy()}
    assert hash(shared_dict) == hash(without_sharing_dict)

    shared_tuple = (base_data, base_data)
    without_sharing_tuple = (base_copy(), base_copy())
    assert hash(shared_tuple) == hash(without_sharing_tuple)

    shared_list = [base_data, base_data]
    without_sharing_list = [base_copy(), base_copy()]
    assert hash(shared_list) == hash(without_sharing_list)

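# A minimal sketch (an assumption, not this module's actual definition)
# of the kind of strategy the module-level `data` name could refer to:
# json-like nested values, which exercise shared-reference hashing well.
import hypothesis.strategies as st

example_data_strategy = st.recursive(
    st.none() | st.booleans() | st.integers() | st.text(),
    lambda children: st.lists(children)
    | st.dictionaries(st.text(), children),
)
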
def test_integration_test(repo):
    @p.provenance(version=0, name='initial_data')
    def load_data(filename):
        return [1, 2]

    @p.provenance(repo=repo, remove=('to_remove',))
    def process_data_A(data, process_a_inc, to_remove):
        return [i + process_a_inc for i in data]

    times_called = 0

    @p.provenance(ignore=('to_ignore',))
    def process_data_B(data, process_b_inc, to_ignore):
        nonlocal times_called
        times_called += 1
        return [i + process_b_inc for i in data]

    @p.provenance(tags=['tag_a'])
    def combine_processed_data(inc_a, inc_b):
        return [a + b for a, b in zip(inc_a, inc_b)]

    def run_pipeline(filename, to_ignore, to_remove):
        data = load_data(filename)                  # [1, 2]
        inc_a = process_data_A(data, 1, to_remove)  # [2, 3]
        inc_b = process_data_B(data, 5, to_ignore)  # [6, 7]
        res = combine_processed_data(inc_a, inc_b)  # [8, 10]
        return res

    result = run_pipeline('foo-bar.csv', 'something', 'removed')
    artifact = result.artifact
    inc_a_artifact = artifact.inputs['kargs']['inc_a'].artifact
    inc_b_artifact = artifact.inputs['kargs']['inc_b'].artifact

    assert result == [8, 10]

    # check initial wrapping
    assert artifact.value_id == hash([8, 10])

    # check for custom_fields and tags in result
    assert artifact.custom_fields == {'tags': ['tag_a']}
    assert artifact.tags == ['tag_a']

    # check that the removed argument is absent from the recorded inputs
    assert inc_a_artifact.inputs == {
        'kargs': {'data': [1, 2], 'process_a_inc': 1},
        'varargs': (),
    }

    # check metadata
    data_artifact = inc_a_artifact.inputs['kargs']['data'].artifact
    assert data_artifact.name == 'initial_data'
    assert data_artifact.version == 0

    # check caching: changing only ignored/removed arguments should not
    # trigger recomputation
    assert times_called == 1
    new_res = run_pipeline('foo-bar.csv', 'something-different', 'removed-again')
    assert new_res == [8, 10]
    assert times_called == 1

    # check that the dependencies can be returned
    dependencies = p.dependencies(result.artifact.id)
    assert dependencies == [
        data_artifact,
        inc_a_artifact,
        artifact.inputs['kargs']['inc_b'].artifact,
        artifact,
    ]

    # check that the input_artifact_ids were properly stored
    assert result.artifact.input_artifact_ids == frozenset(
        (inc_a_artifact.id, inc_b_artifact.id))

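# Taken together, the pipeline above exercises the decorator options used
# in this module: `version` and `name` record artifact metadata, `repo`
# targets an explicit repository, `remove` drops an argument from the
# artifact's recorded inputs, `ignore` excludes an argument from the
# cache key (changing it still yields a cache hit, as `times_called`
# shows), and `tags` lands in the artifact's custom fields.
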
def test_hashing_of_functions():
    def foo(a, b):
        return a + b

    assert hash(foo) == hash(foo)

def test_hash_of_fortran_array_is_the_same_as_c_array():
    c = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='C')
    f = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')

    assert hash(c) == hash(f)

def test_hash_of_contiguous_array_is_the_same_as_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]
    b = np.ascontiguousarray(a)

    assert hash(a) == hash(b)

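# Context for the two array tests above: the content-based `hash` used
# throughout this module is expected to depend on an array's logical
# contents, not its memory layout, so C- vs. Fortran-ordered arrays and
# contiguous copies of non-contiguous views hash identically.
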
# Assumed to draw from the same module-level `data` strategy referenced
# in test_shared_values_hashing above.
@given(data)
def test_shallow_and_deep_copies_hashing(o):
    original_hash = hash(o)

    shallow_copy = copy.copy(o)
    deep_copy = copy.deepcopy(o)

    assert hash(shallow_copy) == original_hash
    assert hash(deep_copy) == original_hash