Example 1
def test_same_models_are_equal(dbdiskrepo):
    """
    Validates that two separately constructed models using the same parameters
    hash to the same artifact in provenance terms.
    """
    fit1 = fit_model()
    fit2 = fit_model()
    assert fit1.artifact.id == fit2.artifact.id
    assert fit1.artifact.value_id == fit2.artifact.value_id
    assert hash(fit1) == hash(fit2)
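Note that `hash` throughout these examples is the library's content-based hash, not Python's builtin (the builtin cannot hash lists or dicts, and never returns the `(hash, artifacts)` pair seen in Example 5). `fit_model` itself is not shown in this excerpt; a minimal, hypothetical sketch of what the test assumes could look like this:

import provenance as p

# Hypothetical stand-in (not from the source): a provenance-decorated,
# deterministic "training" function, so that two calls with identical
# parameters resolve to the same stored artifact.
@p.provenance()
def fit_model():
    return {'coef': [0.5, 1.5], 'intercept': 0.1}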
Example 2
from copy import copy, deepcopy

def test_copied_models_are_equal(dbdiskrepo):
    """
    Validates that a copied model (deep or shallow copied) hashes to the same
    artifact as the original in provenance terms.
    """
    original = fit_model()

    shallow = copy(original)
    assert original.artifact.id == shallow.artifact.id
    assert original.artifact.value_id == shallow.artifact.value_id
    assert hash(original) == hash(shallow)

    deep = deepcopy(original)
    assert original.artifact.id == deep.artifact.id
    assert original.artifact.value_id == deep.artifact.value_id
    assert hash(original) == hash(deep)
Example 3
import pytest
import provenance as p

def test_lazy_dict():
    lazy_dict = p.lazy_dict({'foo': lambda: 'bar', 'baz': lambda: 'qux'})
    initial_hash = hash(lazy_dict)

    # check that keys can be fetched
    assert lazy_dict['foo'] == 'bar'
    assert lazy_dict['baz'] == 'qux'

    # check that the hash remains the same as values are realized
    assert hash(lazy_dict) == initial_hash

    # check that a missing key raises KeyError
    with pytest.raises(KeyError):
        lazy_dict['bar']

    del lazy_dict['foo']

    with pytest.raises(KeyError):
        lazy_dict['foo']
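A minimal usage sketch of `p.lazy_dict`, assuming the laziness this test relies on (values are zero-argument callables, invoked only when a key is first accessed; the names below are illustrative):

calls = {'n': 0}

def expensive():
    calls['n'] += 1
    return 42

ld = p.lazy_dict({'answer': expensive})
assert calls['n'] == 0     # nothing is computed at construction time
assert ld['answer'] == 42  # the thunk runs on first access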
Example 4
import provenance as p

def test_hashing_of_artifacts_and_proxies(repo):
    @p.provenance()
    def load_data():
        return [1, 2, 3]

    original_proxy = load_data()
    original_artifact = original_proxy.artifact
    loaded_artifact = repo.get_by_id(original_artifact.id)
    loaded_proxy = loaded_artifact.proxy()

    # All artifacts should have the same hash
    assert hash(original_artifact) == hash(loaded_artifact)

    # All proxies should have the same hash
    assert hash(original_proxy) == hash(loaded_proxy)

    # All values should have the same hash
    assert hash(original_artifact.value) == hash(loaded_artifact.value)

    # Artifacts and proxies should not have the same hash
    assert hash(original_artifact) != hash(original_proxy)

    # Proxies and values should have the same hash
    assert hash(original_proxy) == hash(original_artifact.value)
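The last assertion is the key design point: a proxy hashes like the value it wraps, not like its artifact. This is consistent with proxies comparing equal to their underlying values elsewhere in these examples (see `assert result == [8, 10]` in Example 7), so a hypothetical follow-up to the code above would also hold:

assert original_proxy == [1, 2, 3]            # the proxy behaves as its value
assert hash(loaded_proxy) == hash([1, 2, 3])  # and hashes as its content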
Example 5
import provenance as p
# `hash` (shadowing the builtin) and `ah.artifact_hasher` come from the
# library's content-hashing utilities; their exact import path is not shown
# in this excerpt.

def test_hashing_with_artifact_hasher_also_returns_iter_of_artifacts_preserves_hash(repo):
    @p.provenance()
    def load_data():
        return [1, 2, 3]

    @p.provenance()
    def create_composite(data):
        return {'foo': 'bar', 'data': data}

    data = load_data()

    original_proxy = create_composite(data)
    original_artifact = original_proxy.artifact
    loaded_artifact = repo.get_by_id(original_artifact.id)
    loaded_proxy = loaded_artifact.proxy()

    expected_proxy_ids = frozenset((original_artifact.id, data.artifact.id))
    expected_artifact_ids = frozenset((original_artifact.id, ))

    original_proxy_hash, artifacts = hash(original_proxy,
                                          hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert original_proxy_hash == hash(original_proxy)
    assert ids == expected_proxy_ids

    original_artifact_hash, artifacts = hash(original_artifact,
                                             hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert original_artifact_hash == hash(original_artifact)
    assert ids == expected_artifact_ids

    loaded_artifact_hash, artifacts = hash(loaded_artifact,
                                           hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert loaded_artifact_hash == hash(loaded_artifact)
    assert ids == expected_artifact_ids

    loaded_proxy_hash, artifacts = hash(loaded_proxy,
                                        hasher=ah.artifact_hasher())
    ids = frozenset(a.id for a in artifacts)
    assert loaded_proxy_hash == hash(loaded_proxy)
    assert ids == expected_proxy_ids
Example 6
import copy

def test_shared_values_hashing(base_data):
    # Hypothesis-style property: `base_data` is a draw context and `data` is
    # a strategy defined elsewhere in the suite (see the sketch below).
    base_data = base_data.draw(data)
    base_copy = lambda: copy.deepcopy(base_data)

    shared_dict = {'a': base_data, 'b': base_data}
    without_sharing_dict = {'a': base_copy(), 'b': base_copy()}

    assert hash(shared_dict) == hash(without_sharing_dict)

    shared_tuple = (base_data, base_data)
    without_sharing_tuple = (base_copy(), base_copy())

    assert hash(shared_tuple) == hash(without_sharing_tuple)

    shared_list = [base_data, base_data]
    without_sharing_list = [base_copy(), base_copy()]

    assert hash(shared_list) == hash(without_sharing_list)
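The harness around this test is not shown; it reads like a Hypothesis property, with `base_data` a draw context and `data` a strategy for arbitrary nested values. A hedged reconstruction of the assumed plumbing:

from hypothesis import given, strategies as st

# Hypothetical: `data` would be a recursive strategy producing nested
# dicts/lists/scalars; its exact definition lives elsewhere in the suite.
data = st.recursive(
    st.none() | st.booleans() | st.integers() | st.text(),
    lambda children: st.lists(children) | st.dictionaries(st.text(), children),
)

# The test itself would then be decorated with:
# @given(st.data())
# def test_shared_values_hashing(base_data): ...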
Example 7
import provenance as p

def test_integration_test(repo):
    @p.provenance(version=0, name='initial_data')
    def load_data(filename):
        return [1, 2]

    @p.provenance(repo=repo, remove=('to_remove', ))
    def process_data_A(data, process_a_inc, to_remove):
        return [i + process_a_inc for i in data]

    times_called = 0

    @p.provenance(ignore=('to_ignore', ))
    def process_data_B(data, process_b_inc, to_ignore):
        nonlocal times_called
        times_called += 1
        return [i + process_b_inc for i in data]

    @p.provenance(tags=['tag_a'])
    def combine_processed_data(inc_a, inc_b):
        return [a + b for a, b in zip(inc_a, inc_b)]

    def run_pipeline(filename, to_ignore, to_remove):
        data = load_data(filename)  # [1, 2]
        inc_a = process_data_A(data, 1, to_remove)  # [2, 3]
        inc_b = process_data_B(data, 5, to_ignore)  # [6, 7]
        res = combine_processed_data(inc_a, inc_b)  # [8, 10]
        return res

    result = run_pipeline('foo-bar.csv', 'something', 'removed')
    artifact = result.artifact
    inc_a_artifact = artifact.inputs['kargs']['inc_a'].artifact
    inc_b_artifact = artifact.inputs['kargs']['inc_b'].artifact

    assert result == [8, 10]

    # check that the stored value_id is the content hash of the result
    assert artifact.value_id == hash([8, 10])

    # check for custom_fields and tags in result
    assert artifact.custom_fields == {'tags': ['tag_a']}
    assert artifact.tags == ['tag_a']

    # check that the removed argument ('to_remove') is absent from the recorded inputs
    assert inc_a_artifact.inputs == {
        'kargs': {
            'data': [1, 2],
            'process_a_inc': 1
        },
        'varargs': (),
    }

    # check metadata
    data_artifact = inc_a_artifact.inputs['kargs']['data'].artifact
    assert data_artifact.name == 'initial_data'
    assert data_artifact.version == 0

    # Check caching
    assert times_called == 1
    new_res = run_pipeline('foo-bar.csv', 'something-different',
                           'removed-again')
    assert new_res == [8, 10]
    assert times_called == 1

    # Check that the dependencies can be returned
    dependencies = p.dependencies(result.artifact.id)
    assert dependencies == [
        data_artifact,
        inc_a_artifact,
        artifact.inputs['kargs']['inc_b'].artifact,
        artifact,
    ]

    # Check that the input_artifact_ids were properly stored
    assert result.artifact.input_artifact_ids == frozenset(
        (inc_a_artifact.id, inc_b_artifact.id))
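Two behaviors are worth spelling out. Changing the `ignore`-d argument (`to_ignore`) did not re-run `process_data_B`, so ignored arguments evidently do not contribute to the artifact id; the `remove`-d argument (`to_remove`) is simply absent from the recorded inputs. And using only APIs already shown above, the pipeline result can be rehydrated from the repo by artifact id (hypothetical follow-up):

reloaded = repo.get_by_id(result.artifact.id).proxy()
assert reloaded == [8, 10]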
Example 8
def test_hashing_of_functions():
    def foo(a, b):
        return a + b

    assert hash(foo) == hash(foo)
Example 9
import numpy as np

def test_hash_of_fortran_array_is_the_same_as_c_array():
    c = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='C')
    f = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')

    assert hash(c) == hash(f)
Example 10
import numpy as np

def test_hash_of_contiguous_array_is_the_same_as_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]
    b = np.ascontiguousarray(a)
    assert hash(a) == hash(b)
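Both of these tests pin down the same property: the hash depends on array contents, not on memory layout. Plain NumPy confirms that the arrays really do differ in layout while holding identical data, so a naive hash over the raw buffer would disagree (illustrative check, standard NumPy only):

import numpy as np

c = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='C')
f = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')

assert np.array_equal(c, f)    # identical elements...
assert c.strides != f.strides  # ...laid out differently in memory
assert not f[:, :1, :].flags['C_CONTIGUOUS']  # slicing breaks contiguity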
Example 11
import copy

def test_shallow_and_deep_copies_hashing(o):
    # `o` is provided by the surrounding suite (e.g. a Hypothesis strategy or
    # a parametrized fixture); any content-hashable object works.
    original_hash = hash(o)
    shallow_copy = copy.copy(o)
    deep_copy = copy.deepcopy(o)
    assert hash(shallow_copy) == original_hash
    assert hash(deep_copy) == original_hash
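As with the earlier property-style tests, `o` is supplied by the surrounding suite (plausibly a Hypothesis strategy or a parametrized fixture). The property can be exercised directly with any content-hashable object, e.g.:

test_shallow_and_deep_copies_hashing({'a': [1, 2], 'b': {'c': 3}})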