Example 1
def test_chained_read_through_write():
    read_store = bs.MemoryStore({'foo': 42}, read=True, write=False)
    store_ahead = bs.MemoryStore(read=True,
                                 write=True,
                                 read_through_write=True)
    read_through_write_store = bs.MemoryStore(read=True,
                                              write=True,
                                              read_through_write=True)
    no_read_through_write_store = bs.MemoryStore(read=True,
                                                 write=True,
                                                 read_through_write=False)
    stores = [
        no_read_through_write_store, read_through_write_store, read_store,
        store_ahead
    ]
    chained_store = bs.ChainedStore(stores)

    assert 'foo' not in read_through_write_store
    assert 'foo' not in no_read_through_write_store
    assert 'foo' not in store_ahead
    # verify we read from the read-only store
    assert chained_store['foo'] == 42

    assert 'foo' in read_through_write_store
    assert 'foo' not in store_ahead
    assert 'foo' not in no_read_through_write_store
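
The ordering above matters: on a chained read, only the stores searched before the one that finally served the key are back-filled, and only when they were created with read_through_write=True; stores after the hit are never touched. The following is a minimal sketch of that read path under those assumptions (the SketchStore/SketchChainedStore names are hypothetical, not the provenance implementation):

class SketchStore:
    # minimal stand-in for bs.MemoryStore; hypothetical illustration only
    def __init__(self, data=None, read=True, read_through_write=True):
        self._data = dict(data or {})
        self.read = read
        self.read_through_write = read_through_write

    def __contains__(self, key):
        return key in self._data

    def get(self, key):
        return self._data[key]

    def put(self, key, value):
        self._data[key] = value


class SketchChainedStore:
    def __init__(self, stores):
        self.stores = stores

    def __getitem__(self, key):
        missed = []
        for store in self.stores:
            if store.read and key in store:
                value = store.get(key)
                # back-fill only the stores we already searched and missed,
                # and only those that opted in to read-through writes
                for m in missed:
                    if m.read_through_write:
                        m.put(key, value)
                return value
            missed.append(store)
        raise KeyError(key)


ahead = SketchStore(read_through_write=True)
skipped = SketchStore(read_through_write=False)
source = SketchStore({'foo': 42})
chained = SketchChainedStore([skipped, ahead, source])
assert chained['foo'] == 42
assert 'foo' in ahead         # back-filled on the read
assert 'foo' not in skipped   # read_through_write=False, left alone
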
Example 2
def test_permissions():
    store = bs.MemoryStore(read=True, write=True, delete=True)
    store.put('a', 1)
    assert store.get('a') == 1
    store.delete('a')

    store = bs.MemoryStore(read=False, write=False, delete=False)
    with pytest.raises(cs.PermissionError):
        store.put('a', 1)

    with pytest.raises(cs.PermissionError):
        store.get('a')

    with pytest.raises(cs.PermissionError):
        store.delete('a')
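
A minimal sketch of the permission gating this test exercises, assuming each constructor flag simply guards its corresponding operation. The real store raises cs.PermissionError; here it is modeled with a local exception class, and GatedStore is a hypothetical name:

class StorePermissionError(Exception):
    # local stand-in for cs.PermissionError
    pass


class GatedStore:
    # hypothetical illustration, not the library's MemoryStore
    def __init__(self, read=True, write=True, delete=True):
        self._data = {}
        self._read, self._write, self._delete = read, write, delete

    def put(self, key, value):
        if not self._write:
            raise StorePermissionError('store is not writeable')
        self._data[key] = value

    def get(self, key):
        if not self._read:
            raise StorePermissionError('store is not readable')
        return self._data[key]

    def delete(self, key):
        if not self._delete:
            raise StorePermissionError('store does not allow deletes')
        del self._data[key]


locked = GatedStore(read=False, write=False, delete=False)
try:
    locked.put('a', 1)
except StorePermissionError:
    pass  # put is refused, matching the behavior asserted above
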
Example 3
def test_chained_with_readonly():
    read_store = bs.MemoryStore({'foo': 42},
                                read=True,
                                write=False,
                                delete=False)
    write_store = bs.MemoryStore(read=True, write=True, delete=False)
    stores = [read_store, write_store]
    chained_store = bs.ChainedStore(stores)

    # verify we read from the read-only store
    assert chained_store['foo'] == 42

    # but that it is not written to
    chained_store.put('bar', 55)
    assert 'bar' in chained_store
    assert 'bar' in write_store
    assert 'bar' not in read_store
Example 4
def test_inputs_json(db_session):
    repo = r.DbRepo(db_session, bs.MemoryStore())

    @p.provenance(version=0, name='initial_data', repo=repo)
    def load_data(filename, timestamp):
        return {'data': [1, 2, 3], 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_X(data, process_x_inc, timestamp):
        _data = [i + process_x_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_Y(data, process_y_inc, timestamp):
        _data = [i + process_y_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def combine_processed_data(filename, inc_x, inc_y, timestamp):
        _data = [a + b for a, b in zip(inc_x['data'], inc_y['data'])]
        return {'data': _data, 'timestamp': timestamp}

    def pipeline(filename, timestamp, process_x_inc, process_y_inc):
        data = load_data(filename, timestamp)
        inc_x = process_data_X(data, process_x_inc, timestamp)
        inc_y = process_data_Y(data, process_y_inc, timestamp)
        res = combine_processed_data(filename, inc_x, inc_y, timestamp)
        return {'data': data, 'inc_x': inc_x, 'inc_y': inc_y, 'res': res}

    now = datetime(2016, 9, 27, 7, 51, 11, 613544)

    expected_inputs_json = {
        "__varargs": [],
        "filename": "foo-bar",
        "timestamp": now,
        "inc_x": {
            "id": "2c33a362ebd51f830d0b245473ab6c1269674259",
            "name": "test_repos.process_data_X",
            "type": "ArtifactProxy"
        },
        "inc_y": {
            "id": "f9b1bb7a8aaf435fbf60b92cd88bf6c46604f702",
            "name": "test_repos.process_data_Y",
            "type": "ArtifactProxy"
        }
    }

    results = pipeline(filename='foo-bar',
                       process_x_inc=5,
                       process_y_inc=10,
                       timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json

    results = pipeline(filename='foo-bar',
                       process_x_inc=5,
                       process_y_inc=10,
                       timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json
Example 5
def test_chained_writes_may_be_allowed_on_read_throughs_only():
    read_store = bs.MemoryStore({'foo': 42}, read=True, write=False)
    read_through_write_only_store = bs.MemoryStore(read=True,
                                                   write=False,
                                                   read_through_write=True)
    write_store = bs.MemoryStore(read=True,
                                 write=True,
                                 read_through_write=False)
    stores = [write_store, read_through_write_only_store, read_store]
    chained_store = bs.ChainedStore(stores)

    # verify we read from the read-only store
    assert chained_store['foo'] == 42

    assert 'foo' in read_through_write_only_store
    assert 'foo' not in write_store

    chained_store.put('bar', 55)
    assert 'bar' in chained_store
    assert 'bar' not in read_through_write_only_store
    assert 'bar' in write_store
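
The write path is the mirror image of the read path sketched under Example 1: a chained put() goes to every store whose write flag is set, so a read_through_write-only store is populated by reads but never by direct writes. A hypothetical sketch of that behavior (WriteStub and SketchChainedPut are illustration names, not library code):

class WriteStub:
    # minimal stand-in with just the attributes the sketch needs
    def __init__(self, write):
        self.write = write
        self.data = {}

    def put(self, key, value):
        self.data[key] = value


class SketchChainedPut:
    def __init__(self, stores):
        self.stores = stores

    def put(self, key, value):
        wrote = False
        for store in self.stores:
            if store.write:  # read-through-only stores are skipped here
                store.put(key, value)
                wrote = True
        if not wrote:
            raise RuntimeError('no writeable store in the chain')


writeable = WriteStub(write=True)
read_through_only = WriteStub(write=False)
SketchChainedPut([read_through_only, writeable]).put('bar', 55)
assert 'bar' in writeable.data
assert 'bar' not in read_through_only.data
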
Example 6
def test_chained_storage_with_disk_and_s3_sharing_cachedir(s3fs):
    tmp_dir = '/tmp/prov_shared_store'
    shutil.rmtree(tmp_dir, ignore_errors=True)
    mem_store = bs.MemoryStore(read=True, write=True, delete=True)
    disk_store = bs.DiskStore(tmp_dir, read=True, write=True, delete=True)
    s3_store = bs.S3Store(
        tmp_dir,
        s3fs=s3fs,
        basepath='bucket/prov_test',
        read=True,
        write=True,
        delete=True,
        always_check_remote=True,
    )
    stores = [mem_store, disk_store, s3_store]

    chained_store = bs.ChainedStore(stores)

    key = 'foobar'
    data = {'a': 1, 'b': 2}

    # nothing has been written yet, anywhere in the chain
    for store in stores:
        assert key not in store
    assert key not in chained_store

    # a put through the chain lands in every writeable store
    chained_store.put(key, data)
    assert key in chained_store
    for store in stores:
        assert key in store

    assert chained_store.get(key) == data
    assert chained_store[key] == data

    chained_store.delete(key)
    assert key not in chained_store

    with pytest.raises(KeyError):
        chained_store.delete(key)

    with pytest.raises(KeyError):
        chained_store.get(key)
Example 7
def memory_store():
    return bs.MemoryStore()
Example 8
def test_memory_blobstore_raises(key, obj):
    store = bs.MemoryStore(read=True,
                           write=True,
                           delete=True,
                           on_duplicate_key='raise')
    assert_store_basic_ops(store, key, obj)
Example 9
def test_memory_blobstore(key, obj):
    store = bs.MemoryStore(read=True, write=True, delete=True)
    assert_store_basic_ops(store, key, obj)
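
The assert_store_basic_ops helper is not shown on this page; judging from the operations the other examples exercise, it plausibly walks a key through the full put/get/delete lifecycle. A guess at its shape (an assumption, not the actual helper):

import pytest


def assert_store_basic_ops(store, key, obj):
    # hypothetical reconstruction of the helper used by the tests above
    assert key not in store
    store.put(key, obj)
    assert key in store
    assert store.get(key) == obj
    assert store[key] == obj
    store.delete(key)
    assert key not in store
    with pytest.raises(KeyError):
        store.get(key)
    with pytest.raises(KeyError):
        store.delete(key)
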
Example 10
def test_inputs_json(db_session):
    repo = r.DbRepo(db_session, bs.MemoryStore())

    @p.provenance(version=0, name='initial_data', repo=repo)
    def load_data(filename, timestamp):
        return {'data': [1, 2, 3], 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_X(data, process_x_inc, timestamp):
        _data = [i + process_x_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_Y(data, process_y_inc, timestamp):
        _data = [i + process_y_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def combine_processed_data(filename, inc_x, inc_y, timestamp):
        _data = [a + b for a, b in zip(inc_x['data'], inc_y['data'])]
        return {'data': _data, 'timestamp': timestamp}

    def pipeline(filename, timestamp, process_x_inc, process_y_inc):
        data = load_data(filename, timestamp)
        inc_x = process_data_X(data, process_x_inc, timestamp)
        inc_y = process_data_Y(data, process_y_inc, timestamp)
        res = combine_processed_data(filename, inc_x, inc_y, timestamp)
        return {'data': data, 'inc_x': inc_x, 'inc_y': inc_y, 'res': res}

    now = datetime(2016, 9, 27, 7, 51, 11, 613544)

    expected_inputs_json = {
        '__varargs': [],
        'filename': 'foo-bar',
        'timestamp': now,
        'inc_x': {
            'id': 'c74da9d379234901fe7a89e03fa800b0',  # md5
            # "id": "2c33a362ebd51f830d0b245473ab6c1269674259",  # sha1
            'name': 'test_repos.process_data_X',
            'type': 'ArtifactProxy',
        },
        'inc_y': {
            'id': 'a1bd4d4ae1f33ae6379613618427f127',  # md5
            # "id": "f9b1bb7a8aaf435fbf60b92cd88bf6c46604f702",  # sha1
            'name': 'test_repos.process_data_Y',
            'type': 'ArtifactProxy',
        },
    }

    results = pipeline(filename='foo-bar',
                       process_x_inc=5,
                       process_y_inc=10,
                       timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json

    results = pipeline(filename='foo-bar',
                       process_x_inc=5,
                       process_y_inc=10,
                       timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json