# Example #1
def repo(request, db_session):
    """Parametrized fixture that yields a provenance repo of the flavor
    named by ``request.param``.

    Recognized params: ``'memoryrepo'``, ``'dbrepo-diskstore'``,
    ``'chained-memmem'``, ``'chained-repo'``; any other value falls back
    to a ``DbRepo`` over an in-memory store.  The repo is installed as the
    process-wide default for the duration of the test and cleared again on
    teardown.
    """
    # Reset any configuration left over from an earlier test so tests
    # cannot leak state into each other.
    r.Config.set_current(r.Config({}, {}, None))
    disk_store_gen = None   # generator owning a temp disk store; finalized on teardown
    disk_store_gen2 = None  # second disk store, used only by 'chained-repo'
    repo2 = None            # second DbRepo whose schema must be dropped on teardown
    # NOTE(review): prevdir is only restored in the 'chained-repo' branch,
    # presumably because disk_store() changes the cwd — TODO confirm.
    prevdir = os.getcwd()
    if request.param == 'memoryrepo':
        repo = r.MemoryRepo(read=True, write=True, delete=True)
    elif request.param == 'dbrepo-diskstore':
        # disk_store() is a generator; advancing it once yields the store,
        # advancing it again (in teardown below) runs its cleanup.
        disk_store_gen = disk_store()
        repo = r.DbRepo(db_session,
                        next(disk_store_gen),
                        read=True,
                        write=True,
                        delete=True)
    elif request.param == 'chained-memmem':
        # Two in-memory repos chained together.
        repo = r.ChainedRepo([
            r.MemoryRepo(read=True, write=True, delete=True),
            r.MemoryRepo(read=True, write=True, delete=True),
        ])
    elif request.param == 'chained-repo':
        disk_store_gen = disk_store()
        disk_store_gen2 = disk_store()
        repo1 = r.DbRepo(db_session,
                         next(disk_store_gen),
                         read=True,
                         write=True,
                         delete=True)
        # Return to the original working directory before creating the
        # second repo (see NOTE above about disk_store() and cwd).
        os.chdir(prevdir)
        # Second repo connects by URL (not the db_session fixture) and
        # lives in its own schema so the two repos don't collide.
        repo2 = r.DbRepo(
            'postgresql://localhost/test_provenance',
            next(disk_store_gen2),
            read=True,
            write=True,
            delete=True,
            schema='second_repo',
        )
        repo = r.ChainedRepo([repo1, repo2])
    else:
        # Fallback: DbRepo over an in-memory blob store.
        repo = r.DbRepo(db_session,
                        memory_store(),
                        read=True,
                        write=True,
                        delete=True)

    p.set_default_repo(repo)
    yield repo
    # --- teardown ---
    p.set_default_repo(None)
    if repo2 is not None:
        # Drop the extra schema created for the 'chained-repo' case.
        repo2._db_engine.execute('drop schema second_repo cascade;')

    # Exhaust the disk-store generators so their cleanup code runs; the
    # default argument keeps StopIteration from propagating.
    if disk_store_gen:
        next(disk_store_gen, 'ignore')
    if disk_store_gen2:
        next(disk_store_gen2, 'ignore')
# Example #2
def test_inputs_json(db_session):
    """The serialized inputs of a pipeline artifact should match the
    expected JSON shape — and stay identical on a second (cached) run."""
    repo = r.DbRepo(db_session, bs.MemoryStore())

    @p.provenance(version=0, name='initial_data', repo=repo)
    def load_data(filename, timestamp):
        return {'data': [1, 2, 3], 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_X(data, process_x_inc, timestamp):
        shifted = [value + process_x_inc for value in data['data']]
        return {'data': shifted, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_Y(data, process_y_inc, timestamp):
        shifted = [value + process_y_inc for value in data['data']]
        return {'data': shifted, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def combine_processed_data(filename, inc_x, inc_y, timestamp):
        summed = [x + y for x, y in zip(inc_x['data'], inc_y['data'])]
        return {'data': summed, 'timestamp': timestamp}

    def pipeline(filename, timestamp, process_x_inc, process_y_inc):
        data = load_data(filename, timestamp)
        inc_x = process_data_X(data, process_x_inc, timestamp)
        inc_y = process_data_Y(data, process_y_inc, timestamp)
        combined = combine_processed_data(filename, inc_x, inc_y, timestamp)
        return {'data': data, 'inc_x': inc_x, 'inc_y': inc_y, 'res': combined}

    now = datetime(2016, 9, 27, 7, 51, 11, 613544)

    expected_inputs_json = {
        "__varargs": [],
        "filename": "foo-bar",
        "timestamp": now,
        "inc_x": {
            "id": "2c33a362ebd51f830d0b245473ab6c1269674259",
            "name": "test_repos.process_data_X",
            "type": "ArtifactProxy",
        },
        "inc_y": {
            "id": "f9b1bb7a8aaf435fbf60b92cd88bf6c46604f702",
            "name": "test_repos.process_data_Y",
            "type": "ArtifactProxy",
        },
    }

    # Run the pipeline twice: the second pass exercises the cached path
    # and must serialize to exactly the same inputs JSON.
    for _ in range(2):
        outcome = pipeline(filename='foo-bar',
                           process_x_inc=5,
                           process_y_inc=10,
                           timestamp=now)
        artifact = outcome['res'].artifact
        assert r._inputs_json(artifact.inputs) == expected_inputs_json
# Example #3
def test_inputs_json(db_session):
    """Pipeline artifact inputs serialize to the expected JSON, both on a
    fresh run and on a repeated (cached) run."""
    repo = r.DbRepo(db_session, bs.MemoryStore())

    @p.provenance(version=0, name="initial_data", repo=repo)
    def load_data(filename, timestamp):
        return {"data": [1, 2, 3], "timestamp": timestamp}

    @p.provenance(repo=repo)
    def process_data_X(data, process_x_inc, timestamp):
        bumped = [item + process_x_inc for item in data["data"]]
        return {"data": bumped, "timestamp": timestamp}

    @p.provenance(repo=repo)
    def process_data_Y(data, process_y_inc, timestamp):
        bumped = [item + process_y_inc for item in data["data"]]
        return {"data": bumped, "timestamp": timestamp}

    @p.provenance(repo=repo)
    def combine_processed_data(filename, inc_x, inc_y, timestamp):
        totals = [left + right for left, right in zip(inc_x["data"], inc_y["data"])]
        return {"data": totals, "timestamp": timestamp}

    def pipeline(filename, timestamp, process_x_inc, process_y_inc):
        data = load_data(filename, timestamp)
        inc_x = process_data_X(data, process_x_inc, timestamp)
        inc_y = process_data_Y(data, process_y_inc, timestamp)
        combined = combine_processed_data(filename, inc_x, inc_y, timestamp)
        return {"data": data, "inc_x": inc_x, "inc_y": inc_y, "res": combined}

    now = datetime(2016, 9, 27, 7, 51, 11, 613544)

    expected_inputs_json = {
        "__varargs": [],
        "filename": "foo-bar",
        "timestamp": now,
        "inc_x": {
            "id": "c74da9d379234901fe7a89e03fa800b0",  # md5
            # "id": "2c33a362ebd51f830d0b245473ab6c1269674259",  # sha1
            "name": "test_repos.process_data_X",
            "type": "ArtifactProxy",
        },
        "inc_y": {
            "id": "a1bd4d4ae1f33ae6379613618427f127",  # md5
            # "id": "f9b1bb7a8aaf435fbf60b92cd88bf6c46604f702",  # sha1
            "name": "test_repos.process_data_Y",
            "type": "ArtifactProxy",
        },
    }

    def assert_inputs_match():
        # The second invocation hits the cache and must serialize
        # identically to the first.
        outcome = pipeline(filename="foo-bar",
                           process_x_inc=5,
                           process_y_inc=10,
                           timestamp=now)
        assert r._inputs_json(outcome["res"].artifact.inputs) == expected_inputs_json

    assert_inputs_match()
    assert_inputs_match()