# NOTE: the @pytest.fixture decorator is required for request.param to work;
# the params list here is reconstructed from the comparisons below, and the
# name of the final (DbRepo + memory store) case is an assumption.
@pytest.fixture(params=['memoryrepo', 'dbrepo-diskstore', 'chained-memmem',
                        'chained-repo', 'dbrepo-memstore'])
def repo(request, db_session):
    # clean old config settings
    r.Config.set_current(r.Config({}, {}, None))
    disk_store_gen = None
    disk_store_gen2 = None
    repo2 = None
    prevdir = os.getcwd()
    if request.param == 'memoryrepo':
        repo = r.MemoryRepo(read=True, write=True, delete=True)
    elif request.param == 'dbrepo-diskstore':
        disk_store_gen = disk_store()
        repo = r.DbRepo(db_session, next(disk_store_gen), read=True, write=True, delete=True)
    elif request.param == 'chained-memmem':
        repo = r.ChainedRepo([
            r.MemoryRepo(read=True, write=True, delete=True),
            r.MemoryRepo(read=True, write=True, delete=True),
        ])
    elif request.param == 'chained-repo':
        disk_store_gen = disk_store()
        disk_store_gen2 = disk_store()
        repo1 = r.DbRepo(db_session, next(disk_store_gen), read=True, write=True, delete=True)
        # restore the original working directory before creating the second repo
        os.chdir(prevdir)
        repo2 = r.DbRepo(
            'postgresql://localhost/test_provenance',
            next(disk_store_gen2),
            read=True,
            write=True,
            delete=True,
            schema='second_repo',
        )
        repo = r.ChainedRepo([repo1, repo2])
    else:
        repo = r.DbRepo(db_session, memory_store(), read=True, write=True, delete=True)

    p.set_default_repo(repo)
    yield repo
    p.set_default_repo(None)

    if repo2 is not None:
        repo2._db_engine.execute('drop schema second_repo cascade;')
    # exhaust the disk_store generators so they can run their cleanup
    if disk_store_gen:
        next(disk_store_gen, 'ignore')
    if disk_store_gen2:
        next(disk_store_gen2, 'ignore')
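# The repo fixture above relies on disk_store() and memory_store() helpers
# defined elsewhere in the suite. A minimal sketch of plausible definitions
# follows -- the bs.DiskStore constructor arguments (cachedir plus the
# read/write/delete flags used throughout this file) are assumptions, not
# the project's actual helpers. The os.chdir mirrors the fixture's restore
# of prevdir, which implies the real helper changes the working directory.
import shutil
import tempfile


def memory_store():
    return bs.MemoryStore(read=True, write=True, delete=True)


def disk_store():
    # Generator used as a poor-man's context manager: the fixture pulls the
    # store out with next(), then exhausts the generator during teardown to
    # trigger the cleanup below.
    dirname = tempfile.mkdtemp()
    os.chdir(dirname)  # assumption: matches the fixture's os.chdir(prevdir)
    try:
        yield bs.DiskStore(cachedir=dirname, read=True, write=True, delete=True)
    finally:
        shutil.rmtree(dirname, ignore_errors=True)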
def test_inputs_json(db_session):
    repo = r.DbRepo(db_session, bs.MemoryStore())

    @p.provenance(version=0, name='initial_data', repo=repo)
    def load_data(filename, timestamp):
        return {'data': [1, 2, 3], 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_X(data, process_x_inc, timestamp):
        _data = [i + process_x_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def process_data_Y(data, process_y_inc, timestamp):
        _data = [i + process_y_inc for i in data['data']]
        return {'data': _data, 'timestamp': timestamp}

    @p.provenance(repo=repo)
    def combine_processed_data(filename, inc_x, inc_y, timestamp):
        _data = [a + b for a, b in zip(inc_x['data'], inc_y['data'])]
        return {'data': _data, 'timestamp': timestamp}

    def pipeline(filename, timestamp, process_x_inc, process_y_inc):
        data = load_data(filename, timestamp)
        inc_x = process_data_X(data, process_x_inc, timestamp)
        inc_y = process_data_Y(data, process_y_inc, timestamp)
        res = combine_processed_data(filename, inc_x, inc_y, timestamp)
        return {'data': data, 'inc_x': inc_x, 'inc_y': inc_y, 'res': res}

    now = datetime(2016, 9, 27, 7, 51, 11, 613544)

    expected_inputs_json = {
        '__varargs': [],
        'filename': 'foo-bar',
        'timestamp': now,
        'inc_x': {
            'id': 'c74da9d379234901fe7a89e03fa800b0',  # md5
            # 'id': '2c33a362ebd51f830d0b245473ab6c1269674259',  # sha1
            'name': 'test_repos.process_data_X',
            'type': 'ArtifactProxy',
        },
        'inc_y': {
            'id': 'a1bd4d4ae1f33ae6379613618427f127',  # md5
            # 'id': 'f9b1bb7a8aaf435fbf60b92cd88bf6c46604f702',  # sha1
            'name': 'test_repos.process_data_Y',
            'type': 'ArtifactProxy',
        },
    }

    results = pipeline(filename='foo-bar', process_x_inc=5, process_y_inc=10, timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json

    # run the pipeline a second time: the cached artifacts should serialize
    # to the exact same inputs JSON
    results = pipeline(filename='foo-bar', process_x_inc=5, process_y_inc=10, timestamp=now)
    res = results['res'].artifact
    inputs_json = r._inputs_json(res.inputs)
    assert inputs_json == expected_inputs_json
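# A follow-on sketch, not a test from the original suite: the hard-coded
# digests above are brittle across hash-function changes (md5 vs. sha1, as
# the commented-out ids show). This variant checks the serialized ids
# against the proxies themselves instead. It assumes ArtifactProxy exposes
# the wrapped artifact's hash as `.artifact.id`; the `.artifact` attribute
# is already relied on by the assertions above.
def _assert_ids_match_proxies(results):
    inputs_json = r._inputs_json(results['res'].artifact.inputs)
    assert inputs_json['inc_x']['id'] == results['inc_x'].artifact.id
    assert inputs_json['inc_y']['id'] == results['inc_y'].artifact.id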