def main():
    """
    Command line entry point: run a delta load using the nodes, names and merged
    files found in the directory given on the command line.

    The password is read from the password file when one is supplied, otherwise it is
    prompted for; when no user is given the database is opened without credentials.
    """
    args = parse_args()
    nodes_path = os.path.join(args.dir, NODES_IN_FILE)
    names_path = os.path.join(args.dir, NAMES_IN_FILE)
    merged_path = os.path.join(args.dir, MERGED_IN_FILE)

    client = ArangoClient(hosts=args.arango_url)
    if not args.user:
        db = client.db(args.database, verify=True)
    else:
        if args.pwd_file:
            with open(args.pwd_file) as handle:
                password = handle.read().strip()
        else:
            password = getpass.getpass()
        db = client.db(args.database, args.user, password, verify=True)

    attdb = ArangoBatchTimeTravellingDB(
        db,
        args.load_registry_collection,
        args.node_collection,
        default_edge_collection=args.edge_collection,
        merge_collection=args.merge_edge_collection)

    # The nodes file is opened twice: once for the node provider and once for the
    # edge provider, so each gets its own independent file handle.
    with open(nodes_path) as node_in, \
            open(names_path) as names_in, \
            open(nodes_path) as edge_in, \
            open(merged_path) as merge_in:
        node_provider = NCBINodeProvider(names_in, node_in)
        edge_provider = NCBIEdgeProvider(edge_in)
        merge_provider = NCBIMergeProvider(merge_in)

        load_graph_delta(
            _LOAD_NAMESPACE,
            node_provider,
            edge_provider,
            attdb,
            args.load_timestamp,
            args.release_timestamp,
            args.load_version,
            merge_source=merge_provider)
def main():
    """
    Command line entry point: parse the JSON file given on the command line, wrap it in
    an OBOGraphLoader and run a delta load with the providers the loader supplies.

    The password is read from the password file when one is supplied, otherwise it is
    prompted for; when no user is given the database is opened without credentials.
    """
    args = parse_args()

    client = ArangoClient(hosts=args.arango_url)
    if not args.user:
        db = client.db(args.database, verify=True)
    else:
        if args.pwd_file:
            with open(args.pwd_file) as handle:
                password = handle.read().strip()
        else:
            password = getpass.getpass()
        db = client.db(args.database, args.user, password, verify=True)

    attdb = ArangoBatchTimeTravellingDB(
        db,
        args.load_registry_collection,
        args.node_collection,
        default_edge_collection=args.edge_collection,
        merge_collection=args.merge_edge_collection)

    with open(args.file) as graph_in:
        obograph = json.load(graph_in)

    loader = OBOGraphLoader(obograph, args.onto_id_prefix, graph_id=args.graph_id)
    # Providers are requested in the same order as before: nodes, edges, merges.
    node_provider = loader.get_node_provider()
    edge_provider = loader.get_edge_provider()
    merge_provider = loader.get_merge_provider()

    load_graph_delta(
        args.load_namespace,
        node_provider,
        edge_provider,
        attdb,
        args.load_timestamp,
        args.release_timestamp,
        args.load_version,
        merge_source=merge_provider)
def main():
    """
    Command line entry point: run a delta load from the single input file given on the
    command line, using the GTDB node and edge providers.

    The ArangoDB URL is split into scheme, host and port for the client. The password is
    read from the password file when one is supplied, otherwise it is prompted for; when
    no user is given the database is opened without credentials.
    """
    args = parse_args()

    parsed = urlparse(args.arango_url)
    client = ArangoClient(protocol=parsed.scheme, host=parsed.hostname, port=parsed.port)
    if not args.user:
        db = client.db(args.database, verify=True)
    else:
        if args.pwd_file:
            with open(args.pwd_file) as handle:
                password = handle.read().strip()
        else:
            password = getpass.getpass()
        db = client.db(args.database, args.user, password, verify=True)

    attdb = ArangoBatchTimeTravellingDB(
        db,
        args.load_registry_collection,
        args.node_collection,
        default_edge_collection=args.edge_collection)

    # The same file is opened twice so the node and edge providers each get their own
    # independent file handle.
    with open(args.file) as node_in, open(args.file) as edge_in:
        node_provider = GTDBNodeProvider(node_in)
        edge_provider = GTDBEdgeProvider(edge_in)

        load_graph_delta(
            _LOAD_NAMESPACE,
            node_provider,
            edge_provider,
            attdb,
            args.load_timestamp,
            args.release_timestamp,
            args.load_version)
def main():
    """
    Command line entry point: parse the taxonomy file and fastas from the input directory
    given on the command line, then run a delta load with the SILVA node and edge providers.

    The password is read from the password file when one is supplied, otherwise it is
    prompted for; when no user is given the database is opened without credentials.
    """
    args = parse_args()

    client = ArangoClient(hosts=args.arango_url)
    if not args.user:
        db = client.db(args.database, verify=True)
    else:
        if args.pwd_file:
            with open(args.pwd_file) as handle:
                password = handle.read().strip()
        else:
            password = getpass.getpass()
        db = client.db(args.database, args.user, password, verify=True)

    attdb = ArangoBatchTimeTravellingDB(
        db,
        args.load_registry_collection,
        args.node_collection,
        default_edge_collection=args.edge_collection)

    # Both parse steps run before the providers are created; the providers take no
    # arguments here.
    TaxNode.parse_taxfile(args.input_dir)
    SeqNode.parse_fastas(args.input_dir)

    node_provider = SILVANodeProvider()
    edge_provider = SILVAEdgeProvider()
    print('got node/edge providers')

    load_graph_delta(
        _LOAD_NAMESPACE,
        node_provider,
        edge_provider,
        attdb,
        args.load_timestamp,
        args.release_timestamp,
        args.load_version)
def test_merge_setup_fail(arango_db):
    """
    Check that the load refuses to start when a merge source is supplied but the
    database wrapper was built without a merge collection.
    """
    create_timetravel_collection(arango_db, 'v')
    create_timetravel_collection(arango_db, 'e', edge=True)
    arango_db.create_collection('r')

    att = ArangoBatchTimeTravellingDB(arango_db, 'r', 'v', default_edge_collection='e')

    def attempt_load():
        # The sources are fake; real data is not needed to trigger the error.
        return load_graph_delta('ns', [], [], att, 1, 1, "2", merge_source=[{}])

    expected_message = 'A merge source is specified but the database has no merge collection'
    check_exception(attempt_load, ValueError, expected_message)
def test_rollback_fail_nothing_to_roll_back(arango_db):
    """
    Check that a rollback fails when fewer than two loads are registered.
    """
    create_timetravel_collection(arango_db, 'v')
    create_timetravel_collection(arango_db, 'e', edge=True)
    arango_db.create_collection('r')

    db = ArangoBatchTimeTravellingDB(arango_db, 'r', 'v', default_edge_collection='e')

    # Register exactly one load in the namespace.
    db.register_load_start('ns1', 'v1', 1000, 500, 100)
    db.register_load_complete('ns1', 'v1', 150)

    def attempt_rollback():
        return roll_back_last_load(db, 'ns1')

    check_exception(attempt_rollback, ValueError, 'Nothing to roll back')
def main():
    """
    Command line entry point: run a delta load from the gzipped 16S and / or 28S input
    files given on the command line, using the RDP node and edge providers.

    The ArangoDB URL is split into scheme, host and port for the client. The password is
    read from the password file when one is supplied, otherwise it is prompted for; when
    no user is given the database is opened without credentials.

    Raises:
        ValueError: if neither 16S nor 28S files were supplied.
    """
    a = parse_args()

    # Normalise both options to lists. The emptiness check below accepts the case where
    # only one kind of file is supplied, so the other may be unset (e.g. None); iterating
    # or concatenating it directly would then raise a TypeError.
    paths_16S = list(a.file_16S or [])
    paths_28S = list(a.file_28S or [])
    if not paths_16S and not paths_28S:
        raise ValueError('no input files were supplied')

    url = urlparse(a.arango_url)
    client = ArangoClient(protocol=url.scheme, host=url.hostname, port=url.port)
    if a.user:
        if a.pwd_file:
            with open(a.pwd_file) as pwd_file:
                pwd = pwd_file.read().strip()
        else:
            pwd = getpass.getpass()
        db = client.db(a.database, a.user, pwd, verify=True)
    else:
        db = client.db(a.database, verify=True)

    attdb = ArangoBatchTimeTravellingDB(
        db,
        a.load_registry_collection,
        a.node_collection,
        default_edge_collection=a.edge_collection)

    with ExitStack() as stack:

        def open_all(paths):
            # Open every path as gzipped text; the stack closes them all on exit.
            return [stack.enter_context(gzip.open(p, 'rt')) for p in paths]

        files_16S = open_all(paths_16S)
        files_28S = open_all(paths_28S)
        # The edge provider gets its own, separately opened handles on the same files.
        edgefiles = open_all(paths_16S + paths_28S)

        nodeprov = RDPNodeProvider(files_16S, files_28S)
        edgeprov = RDPEdgeProvider(edgefiles)

        load_graph_delta(
            _LOAD_NAMESPACE,
            nodeprov,
            edgeprov,
            attdb,
            a.load_timestamp,
            a.release_timestamp,
            a.load_version)
def test_rollback_with_merge_collection(arango_db):
    """
    Test rolling back a load when the database has a default edge collection, two extra
    edge collections and a merge collection.
    """
    vcol = create_timetravel_collection(arango_db, 'v')
    edcol = create_timetravel_collection(arango_db, 'def_e', edge=True)
    e1col = create_timetravel_collection(arango_db, 'e1', edge=True)
    e2col = create_timetravel_collection(arango_db, 'e2', edge=True)
    mcol = create_timetravel_collection(arango_db, 'm', edge=True)
    arango_db.create_collection('r')

    m = ADB_MAX_TIME

    # Each row: id, then the six trailing positional arguments passed to _import_v.
    vertex_rows = (
        ('1', 0, m, 0, m, 'v1', 'v2'),
        ('2', 300, m, 299, m, 'v2', 'v2'),
        ('3', 0, 299, 0, 298, 'v1', 'v1'),
        ('3', 300, m, 299, m, 'v2', 'v2'),
        ('4', 0, 299, 0, 298, 'v1', 'v1'),
    )
    for vid, *stamps in vertex_rows:
        _import_v(vcol, {'id': vid, 'k': vid}, *stamps)

    # Each row: collection, id, then the six trailing positional arguments passed to
    # _import_e ahead of the final 'f' argument. Every edge is a self loop.
    edge_rows = (
        (edcol, '1', 0, m, 0, m, 'v1', 'v2'),
        (edcol, '2', 300, m, 299, m, 'v2', 'v2'),
        (e1col, '1', 0, 299, 0, 298, 'v1', 'v1'),
        (e1col, '1', 300, m, 299, m, 'v2', 'v2'),
        (e2col, '1', 0, 299, 0, 298, 'v1', 'v1'),
        # merge edges are never updated once created
        (mcol, '1', 0, m, 0, m, 'v1', 'v1'),
        (mcol, '2', 300, m, 299, m, 'v2', 'v2'),
    )
    for col, eid, *stamps in edge_rows:
        _import_e(col, {'id': eid, 'to': eid, 'from': eid, 'k': eid}, *stamps, 'f')

    db = ArangoBatchTimeTravellingDB(
        arango_db, 'r', 'v',
        default_edge_collection='def_e',
        edge_collections=['e1', 'e2'],
        merge_collection='m')
    db.register_load_start('ns1', 'v1', 0, 0, 4567)
    db.register_load_complete('ns1', 'v1', 5678)
    db.register_load_start('ns1', 'v2', 300, 250, 6789)
    db.register_load_complete('ns1', 'v2', 7890)

    fac = ArangoBatchTimeTravellingDBFactory(arango_db, 'r')
    roll_back_last_load(fac, 'ns1')

    def expected_doc(collection, doc_id, is_edge=True):
        # Build the document expected in `collection` after the rollback: version v1 only,
        # created at 0 and never expired.
        key = doc_id + '_v1'
        doc = {'id': doc_id}
        if is_edge:
            doc['from'] = doc_id
            doc['to'] = doc_id
        doc['_key'] = key
        doc['_id'] = collection + '/' + key
        if is_edge:
            doc['_from'] = 'f/' + key
            doc['_to'] = 'f/' + key
        doc.update({
            'first_version': 'v1',
            'last_version': 'v1',
            'created': 0,
            'expired': ADB_MAX_TIME,
            'release_created': 0,
            'release_expired': ADB_MAX_TIME,
            'k': doc_id,
        })
        return doc

    vexpected = [expected_doc('v', vid, is_edge=False) for vid in ('1', '3', '4')]
    check_docs(arango_db, vexpected, 'v')

    ed_expected = [expected_doc('def_e', '1')]
    check_docs(arango_db, ed_expected, 'def_e')

    e1_expected = [expected_doc('e1', '1')]
    check_docs(arango_db, e1_expected, 'e1')

    e2_expected = [expected_doc('e2', '1')]
    check_docs(arango_db, e2_expected, 'e2')

    m_expected = [expected_doc('m', '1')]
    check_docs(arango_db, m_expected, 'm')

    registry_expected = {
        '_key': 'ns1_v1',
        '_id': 'r/ns1_v1',
        'load_namespace': 'ns1',
        'load_version': 'v1',
        'load_timestamp': 0,
        'release_timestamp': 0,
        'start_time': 4567,
        'completion_time': 5678,
        'state': 'complete',
        'vertex_collection': 'v',
        'merge_collection': 'm',
        'edge_collections': ['def_e', 'e1', 'e2'],
    }
    _check_registry_doc(arango_db, registry_expected, 'r')
def _load_no_merge_source(arango_db, batchsize):
    """
    Delta load a small graph containing deleted, updated, unchanged and new nodes and
    edges, then check the resulting documents and the registry entry.

    When batchsize is truthy it is passed to the load as batch_size; otherwise the load
    is called without that argument.
    """
    vcol = create_timetravel_collection(arango_db, 'v')
    def_ecol = create_timetravel_collection(arango_db, 'def_e', edge=True)
    e1col = create_timetravel_collection(arango_db, 'e1', edge=True)
    e2col = create_timetravel_collection(arango_db, 'e2', edge=True)
    arango_db.create_collection('r')

    # (created, expired, release_created, release_expired) combinations used below.
    long_gone = (100, 300, 99, 299)
    current = (100, ADB_MAX_TIME, 99, ADB_MAX_TIME)
    retired = (100, 499, 99, 399)
    fresh = (500, ADB_MAX_TIME, 400, ADB_MAX_TIME)

    _import_bulk(
        vcol,
        [
            {'id': 'expire', 'data': 'foo'},  # expired nodes shouldn't be touched
            {'id': 'gap', 'data': 'super sweet'},  # even if reintroduced later
        ],
        *long_gone, 'v0')

    # there are 2 update and 2 same nodes for the purposes of testing edge updates correctly
    _import_bulk(
        vcol,
        [
            {'id': 'old', 'data': 'foo'},  # will be deleted
            {'id': 'same1', 'data': {'bar': 'baz'}},  # will not change
            {'id': 'same2', 'data': ['bar', 'baz']},  # will not change
            {'id': 'up1', 'data': {'old': 'data'}},  # will be updated
            {'id': 'up2', 'data': ['old', 'data']},  # will be updated
        ],
        *current, 'v0', 'v1')

    _import_bulk(
        def_ecol,
        [
            # shouldn't be touched
            {'id': 'expire', 'from': 'expire', 'to': 'same2', 'data': 'foo'},
            # ditto
            {'id': 'gap', 'from': 'gap', 'to': 'same1', 'data': 'bar'},
        ],
        *long_gone, 'v0', vert_col_name=vcol.name)

    _import_bulk(
        def_ecol,
        [
            # will be deleted
            {'id': 'old', 'from': 'old', 'to': 'up1', 'data': 'foo'},
            # will be updated to new up1
            {'id': 'up1', 'from': 'same1', 'to': 'up1', 'data': 'bar'},
        ],
        *current, 'v0', 'v1', vert_col_name=vcol.name)

    _import_bulk(
        e1col,
        [
            # will be deleted
            {'id': 'old', 'from': 'old', 'to': 'same1', 'data': 'baz'},
            # no change
            {'id': 'same', 'from': 'same1', 'to': 'same2', 'data': 'bing'},
        ],
        *current, 'v0', 'v1', vert_col_name=vcol.name)

    _import_bulk(
        e2col,
        [
            # will be updated
            {'id': 'change', 'from': 'same1', 'to': 'same2', 'data': 'baz'},
            # will be updated to up2
            {'id': 'up2', 'from': 'up2', 'to': 'same2', 'data': 'boof'},
        ],
        *current, 'v0', 'v1', vert_col_name=vcol.name)

    vsource = [
        {'id': 'same1', 'data': {'bar': 'baz'}},  # will not change
        {'id': 'same2', 'data': ['bar', 'baz']},  # will not change
        {'id': 'up1', 'data': {'new': 'data'}},  # will be updated based on data
        {'id': 'up2', 'data': ['old', 'data1']},  # will be updated based on data
        {'id': 'gap', 'data': 'super sweet'},  # new node
    ]

    esource = [
        # will be updated since up1 is updated. Default collection.
        {'id': 'up1', 'from': 'same1', 'to': 'up1', 'data': 'bar'},
        # won't change
        {'_collection': 'e1', 'id': 'same', 'from': 'same1', 'to': 'same2', 'data': 'bing'},
        # will be updated based on data
        {'_collection': 'e2', 'id': 'change', 'from': 'same1', 'to': 'same2', 'data': 'boo'},
        # will be updated since up2 is updated.
        {'_collection': 'e2', 'id': 'up2', 'from': 'up2', 'to': 'same2', 'data': 'boof'},
        # new edge
        {'_collection': 'def_e', 'id': 'gap', 'from': 'gap', 'to': 'same1', 'data': 'bar'},
    ]

    db = ArangoBatchTimeTravellingDB(
        arango_db, 'r', 'v',
        default_edge_collection='def_e',
        edge_collections=['e1', 'e2'])

    # Only pass batch_size when a truthy batch size was requested.
    extra = {'batch_size': batchsize} if batchsize else {}
    load_graph_delta('ns', vsource, esource, db, 500, 400, 'v2', **extra)

    def finish(head, first, last, span, data):
        # Append the version / time fields and the data to the identifying fields.
        created, expired, rel_created, rel_expired = span
        doc = dict(head)
        doc.update({
            'first_version': first,
            'last_version': last,
            'created': created,
            'expired': expired,
            'release_created': rel_created,
            'release_expired': rel_expired,
            'data': data,
        })
        return doc

    def vert(doc_id, first, last, span, data):
        # Expected vertex document; the key is the id suffixed with the first version.
        key = doc_id + '_' + first
        return finish({'id': doc_id, '_key': key, '_id': 'v/' + key}, first, last, span, data)

    def edge(col, doc_id, frm, to, first, last, span, data):
        # Expected edge document; frm and to are (vertex id, vertex first version) pairs.
        key = doc_id + '_' + first
        head = {
            'id': doc_id,
            'from': frm[0],
            'to': to[0],
            '_key': key,
            '_id': col + '/' + key,
            '_from': 'v/' + frm[0] + '_' + frm[1],
            '_to': 'v/' + to[0] + '_' + to[1],
        }
        return finish(head, first, last, span, data)

    vexpected = [
        vert('expire', 'v0', 'v0', long_gone, 'foo'),
        vert('gap', 'v0', 'v0', long_gone, 'super sweet'),
        vert('gap', 'v2', 'v2', fresh, 'super sweet'),
        vert('old', 'v0', 'v1', retired, 'foo'),
        vert('same1', 'v0', 'v2', current, {'bar': 'baz'}),
        vert('same2', 'v0', 'v2', current, ['bar', 'baz']),
        vert('up1', 'v0', 'v1', retired, {'old': 'data'}),
        vert('up1', 'v2', 'v2', fresh, {'new': 'data'}),
        vert('up2', 'v0', 'v1', retired, ['old', 'data']),
        vert('up2', 'v2', 'v2', fresh, ['old', 'data1']),
    ]
    check_docs(arango_db, vexpected, 'v')

    def_e_expected = [
        edge('def_e', 'expire', ('expire', 'v0'), ('same2', 'v0'), 'v0', 'v0', long_gone, 'foo'),
        edge('def_e', 'gap', ('gap', 'v0'), ('same1', 'v0'), 'v0', 'v0', long_gone, 'bar'),
        edge('def_e', 'gap', ('gap', 'v2'), ('same1', 'v0'), 'v2', 'v2', fresh, 'bar'),
        edge('def_e', 'old', ('old', 'v0'), ('up1', 'v0'), 'v0', 'v1', retired, 'foo'),
        edge('def_e', 'up1', ('same1', 'v0'), ('up1', 'v0'), 'v0', 'v1', retired, 'bar'),
        edge('def_e', 'up1', ('same1', 'v0'), ('up1', 'v2'), 'v2', 'v2', fresh, 'bar'),
    ]
    check_docs(arango_db, def_e_expected, 'def_e')

    e1_expected = [
        edge('e1', 'old', ('old', 'v0'), ('same1', 'v0'), 'v0', 'v1', retired, 'baz'),
        edge('e1', 'same', ('same1', 'v0'), ('same2', 'v0'), 'v0', 'v2', current, 'bing'),
    ]
    check_docs(arango_db, e1_expected, 'e1')

    e2_expected = [
        edge('e2', 'change', ('same1', 'v0'), ('same2', 'v0'), 'v0', 'v1', retired, 'baz'),
        edge('e2', 'change', ('same1', 'v0'), ('same2', 'v0'), 'v2', 'v2', fresh, 'boo'),
        edge('e2', 'up2', ('up2', 'v0'), ('same2', 'v0'), 'v0', 'v1', retired, 'boof'),
        edge('e2', 'up2', ('up2', 'v2'), ('same2', 'v0'), 'v2', 'v2', fresh, 'boof'),
    ]
    check_docs(arango_db, e2_expected, 'e2')

    # start_time and completion_time are deliberately left out of the expected document;
    # compare_times_to_now=True is passed to the checker instead.
    registry_expected = {
        '_key': 'ns_v2',
        '_id': 'r/ns_v2',
        'load_namespace': 'ns',
        'load_version': 'v2',
        'load_timestamp': 500,
        'release_timestamp': 400,
        'state': 'complete',
        'vertex_collection': 'v',
        'merge_collection': None,
        'edge_collections': ['def_e', 'e1', 'e2'],
    }
    _check_registry_doc(arango_db, registry_expected, 'r', compare_times_to_now=True)
def test_merge_edges(arango_db):
    """
    Test a delta load with a merge source: one pre-existing node is absent from the new
    node source and is the origin of one of the three supplied merge edges.
    """
    vcol = create_timetravel_collection(arango_db, 'v')
    ecol = create_timetravel_collection(arango_db, 'e', edge=True)
    create_timetravel_collection(arango_db, 'm', edge=True)
    arango_db.create_collection('r')

    # (created, expired, release_created, release_expired) combinations used below.
    current = (100, ADB_MAX_TIME, 99, ADB_MAX_TIME)
    retired = (100, 499, 99, 399)
    fresh = (500, ADB_MAX_TIME, 400, ADB_MAX_TIME)

    _import_bulk(
        vcol,
        [
            {'id': 'root', 'data': 'foo'},  # will not change
            {'id': 'merged', 'data': 'bar'},  # will be merged
            {'id': 'target', 'data': 'baz'},  # will not change
        ],
        *current, 'v1')

    _import_bulk(
        ecol,
        [
            # will be deleted
            {'id': 'to_m', 'from': 'root', 'to': 'merged', 'data': 'foo'},
            # shouldn't be touched
            {'id': 'to_t', 'from': 'root', 'to': 'target', 'data': 'bar'},
        ],
        *current, 'v1', vert_col_name=vcol.name)

    vsource = [
        {'id': 'root', 'data': 'foo'},  # will not change
        {'id': 'target', 'data': 'baz'},  # will not change
    ]

    esource = [
        {'id': 'to_t', 'from': 'root', 'to': 'target', 'data': 'bar'},  # no change
    ]

    msource = [
        {'id': 'f_to_t', 'from': 'fake1', 'to': 'target', 'data': 'whee'},  # will be ignored
        {'id': 'm_to_t', 'from': 'merged', 'to': 'target', 'data': 'woo'},  # will be applied
        {'id': 't_to_f', 'from': 'target', 'to': 'fake2', 'data': 'whoa'},  # will be ignored
    ]

    db = ArangoBatchTimeTravellingDB(
        arango_db, 'r', 'v', default_edge_collection='e', merge_collection='m')

    load_graph_delta('mns', vsource, esource, db, 500, 400, 'v2', merge_source=msource)

    def finish(head, first, last, span, data):
        # Append the version / time fields and the data to the identifying fields.
        created, expired, rel_created, rel_expired = span
        doc = dict(head)
        doc.update({
            'first_version': first,
            'last_version': last,
            'created': created,
            'expired': expired,
            'release_created': rel_created,
            'release_expired': rel_expired,
            'data': data,
        })
        return doc

    def vert(doc_id, first, last, span, data):
        # Expected vertex document; the key is the id suffixed with the first version.
        key = doc_id + '_' + first
        return finish({'id': doc_id, '_key': key, '_id': 'v/' + key}, first, last, span, data)

    def edge(col, doc_id, frm, to, first, last, span, data):
        # Expected edge document; frm and to are (vertex id, vertex first version) pairs.
        key = doc_id + '_' + first
        head = {
            'id': doc_id,
            'from': frm[0],
            'to': to[0],
            '_key': key,
            '_id': col + '/' + key,
            '_from': 'v/' + frm[0] + '_' + frm[1],
            '_to': 'v/' + to[0] + '_' + to[1],
        }
        return finish(head, first, last, span, data)

    vexpected = [
        vert('root', 'v1', 'v2', current, 'foo'),
        vert('merged', 'v1', 'v1', retired, 'bar'),
        vert('target', 'v1', 'v2', current, 'baz'),
    ]
    check_docs(arango_db, vexpected, 'v')

    e_expected = [
        edge('e', 'to_m', ('root', 'v1'), ('merged', 'v1'), 'v1', 'v1', retired, 'foo'),
        edge('e', 'to_t', ('root', 'v1'), ('target', 'v1'), 'v1', 'v2', current, 'bar'),
    ]
    check_docs(arango_db, e_expected, 'e')

    m_expected = [
        edge('m', 'm_to_t', ('merged', 'v1'), ('target', 'v1'), 'v2', 'v2', fresh, 'woo'),
    ]
    check_docs(arango_db, m_expected, 'm')

    # start_time and completion_time are deliberately left out of the expected document;
    # compare_times_to_now=True is passed to the checker instead.
    registry_expected = {
        '_key': 'mns_v2',
        '_id': 'r/mns_v2',
        'load_namespace': 'mns',
        'load_version': 'v2',
        'load_timestamp': 500,
        'release_timestamp': 400,
        'state': 'complete',
        'vertex_collection': 'v',
        'merge_collection': 'm',
        'edge_collections': ['e'],
    }
    _check_registry_doc(arango_db, registry_expected, 'r', compare_times_to_now=True)
def test_rollback_without_merge_collection(arango_db):
    """
    Test rolling back a load when the database has no merge collection and a single
    edge collection.
    """
    vcol = create_timetravel_collection(arango_db, 'v')
    ecol = create_timetravel_collection(arango_db, 'e', edge=True)
    arango_db.create_collection('r')

    m = ADB_MAX_TIME

    # Each row: id, then the six trailing positional arguments passed to _import_v.
    vertex_rows = (
        ('1', 0, m, 0, m, 'v1', 'v2'),
        ('2', 300, m, 299, m, 'v2', 'v2'),
        ('3', 0, 299, 0, 298, 'v1', 'v1'),
        ('3', 300, m, 299, m, 'v2', 'v2'),
        ('4', 0, 299, 0, 298, 'v1', 'v1'),
    )
    for vid, *stamps in vertex_rows:
        _import_v(vcol, {'id': vid, 'k': vid}, *stamps)

    # Each row: id, then the six trailing positional arguments passed to _import_e ahead
    # of the final 'f' argument. Every edge is a self loop.
    edge_rows = (
        ('1', 0, m, 0, m, 'v1', 'v2'),
        ('2', 300, m, 299, m, 'v2', 'v2'),
        ('3', 0, 299, 0, 298, 'v1', 'v1'),
        # NOTE(review): this row uses 399, 0 where the matching vertex row uses 299, m;
        # reproduced as found - confirm whether that is intentional.
        ('3', 300, m, 399, 0, 'v2', 'v2'),
        ('4', 0, 299, 0, 298, 'v1', 'v1'),
    )
    for eid, *stamps in edge_rows:
        _import_e(ecol, {'id': eid, 'to': eid, 'from': eid, 'k': eid}, *stamps, 'f')

    db = ArangoBatchTimeTravellingDB(arango_db, 'r', 'v', default_edge_collection='e')
    db.register_load_start('ns1', 'v1', 0, 0, 4567)
    db.register_load_complete('ns1', 'v1', 5678)
    db.register_load_start('ns1', 'v2', 300, 250, 6789)
    db.register_load_complete('ns1', 'v2', 7890)

    fac = ArangoBatchTimeTravellingDBFactory(arango_db, 'r')
    roll_back_last_load(fac, 'ns1')

    def expected_doc(collection, doc_id, is_edge=True):
        # Build the document expected in `collection` after the rollback: version v1 only,
        # created at 0 and never expired.
        key = doc_id + '_v1'
        doc = {'id': doc_id}
        if is_edge:
            doc['from'] = doc_id
            doc['to'] = doc_id
        doc['_key'] = key
        doc['_id'] = collection + '/' + key
        if is_edge:
            doc['_from'] = 'f/' + key
            doc['_to'] = 'f/' + key
        doc.update({
            'first_version': 'v1',
            'last_version': 'v1',
            'created': 0,
            'expired': ADB_MAX_TIME,
            'release_created': 0,
            'release_expired': ADB_MAX_TIME,
            'k': doc_id,
        })
        return doc

    surviving_ids = ('1', '3', '4')

    vexpected = [expected_doc('v', vid, is_edge=False) for vid in surviving_ids]
    check_docs(arango_db, vexpected, 'v')

    e_expected = [expected_doc('e', eid) for eid in surviving_ids]
    check_docs(arango_db, e_expected, 'e')

    registry_expected = {
        '_key': 'ns1_v1',
        '_id': 'r/ns1_v1',
        'load_namespace': 'ns1',
        'load_version': 'v1',
        'load_timestamp': 0,
        'release_timestamp': 0,
        'start_time': 4567,
        'completion_time': 5678,
        'state': 'complete',
        'vertex_collection': 'v',
        'merge_collection': None,
        'edge_collections': ['e'],
    }
    _check_registry_doc(arango_db, registry_expected, 'r')