def test_identities(events, result_sets, db, es):
    """Check that each identifier reports its complete identity set.

    All payloads generated from ``events`` are handled first. Then, for
    every value in every expected set, the identifier stored under that
    value is looked up and the values of its identities must equal the
    expected set it came from.

    :param events: Event description passed to ``generate_payloads``.
    :param result_sets: Iterable of sets of identifier values; each set is
        compared for equality against a ``set`` of identity values.
    :param db: Not referenced in the body (presumably a fixture required
        for its setup side effects -- confirm).
    :param es: Not referenced in the body (presumably a fixture required
        for its setup side effects -- confirm).
    """
    # NOTE: We assume that only one identifier scheme is being used, so
    # comparing bare identifier values is enough when comparing sets.
    for payload in generate_payloads(events):
        EventAPI.handle_event(payload)

    for expected in result_sets:
        for value in expected:
            # ``.one()`` requires exactly one identifier with this value.
            identifier = Identifier.query.filter_by(value=value).one()
            found = {
                identity.value for identity in identifier.get_identities()
            }
            assert found == expected
def test_simple_citations(test_case_name, events, results, db, es):
    """Test simple citation queries.

    Handles every payload generated from ``events``, refreshes the indices
    and then, for each cited identifier value in ``results``, collects what
    ``RelationshipAPI.get_citations`` yields and compares it with the
    expected values.

    :param test_case_name: Not referenced in the body (presumably a label
        for the parametrized case -- confirm).
    :param events: Event description passed to ``generate_payloads``.
    :param results: Mapping of cited identifier value to a pair
        ``(expected_citations, expected_relations)``, where the first item
        is a set of citing identifier values and the second a set of
        ``(source_value, target_value)`` tuples.
    :param db: Not referenced in the body (presumably a fixture -- confirm).
    :param es: Object exposing ``indices.refresh()`` (presumably an
        Elasticsearch client -- confirm).
    """
    for payload in generate_payloads(events):
        EventAPI.handle_event(payload)
    # Refresh before querying (presumably so that freshly indexed documents
    # are visible to the citation query -- confirm).
    es.indices.refresh()

    for cited_value, expected in results.items():
        expected_citations, expected_relations = expected
        cited = Identifier.query.filter_by(value=cited_value).one()

        found_citations = set()
        found_relations = set()
        # Each yielded item pairs a group of citing identifiers with the
        # relationships that link them to the cited identifier.
        for citing_ids, rels in RelationshipAPI.get_citations(cited):
            found_citations.update(ident.value for ident in citing_ids)
            found_relations.update(
                (rel.source.value, rel.target.value) for rel in rels
            )

        assert found_citations == expected_citations
        assert found_relations == expected_relations
def generate_full_graph():
    """Print a large, randomly wired citation graph as JSON payloads.

    Builds a list of ``['C', source, relation, target, date]`` rows:

    * every parent ``P_<i>`` gets ``N_ids`` ``IsIdenticalTo`` identifiers,
    * every parent gets ``N_children`` ``HasVersion`` children, each with
      ``N_ids`` ``IsIdenticalTo`` identifiers of its own,
    * every parent group cites up to ``N_citations`` randomly sampled
      groups of *later* parents (so citations only point forward).

    The rows are passed as a single event to ``generate_payloads`` and the
    result is printed with ``json.dumps(..., indent=2)``. Nothing is
    returned. The output depends on the state of the ``random`` module.
    """
    # N_nodes = (N_parents * (N_children + 1)) * (N_ids + 1)
    # N_HasVersion = N_parents * N_children
    # N_IsIdenticalTo = (N_parents * (N_children + 1)) * N_ids
    # N_Cites = ~ N_parents * N_citations
    N_parents = 10000
    N_children = 5
    N_ids = 2
    N_citations = 20
    date = '2018-01-01'

    relations = []
    parents = ['P_' + str(i) for i in range(N_parents)]
    # Maps each parent to the pool of identifiers that may act as a
    # citation endpoint for that parent's group.
    par_group = defaultdict(list)

    for p in parents:
        for id_idx in range(N_ids):
            p_iden = p + "_ID_" + str(id_idx)
            par_group[p].append(p_iden)
            relations.append(['C', p, 'IsIdenticalTo', p_iden, date])
        for c_idx in range(N_children):
            c = p + "_C_" + str(c_idx)
            # NOTE(review): this adds the parent ``p`` once per child and
            # never adds the child ``c`` itself, so children are never
            # picked as citation endpoints while the parent is weighted
            # N_children times. Possibly a typo for ``c``; behaviour is
            # kept as-is until the intent is confirmed.
            par_group[p].append(p)
            relations.append(['C', p, 'HasVersion', c, date])
            for id_idx in range(N_ids):
                c_iden = c + "_ID_" + str(id_idx)
                par_group[p].append(c_iden)
                relations.append(['C', c, 'IsIdenticalTo', c_iden, date])

    # ``par_group`` was filled in ``parents`` order, so ``idx`` is also the
    # index of the group's parent within ``parents``.
    for idx, grp in enumerate(par_group.values()):
        if idx + 1 <= N_parents - 1:
            # The last parents have fewer than N_citations successors.
            n_samples = min(N_citations, N_parents - idx - 1)
            next_parents = random.sample(range(idx + 1, N_parents), n_samples)
            for next_par_idx in next_parents:
                citeA = random.choice(grp)
                next_par = parents[next_par_idx]
                citeB = random.choice(par_group[next_par])
                relations.append(['C', citeA, 'Cites', citeB, date])

    res = generate_payloads([relations])
    print(json.dumps(res, indent=2))
def test_grouping_query(test_case_name, events, results, db, es):
    """Handle the generated events for a grouping-query test case.

    :param test_case_name: Not referenced in the body.
    :param events: Event description passed to ``generate_payloads``.
    :param results: Mapping whose items are iterated but not yet checked.
    :param db: Not referenced in the body (presumably a fixture -- confirm).
    :param es: Not referenced in the body (presumably a fixture -- confirm).
    """
    for payload in generate_payloads(events):
        EventAPI.handle_event(payload)

    # TODO: placeholder -- the expected results are iterated but nothing is
    # asserted, so this test only verifies that event handling does not
    # raise.
    for _cited_id_value, _expected in results.items():
        pass
def _handle_events(evtsrc):
    """Pass every payload generated from ``evtsrc`` to the event API.

    :param evtsrc: Event description passed to ``generate_payloads``.
    """
    for payload in generate_payloads(evtsrc):
        EventAPI.handle_event(payload)