def create_objects_from_relations(
        relationships: List[Tuple],
        metadata: List[Tuple[dict, dict, dict]] = None):
    """Given a list of relationships, create all corresponding DB objects.

    Optional 'metadata' list can be passed, which contains corresponding
    metadata items for each of the relationships, i.e., a triplet of
    Source, Relation and Target metadata.

    E.g.:
    relationships = [
        ('A', Relation.Cites, 'B'),
        ('C', Relation.Cites, 'D'),
    ]

    metadata = [
        ({<source-1>}, {<relation-1>}, {<target-1>}),
        ({<source-2>}, {<relation-2>}, {<target-2>}),
    ]

    Will create Identifier, Relationship, Group and all M2M objects.

    :param relationships: list of ``(source, relation, target)`` triplets,
        where source and target are identifier values (stored with the
        'doi' scheme).
    :param metadata: optional list parallel to ``relationships``; when it is
        missing or empty, empty metadata is used for every relationship.
    :raises AssertionError: if ``metadata`` is given but its length differs
        from that of ``relationships``.
    """
    if not metadata:
        metadata = [({}, {}, {}) for _ in relationships]
    assert len(relationships) == len(metadata), \
        'metadata must contain one triplet per relationship'

    # Unique identifier values; sorted so objects are created in a
    # deterministic order.
    identifiers = sorted(
        {value for src, _, tar in relationships for value in (src, tar)})

    # Maps identifier value -> pair returned by get_or_create_groups, which
    # is unpacked below as (identity group, version group).
    groups = {}
    for value in identifiers:
        id_ = Identifier(value=value, scheme='doi')
        db.session.add(id_)
        groups[value] = get_or_create_groups(id_)

    for (src, rel, tar), (src_m, rel_m, tar_m) in zip(relationships, metadata):
        src_ = Identifier.get(src, 'doi')
        tar_ = Identifier.get(tar, 'doi')
        r = Relationship(source=src_, target=tar_, relation=rel)
        db.session.add(r)

        s_id_gr, s_ver_gr = groups[src]
        t_id_gr, t_ver_gr = groups[tar]

        # The identity-level group relationship gets an explicit id so that
        # its metadata row can reference it right away.
        id_gr_rel = GroupRelationship(
            source=s_id_gr, target=t_id_gr, relation=rel,
            type=GroupType.Identity, id=uuid.uuid4())
        s_id_gr.data.update(src_m, validate=False)
        t_id_gr.data.update(tar_m, validate=False)

        grm = GroupRelationshipMetadata(group_relationship_id=id_gr_rel.id)
        db.session.add(grm)
        grm.update(rel_m, validate=False)

        db.session.add(Relationship2GroupRelationship(
            relationship=r, group_relationship=id_gr_rel))
        db.session.add(id_gr_rel)

        # The version-level group relationship wraps the identity-level one.
        ver_gr_rel = GroupRelationship(
            source=s_ver_gr, target=t_ver_gr, relation=rel,
            type=GroupType.Version)
        db.session.add(GroupRelationshipM2M(
            relationship=ver_gr_rel, subrelationship=id_gr_rel))
        db.session.add(ver_gr_rel)
    db.session.commit()
def test_get_or_create_groups(db):
    """Test creating groups (Identity and Version) for an identifier.

    Covers three steps: creating the groups for a fresh identifier, calling
    the helper again for the same (re-fetched) identifier and verifying that
    nothing is duplicated, and finally creating groups for a second,
    unrelated identifier.
    """
    id1 = Identifier(value='A', scheme='doi')
    db.session.add(id1)

    # Nothing group-related exists before the first call.
    assert not Group.query.count()
    assert not GroupM2M.query.count()
    assert not Identifier2Group.query.count()

    id_g, ver_g = get_or_create_groups(id1)
    db.session.commit()

    def _check_groups(identifier, id_g, ver_g):
        """Assert exactly one Identity/Version group pair exists for 'A'."""
        assert Group.query.count() == 2
        assert GroupM2M.query.count() == 1
        assert Identifier2Group.query.count() == 1
        assert Group.query.filter_by(type=GroupType.Identity).one() == id_g
        assert Group.query.filter_by(type=GroupType.Version).one() == ver_g
        id2g = Identifier2Group.query.one()
        g2g = GroupM2M.query.one()
        assert id2g.identifier == identifier
        assert id2g.group == id_g
        assert g2g.group == ver_g
        assert g2g.subgroup == id_g

    _check_groups(id1, id_g, ver_g)

    # Fetch the ID again and try to create groups again, this time passing
    # the re-fetched identifier so the lookup path is what gets exercised.
    id2 = Identifier.get('A', 'doi')
    assert id2
    id_g, ver_g = get_or_create_groups(id2)
    db.session.commit()

    # Make sure nothing changed
    _check_groups(id2, id_g, ver_g)

    # Add a new, separate identifier
    id3 = Identifier(value='B', scheme='doi')
    db.session.add(id3)
    id_g, ver_g = get_or_create_groups(id3)

    assert Group.query.count() == 4
    assert GroupM2M.query.count() == 2
    assert Identifier2Group.query.count() == 2
def id_obj(identifier, scheme):
    """Build an ``Identifier`` DB object for the given value and scheme.

    The object is only constructed here; this helper does not touch the
    database session.
    """
    new_identifier = Identifier(value=identifier, scheme=scheme)
    return new_identifier
def id_obj(identifier, scheme):
    """Return a new ``Identifier`` with the given value and scheme."""
    attrs = dict(value=identifier, scheme=scheme)
    return Identifier(**attrs)
def assert_grouping(grouping):
    """Determine if database state corresponds to 'grouping' definition.

    See tests in test_grouping.py for example input.

    :param grouping: triple ``(groups, relationships, relationship_groups)``:

        * ``groups`` - each item is either a sequence of identifier values
          (``str``), describing an Identity group, or a sequence of indices
          (``int``) into ``groups`` itself, describing a Version group.
        * ``relationships`` - ``(a, relation, b)`` triples; ``a`` and ``b``
          are both identifier values (an identifier relationship) or both
          indices into ``groups`` (a group relationship).
        * ``relationship_groups`` - ``(index, sub_indices)`` pairs, all
          indices pointing into ``relationships``.
    :raises AssertionError: if the database state does not match.
    :raises ValueError: if a group or relationship entry is neither
        identifier-based nor index-based (it used to be skipped silently,
        which shifted every later index).

    The ``.one()`` lookups additionally fail when a described object is
    missing or is not unique in the database.
    """
    groups, relationships, relationship_groups = grouping
    group_types = [
        GroupType.Identity if isinstance(g[0], str) else GroupType.Version
        for g in groups]

    # 'relationship_types' maps the relationship index to:
    # * None if it's a regular Relation between Identifiers
    # * GroupType.Identity if it's a relation between 'Identity'-type Groups
    # * GroupType.Version if it's a relation between 'Version'-type Groups
    relationship_types = [
        None if isinstance(r[0], str) else group_types[r[0]]
        for r in relationships]

    id_groups = [g for g, t in zip(groups, group_types)
                 if t == GroupType.Identity]
    unique_ids = {value for g in id_groups for value in g}

    # id_map is a mapping of str -> Identifier
    # E.g.: 'A' -> Instance('A', 'doi')
    id_map = {value: Identifier.get(value, 'doi') for value in unique_ids}

    # group_map[i] is the Group object described by groups[i]
    group_map = []
    for g in groups:
        first = g[0]
        if isinstance(first, str):  # Identity group
            group_map.append(
                Identifier2Group.query.filter_by(
                    identifier=id_map[first]).one().group)
        elif isinstance(first, int):  # Version group (via GroupM2M)
            group_map.append(
                GroupM2M.query.filter_by(
                    subgroup=group_map[first]).one().group)
        else:
            raise ValueError(
                'Invalid group definition: {0!r}'.format(g))

    # rel_map[i] is the DB object described by relationships[i]
    rel_map = []
    for r in relationships:
        obj_a, relation, obj_b = r
        if isinstance(obj_a, str) and isinstance(obj_b, str):
            # Identifiers relation
            rel_map.append(
                Relationship.query.filter_by(
                    source=id_map[obj_a], target=id_map[obj_b],
                    relation=relation).one())
        elif isinstance(obj_a, int) and isinstance(obj_b, int):
            # Groups relation
            rel_map.append(
                GroupRelationship.query.filter_by(
                    source=group_map[obj_a], target=group_map[obj_b],
                    relation=relation).one())
        else:
            raise ValueError(
                'Invalid relationship definition: {0!r}'.format(r))

    # Make sure all loaded identifiers are unique
    assert len({i.id for i in id_map.values()}) == len(id_map)
    assert Identifier.query.count() == len(id_map)

    # Make sure there's correct number of Identifier2Group records
    # and 'Identity'-type groups
    assert Identifier2Group.query.count() == len(id_map)
    assert Group.query.filter_by(
        type=GroupType.Identity).count() == len(id_groups)
    assert GroupMetadata.query.count() == len(id_groups)

    # Make sure that all loaded groups are unique
    assert len({g.id for g in group_map}) == len(group_map)
    assert Group.query.count() == len(group_map)

    # Make sure there's correct number of GroupM2M records
    # and 'Version'-type groups
    m2m_groups = [g for g in groups if isinstance(g[0], int)]
    assert Group.query.filter_by(
        type=GroupType.Version).count() == len(m2m_groups)
    # There are as many M2M groups as there are Identity groups
    assert GroupM2M.query.count() == len(id_groups)

    # Make sure that all loaded relationships are unique
    id_rels = [r for r, t in zip(rel_map, relationship_types) if t is None]
    assert len({r.id for r in id_rels}) == len(id_rels)
    assert Relationship.query.count() == len(id_rels)

    grp_rels = [r for r, t in zip(rel_map, relationship_types)
                if t is not None]
    # Make sure that all loaded groups relationships are unique
    assert len({r.id for r in grp_rels}) == len(grp_rels)
    assert GroupRelationship.query.count() == len(grp_rels)

    # Make sure that GroupRelationshipM2M are correct
    id_grp_rels = [r for r, t in zip(rel_map, relationship_types)
                   if t == GroupType.Identity]
    # There are as many GroupRelationshipM2M objects as Identity-type
    # group relationships
    assert GroupRelationshipM2M.query.count() == len(id_grp_rels)
    # Same number of GroupRelationshipMetadata as GRelationships of type ID
    assert GroupRelationshipMetadata.query.count() == len(id_grp_rels)

    # There are as many Relationship to GR items as Relationships.
    # Only the first sub-relationship of each entry is inspected to decide
    # whether the entry groups plain Relationships.
    n_rel2grrels = sum(
        len(subrels) for _, subrels in relationship_groups
        if isinstance(rel_map[subrels[0]], Relationship))
    assert Relationship2GroupRelationship.query.count() == n_rel2grrels

    # Make sure that all GroupRelationshipM2M are matching
    for group_rel, group_subrels in relationship_groups:
        for group_subrel in group_subrels:
            if isinstance(rel_map[group_subrel], Relationship):
                assert Relationship2GroupRelationship.query.filter_by(
                    relationship=rel_map[group_subrel],
                    group_relationship=rel_map[group_rel]).one()
            else:  # rel_map[group_subrel] is a GroupRelationship
                assert GroupRelationshipM2M.query.filter_by(
                    relationship=rel_map[group_rel],
                    subrelationship=rel_map[group_subrel]).one()