Ejemplo n.º 1
0
def create_objects_from_relations(relationships: List[Tuple],
                                  metadata: List[Tuple[dict]] = None):
    """Given a list of relationships, create all corresponding DB objects.

    For every distinct value that appears as a source or a target, an
    ``Identifier`` with scheme ``'doi'`` is added to the session and
    ``get_or_create_groups`` is called for it. For every relationship a
    ``Relationship``, an Identity-type and a Version-type
    ``GroupRelationship``, a ``GroupRelationshipMetadata`` and the linking
    ``Relationship2GroupRelationship`` and ``GroupRelationshipM2M`` objects
    are added. The session is committed once, at the end.

    :param relationships: List of ``(source, relation, target)`` triplets,
        where source and target are identifier values.
    :param metadata: Optional list, parallel to ``relationships``, of
        ``(source_metadata, relation_metadata, target_metadata)`` triplets.
        When omitted or empty, empty dicts are used for every relationship.
    :raises AssertionError: If ``relationships`` and ``metadata`` differ in
        length.

    E.g.:
        relationships = [
            ('A', Relation.Cites, 'B'),
            ('C', Relation.Cites, 'D'),
        ]

        metadata = [
            ({<source-1>}, {<relation-1>}, {<target-1>}),
            ({<source-2>}, {<relation-2>}, {<target-2>}),
        ]

        Will create Identifier, Relationship, Group and all M2M objects.
    """
    if not metadata:
        metadata = [({}, {}, {}) for _ in relationships]
    assert len(relationships) == len(metadata), \
        'relationships and metadata must have the same length'

    # Distinct identifier values, sorted so that the identifiers and their
    # groups are always created in the same order.
    identifiers = sorted(
        {value for src, _, tar in relationships for value in (src, tar)})

    # Maps an identifier value to the pair returned by get_or_create_groups,
    # which is used below as (identity group, version group).
    groups = {}
    for value in identifiers:
        id_ = Identifier(value=value, scheme='doi')
        db.session.add(id_)
        groups[value] = get_or_create_groups(id_)

    for (src, rel, tar), (src_m, rel_m, tar_m) in zip(relationships, metadata):
        src_ = Identifier.get(src, 'doi')
        tar_ = Identifier.get(tar, 'doi')
        r = Relationship(source=src_, target=tar_, relation=rel)
        db.session.add(r)

        s_id_gr, s_ver_gr = groups[src]
        t_id_gr, t_ver_gr = groups[tar]

        # The id is set explicitly because it is read a few lines below,
        # before this object has been added to the session.
        id_gr_rel = GroupRelationship(
            source=s_id_gr, target=t_id_gr, relation=rel,
            type=GroupType.Identity, id=uuid.uuid4())
        s_id_gr.data.update(src_m, validate=False)
        t_id_gr.data.update(tar_m, validate=False)

        grm = GroupRelationshipMetadata(group_relationship_id=id_gr_rel.id)
        db.session.add(grm)
        grm.update(rel_m, validate=False)
        db.session.add(Relationship2GroupRelationship(
            relationship=r, group_relationship=id_gr_rel))
        db.session.add(id_gr_rel)

        # The Version-level relationship wraps the Identity-level one.
        ver_gr_rel = GroupRelationship(
            source=s_ver_gr, target=t_ver_gr, relation=rel,
            type=GroupType.Version)
        db.session.add(GroupRelationshipM2M(
            relationship=ver_gr_rel, subrelationship=id_gr_rel))
        db.session.add(ver_gr_rel)
    db.session.commit()
Ejemplo n.º 2
0
def test_get_or_create_groups(db):
    """Check group creation (Identity and Version) for an identifier.

    Covers three steps: the first call for an identifier, a repeated call
    for the same identifier, and a call for a second, separate identifier.
    """

    def _expect_single_pair(identifier, identity_group, version_group):
        """Assert that the DB holds exactly the given pair of groups."""
        assert Group.query.count() == 2
        assert GroupM2M.query.count() == 1
        assert Identifier2Group.query.count() == 1

        identity_query = Group.query.filter_by(type=GroupType.Identity)
        assert identity_query.one() == identity_group
        version_query = Group.query.filter_by(type=GroupType.Version)
        assert version_query.one() == version_group

        identifier_link = Identifier2Group.query.one()
        group_link = GroupM2M.query.one()
        assert identifier_link.identifier == identifier
        assert identifier_link.group == identity_group
        assert group_link.group == version_group
        assert group_link.subgroup == identity_group

    first = Identifier(value='A', scheme='doi')
    db.session.add(first)

    # No group-related rows exist before the first call.
    assert Group.query.count() == 0
    assert GroupM2M.query.count() == 0
    assert Identifier2Group.query.count() == 0

    identity_group, version_group = get_or_create_groups(first)
    db.session.commit()
    _expect_single_pair(first, identity_group, version_group)

    # Look the identifier up again and repeat the call.
    refetched = Identifier.get('A', 'doi')
    assert refetched
    # NOTE(review): the repeated call is made with the original object, not
    # with the re-fetched one -- confirm this is intended.
    identity_group, version_group = get_or_create_groups(first)
    db.session.commit()

    # The repeated call must leave the database unchanged.
    _expect_single_pair(refetched, identity_group, version_group)

    # A second, separate identifier gets its own pair of groups.
    other = Identifier(value='B', scheme='doi')
    db.session.add(other)
    identity_group, version_group = get_or_create_groups(other)

    assert Group.query.count() == 4
    assert GroupM2M.query.count() == 2
    assert Identifier2Group.query.count() == 2
Ejemplo n.º 3
0
def id_obj(identifier, scheme):
    """Build an ``Identifier`` from a value and a scheme.

    The object is only constructed here; it is not added to any session.
    """
    fields = {'value': identifier, 'scheme': scheme}
    return Identifier(**fields)
Ejemplo n.º 4
0
def assert_grouping(grouping):
    """Determine if database state corresponds to 'grouping' definition.

    See tests in test_grouping.py for example input.

    :param grouping: Triplet ``(groups, relationships, relationship_groups)``:

        * ``groups`` -- list of groups. A group whose first item is a ``str``
          is an Identity group listing identifier values. A group whose
          first item is an ``int`` is a Version group; that first item is
          the index (in ``groups``) of an earlier group it contains.
        * ``relationships`` -- list of ``(a, relation, b)`` triplets. Two
          ``str`` ends denote a ``Relationship`` between identifiers, two
          ``int`` ends a ``GroupRelationship`` between the groups at those
          indices.
        * ``relationship_groups`` -- list of ``(index, sub_indices)`` pairs,
          all indices referring to ``relationships``.
    :raises TypeError: If a group or relationship entry matches neither the
        ``str`` nor the ``int`` form. Such entries used to be skipped
        silently, which shifted every later index.
    :raises AssertionError: If the database state does not match.

    Objects are looked up with ``.one()``, so a missing or duplicated row
    also makes the check fail.
    """
    groups, relationships, relationship_groups = grouping
    group_types = [
        (GroupType.Identity if isinstance(g[0], str) else GroupType.Version)
        for g in groups]

    # Mapping 'relationship_types' is a mapping between relationship index to:
    # * None if it's a regular Relation between Identifiers
    # * GroupType.Identity if it's a relation between 'Identity'-type Groups
    # * GroupType.Version if it's a relation between 'Version'-type Groups
    relationship_types = [None if isinstance(r[0], str) else group_types[r[0]]
                          for r in relationships]

    id_groups = [g for g, t in zip(groups, group_types)
                 if t == GroupType.Identity]
    unique_ids = {value for g in id_groups for value in g}

    # id_map is a mapping of str -> Identifier
    # E.g.: 'A' -> Instance('A', 'doi')
    id_map = {value: Identifier.get(value, 'doi') for value in unique_ids}

    # group_map[i] is the Group object for groups[i]. Version groups refer
    # to earlier entries by index, so the list is filled in order.
    group_map = []
    for g in groups:
        if isinstance(g[0], str):  # Identity group
            group_map.append(
                Identifier2Group.query.filter_by(
                    identifier=id_map[g[0]]).one().group)
        elif isinstance(g[0], int):  # GroupM2M
            group_map.append(
                GroupM2M.query.filter_by(
                    subgroup=group_map[g[0]]).one().group)
        else:
            raise TypeError(
                'Group must start with a str or an int, got: {!r}'.format(g))

    # rel_map[i] is the DB object for relationships[i]; it has to stay
    # index-aligned with 'relationship_types'.
    rel_map = []
    for r in relationships:
        obj_a, relation, obj_b = r

        if isinstance(obj_a, str) and isinstance(obj_b, str):
            # Identifiers relation
            rel_map.append(
                Relationship.query.filter_by(
                    source=id_map[obj_a], target=id_map[obj_b],
                    relation=relation).one()
            )
        elif isinstance(obj_a, int) and isinstance(obj_b, int):
            # Groups relation
            rel_map.append(
                GroupRelationship.query.filter_by(
                    source=group_map[obj_a], target=group_map[obj_b],
                    relation=relation).one()
            )
        else:
            raise TypeError(
                'Relationship ends must both be str or both be int, '
                'got: {!r}'.format(r))

    # Make sure all loaded identifiers are unique
    assert len({ident.id for ident in id_map.values()}) == len(id_map)
    assert Identifier.query.count() == len(id_map)

    # Make sure there's correct number of Identifier2Group records
    # and 'Identity'-type groups
    assert Identifier2Group.query.count() == len(id_map)
    assert Group.query.filter_by(
        type=GroupType.Identity).count() == len(id_groups)

    assert GroupMetadata.query.count() == len(id_groups)

    # Make sure that all loaded groups are unique
    assert len({group.id for group in group_map}) == len(group_map)
    assert Group.query.count() == len(group_map)

    # Make sure there's correct number of GroupM2M records
    # and 'Version'-type groups
    m2m_groups = [g for g in groups if isinstance(g[0], int)]
    assert Group.query.filter_by(
        type=GroupType.Version).count() == len(m2m_groups)
    # There are as many GroupM2M records as there are Identity groups
    assert GroupM2M.query.count() == len(id_groups)

    # Make sure that all loaded relationships are unique
    id_rels = [r for r, t in zip(rel_map, relationship_types)
               if t is None]
    assert len({rel.id for rel in id_rels}) == len(id_rels)
    assert Relationship.query.count() == len(id_rels)

    grp_rels = [r for r, t in zip(rel_map, relationship_types)
                if t is not None]
    # Make sure that all loaded groups relationships are unique
    assert len({rel.id for rel in grp_rels}) == len(grp_rels)
    assert GroupRelationship.query.count() == len(grp_rels)

    # Make sure that GroupRelationshipM2M are correct
    id_grp_rels = [r for r, t in zip(rel_map, relationship_types)
                   if t == GroupType.Identity]
    # There are as many GroupRelationshipM2M objects as Identity-type
    # group relationships
    assert GroupRelationshipM2M.query.count() == len(id_grp_rels)

    # Same number of GroupRelationshipMetadata as GRelationships of type ID
    assert GroupRelationshipMetadata.query.count() == len(id_grp_rels)

    # Count the sub-relationships of every relationship group whose first
    # sub-relationship is a plain Relationship; that many
    # Relationship2GroupRelationship records are expected.
    n_rel2grrels = sum(
        len(subrels) for _, subrels in relationship_groups
        if isinstance(rel_map[subrels[0]], Relationship))
    assert Relationship2GroupRelationship.query.count() == n_rel2grrels

    # Make sure that all GroupRelationshipM2M are matching
    for group_rel, group_subrels in relationship_groups:
        for group_subrel in group_subrels:
            if isinstance(rel_map[group_subrel], Relationship):
                assert Relationship2GroupRelationship.query.filter_by(
                    relationship=rel_map[group_subrel],
                    group_relationship=rel_map[group_rel]).one()
            else:  # isinstance(rel_map[group_rel], GroupRelationship):
                assert GroupRelationshipM2M.query.filter_by(
                    relationship=rel_map[group_rel],
                    subrelationship=rel_map[group_subrel]).one()