Example #1
0
 def to_dict(self):
     """Return this path as a plain dictionary.

     The start and end entries are read from the stored ``data`` blob, the
     scalar attributes are copied over unchanged, and ``nodes`` is the
     stored ``paths`` value passed through ``unwind``.
     """
     payload = {
         'start': self.data['start'],
         'end': self.data['end'],
     }
     # Attributes exported one-to-one under their own name.
     for attr in ('length', 'types', 'labels',
                  'start_entity_id', 'end_collection_id'):
         payload[attr] = getattr(self, attr)
     payload['nodes'] = unwind(self.data['paths'])
     return payload
Example #2
0
    def from_data(cls, start_entity, end_collection_id, paths, types,
                  labels, start, end):
        """Create a path object from query results and add it to the session.

        ``start_entity`` supplies the ``id`` stored as ``start_entity_id``;
        ``labels`` and ``types`` are stored after being passed through
        ``unwind``; ``start``, ``end`` and ``paths`` are kept verbatim in the
        ``data`` blob.

        ``paths`` must be a non-empty sequence of sized sequences (one per
        path); an empty ``paths`` makes ``min()`` raise ``ValueError``.

        The new object is added to ``db.session`` but not committed here.
        Returns the new object.
        """
        obj = cls()
        obj.start_entity_id = start_entity.id
        obj.end_collection_id = end_collection_id

        obj.labels = unwind(labels)
        obj.types = unwind(types)

        # Materialise the lengths as a list: they are consumed several times
        # below, and on Python 3 ``map()`` yields a one-shot iterator that
        # has no ``len()`` and would be exhausted by ``min()``.
        lengths = [len(path) for path in paths]
        # Shortest path, counted as one less than its number of elements
        # (presumably elements are nodes, so this is the hop count).
        obj.length = min(lengths) - 1
        # Mean of (elements - 1) over all paths.
        average = (float(sum(lengths)) - len(lengths)) / float(len(lengths))
        # More paths raise the weight; a longer average path lowers it.
        obj.weight = len(paths) * (1.0 / max(1.0, average))
        obj.data = {
            'start': start,
            'end': end,
            'paths': paths
        }
        db.session.add(obj)
        return obj
Example #3
0
def generate_paths(graph, entity, ignore_types=SKIP_TYPES):
    """Rebuild the cached paths leading from ``entity`` to other collections.

    Any paths previously stored for the entity are deleted first. Nothing is
    regenerated when no graph is given or the entity is not active.
    """
    Path.delete_by_entity(entity.id)
    usable = graph is not None and entity.state == entity.STATE_ACTIVE
    if not usable:
        return
    log.info("Generating graph path cache: %r", entity)
    # TODO: should max path length be configurable?
    query = (
        "MATCH pth = (start:Aleph:Entity)-[*1..3]-(end:Aleph:Entity) "
        "MATCH (start)-[startpart:PART_OF]->(startcoll:Collection) "
        "MATCH (end)-[endpart:PART_OF]->(endcoll:Collection) "
        "WHERE start.fingerprint = {entity_fp} AND "
        "startpart.alephCanonical = {entity_id} AND "
        "startcoll.alephCollection <> endcoll.alephCollection AND "
        "all(r IN relationships(pth) WHERE NOT type(r) IN {ignore_types}) "
        "WITH DISTINCT start, end, "
        " COLLECT(DISTINCT extract(x IN nodes(pth) | x.id)) AS paths, "
        " COLLECT(DISTINCT extract(x IN nodes(pth) | labels(x))) AS labels, "
        " COLLECT(DISTINCT extract(r IN relationships(pth) | type(r))) AS types, "
        " COLLECT(DISTINCT endcoll.alephCollection) AS end_collection_id "
        "RETURN start, end, paths, types, labels, end_collection_id "
    )
    params = {
        'entity_id': entity.id,
        'entity_fp': entity.fingerprint,
        'ignore_types': ignore_types,
    }
    generated = 0
    for record in graph.run(query, **params):
        # Drop the base node label from the collected labels.
        node_labels = [label for label in unwind(record.get('labels'))
                       if label != BASE_NODE]
        rel_types = unwind(record.get('types'))
        # Rows whose only relationship type is 'AKA' are not stored.
        if len(rel_types) != 1 or 'AKA' not in rel_types:
            Path.from_data(entity,
                           record.get('end_collection_id'),
                           record.get('paths'),
                           rel_types,
                           node_labels,
                           NodeType.dict(record.get('start')),
                           NodeType.dict(record.get('end')))
            generated += 1
    db.session.commit()
    # TODO: send email to collection owners?
    log.info("Generated %s paths for %r", generated, entity)