def add(self, subject, canonical):
    """Record that ``subject`` belongs to the same cluster as ``canonical``.

    Both arguments are passed through ``get_entity_id`` and
    ``Namespace.parse``; only the first element of each parsed pair is used.
    Nothing happens when either parsed ID is ``None`` or when the two IDs
    are equal. Otherwise the cluster registered for ``canonical`` (or a new
    ``Cluster(canonical, subject)`` when none is registered) absorbs the
    entities of any cluster already registered for ``subject``, and every
    entity of the resulting cluster is mapped to it in ``self.clusters``.
    """
    subject_id, _ = Namespace.parse(get_entity_id(subject))
    canonical_id, _ = Namespace.parse(get_entity_id(canonical))

    # Don't do no-ops: missing or identical IDs leave the index untouched.
    if subject_id is None or canonical_id is None:
        return
    if subject_id == canonical_id:
        return

    # The freshly built cluster is only a fallback for when the canonical
    # ID has no cluster registered yet.
    fallback = Cluster(canonical_id, subject_id)
    merged = self.clusters.get(canonical_id, fallback)

    # Fold in whatever the subject was previously clustered with.
    if subject_id in self.clusters:
        merged.update(self.clusters[subject_id].entities)

    # NOTE(review): when the canonical ID already has a cluster and the
    # subject has none, nothing visible here adds the subject to that
    # cluster -- confirm against the Cluster implementation.
    for member in merged.entities:
        self.clusters[member] = merged
def delete_aggregator_entity(collection, entity_id):
    """Delete an entity from the collection's aggregator under two IDs.

    The ID is first signed with ``collection.ns.sign`` and deleted in that
    form; the signed ID is then run through ``Namespace.parse`` and the
    first element of the result (presumably the unsigned base ID -- confirm)
    is deleted as well. The aggregator is closed even if a step fails.
    """
    store = get_aggregator(collection)
    try:
        signed_id = collection.ns.sign(entity_id)
        store.delete(entity_id=signed_id)

        # Second pass: remove the record stored under the parsed ID too.
        plain_id, _ = Namespace.parse(signed_id)
        store.delete(entity_id=plain_id)
    finally:
        store.close()
def by_id(cls, document_id, collection=None):
    """Look up a single record by its integer ID.

    ``document_id`` is run through ``Namespace.parse`` and the first element
    of the result is converted with ``int``. If either step raises, ``None``
    is returned instead of propagating the error. When ``collection`` is
    given, the lookup is additionally restricted to rows whose
    ``collection_id`` equals ``collection.id``.

    Returns the first matching row, as produced by ``.first()``.
    """
    try:
        raw_id, _ = Namespace.parse(document_id)
        numeric_id = int(raw_id)
    except Exception:
        # Best-effort lookup: an unparseable ID means "no such record".
        return None

    query = cls.all().filter(cls.id == numeric_id)
    if collection is not None:
        query = query.filter(cls.collection_id == collection.id)
    return query.first()
def save(cls, session, subject, candidate, score=None, judgement=None, priority=None):
    """Create or update the record linking ``subject`` and ``candidate``.

    An existing record is fetched with ``cls.by_id``; when there is none, a
    new one is built with an ID from ``cls.make_id`` and with ``subject`` /
    ``candidate`` set to the first element of ``Namespace.parse`` applied to
    each entity ID.

    ``score`` and ``judgement`` are only written when they are not ``None``.
    When a score is written, ``priority`` is set to ``score`` multiplied by
    the given ``priority``; any falsy ``priority`` (including ``0``) falls
    back to ``DEFAULT_PRIORITY``. ``updated_at`` is always refreshed and the
    record is added to ``session`` before being returned.
    """
    record = cls.by_id(session, subject, candidate)
    if record is None:
        record = cls()
        record.id = cls.make_id(subject, candidate)
        record.subject, _ = Namespace.parse(get_entity_id(subject))
        record.candidate, _ = Namespace.parse(get_entity_id(candidate))

    if not priority:
        priority = DEFAULT_PRIORITY

    if score is not None:
        record.score = score
        record.priority = score * priority

    if judgement is not None:
        record.judgement = judgement

    record.updated_at = now()
    session.add(record)
    return record
def by_id(cls, entity_id, collection_id=None):
    """Return the first row whose ``id`` matches the parsed ``entity_id``.

    ``entity_id`` is run through ``Namespace.parse`` and only the first
    element of the result is used for the comparison.

    ``collection_id`` is accepted but not used by this lookup.
    """
    plain_id, _ = Namespace.parse(entity_id)
    return cls.all().filter(cls.id == plain_id).first()
def check_entity_id(value):
    """Validate an entity ID, raising if it is not acceptable.

    ``value`` is run through ``Namespace.parse`` and the first element of
    the result is checked with ``registry.entity.validate``.

    Returns ``True`` when validation passes.

    Raises:
        ValueError: when validation fails; the message is translated via
            ``gettext`` and includes the parsed ID.
    """
    plain_id, _ = Namespace.parse(value)
    if registry.entity.validate(plain_id):
        return True
    raise ValueError(gettext('Invalid entity ID: %s') % plain_id)
def make_id(cls, subject, candidate):
    """Build a combined ID of the form ``<subject>.<candidate>``.

    Each argument is passed through ``get_entity_id`` and
    ``Namespace.parse``; the first element of each parsed pair is joined
    with a dot. ``str.join`` raises ``TypeError`` if either part is not a
    string.
    """
    subject_id, _ = Namespace.parse(get_entity_id(subject))
    candidate_id, _ = Namespace.parse(get_entity_id(candidate))
    parts = [subject_id, candidate_id]
    return '.'.join(parts)
def __init__(self, subject, canonical, judgement):
    """Store the parsed subject/canonical IDs and the judgement.

    ``subject`` and ``canonical`` are each passed through ``get_entity_id``
    and ``Namespace.parse``; the first element of each parsed pair is kept.
    A falsy ``judgement`` is replaced by ``self.UNSURE``.
    """
    subject_id, _ = Namespace.parse(get_entity_id(subject))
    self.subject = subject_id

    canonical_id, _ = Namespace.parse(get_entity_id(canonical))
    self.canonical = canonical_id

    self.judgement = judgement if judgement else self.UNSURE