Esempio n. 1
0
 def __init__(self, model, data):
     """Build the object from a raw result dictionary.

     The canonical ID is read from ``canonical_id`` (or ``profile_id``, as
     emitted by Aleph's linkage API) and, failing that, derived from the
     nested ``canonical`` value. The entity ID is read from ``entity_id``
     or derived from the nested ``entity`` value in the same way.
     """
     self.model = model
     self._data = data

     # Only fall back to the nested object when no truthy ID was given.
     canonical_id = data.get('canonical_id', data.get('profile_id'))
     if not canonical_id:
         canonical_id = get_entity_id(data.get('canonical'))
     self.id = canonical_id
     self._canonical = None

     entity_id = data.get('entity_id')
     if not entity_id:
         entity_id = get_entity_id(data.get('entity'))
     self.entity_id = entity_id
     self._entity = None

     self.decision = data.get('decision')
     self._score = data.get('score')
Esempio n. 2
0
 def __init__(self, model, data):
     """Initialise the object from a raw result dictionary.

     IDs given explicitly (``canonical_id`` / ``profile_id`` and
     ``entity_id``) win; otherwise they are derived from the nested
     ``canonical`` and ``entity`` values via ``get_entity_id``.
     """
     self.model = model
     self._data = data

     # ``profile_id`` is what Aleph's linkage API emits.
     explicit_id = data.get("canonical_id", data.get("profile_id"))
     if explicit_id:
         self.id = explicit_id
     else:
         self.id = get_entity_id(data.get("canonical"))
     self._canonical = None

     explicit_entity_id = data.get("entity_id")
     if explicit_entity_id:
         self.entity_id = explicit_entity_id
     else:
         self.entity_id = get_entity_id(data.get("entity"))
     self._entity = None

     self.decision = data.get("decision")
     self._score = data.get("score")
Esempio n. 3
0
    def map(self, proxy, record, entities, **kwargs):
        """Apply this property mapping to ``proxy`` for one record.

        If the mapping refers to an entity that is present in ``entities``,
        that entity's ID is added to the property and its caption values are
        copied into the proxy's ``indexText`` property (when the schema has
        one). The record values are then cleaned, optionally joined and
        split, and added to the property.
        """
        kwargs.update(self.data)

        if self.entity is not None:
            referenced = entities.get(self.entity)
            if referenced is not None:
                proxy.add(self.prop, get_entity_id(referenced))

                # Copying captions across entities is questionable in
                # theory, but very useful in practice.
                index_text = proxy.schema.get('indexText')
                if index_text is not None:
                    for caption in referenced.schema.caption:
                        proxy.add(index_text, referenced.get(caption))

        # Clean the values returned by the query, literals or formats and
        # drop whatever cleans to None.
        cleaned = (self.type.clean(raw, **kwargs)
                   for raw in self.record_values(record))
        values = [value for value in cleaned if value is not None]

        if self.join is not None:
            values = [self.join.join(values)]

        if self.split is not None:
            pieces = []
            for value in values:
                pieces.extend(value.split(self.split))
            values = pieces

        proxy.add(self.prop, values)
Esempio n. 4
0
    def map(self, record, entities, **kwargs):
        """Return the unique values this mapping produces for ``record``.

        A mapping that refers to another entity short-circuits: the ID of
        that entity (looked up in ``entities``) is returned, wrapped by
        ``ensure_list``. Otherwise the record values are cleaned, optionally
        joined and split, and de-duplicated with ``unique_list``.
        """
        kwargs.update(self.data)

        if self.entity is not None:
            referenced = entities.get(self.entity)
            return ensure_list(get_entity_id(referenced))

        # Clean the values returned by the query, literals or formats and
        # drop whatever cleans to None.
        cleaned = (self.type.clean(raw, **kwargs)
                   for raw in self.record_values(record))
        values = [value for value in cleaned if value is not None]

        if self.join is not None:
            values = [self.join.join(values)]

        if self.split is not None:
            pieces = []
            for value in values:
                pieces.extend(value.split(self.split))
            values = pieces

        return unique_list(values)
Esempio n. 5
0
    def map(self, proxy, record, entities):
        """Apply this property mapping to ``proxy`` for one record.

        If the mapping refers to an entity that is present in ``entities``,
        its ID is added to the property and ``inline_names`` is invoked for
        it. The record values are then cleaned (with ``proxy`` and the
        mapping's ``data`` as options), optionally joined and split, and
        added to the property.
        """
        if self.entity is not None:
            referenced = entities.get(self.entity)
            if referenced is not None:
                proxy.add(self.prop, get_entity_id(referenced))
                inline_names(proxy, referenced)

        # Clean the values returned by the query, literals or formats and
        # drop whatever cleans to None.
        cleaned = (self.type.clean(raw, proxy=proxy, **self.data)
                   for raw in self.record_values(record))
        values = [value for value in cleaned if value is not None]

        if self.join is not None:
            values = [self.join.join(values)]

        if self.split is not None:
            pieces = []
            for value in values:
                pieces.extend(value.split(self.split))
            values = pieces

        proxy.add(self.prop, values)
Esempio n. 6
0
 def clean(self, text, **kwargs):
     """Return the entity ID of ``text`` as a string, or ``None``.

     ``None`` is returned when ``get_entity_id`` yields nothing or when
     the stringified ID does not match ``self.REGEX``.
     """
     entity_id = get_entity_id(text)
     if entity_id is not None:
         candidate = str(entity_id)
         if self.REGEX.match(candidate) is not None:
             return candidate
     return None
Esempio n. 7
0
 def resolve(self, subject):
     """Return the canonical ID to use for an entity or entity ID.

     When the ID belongs to a known cluster, the cluster's ID is
     returned; otherwise the ID itself is.
     """
     entity_id = get_entity_id(subject)
     cluster = self.clusters.get(entity_id)
     return entity_id if cluster is None else cluster.id
Esempio n. 8
0
def channel(obj, clazz=None):
    """Build a channel name for ``obj``.

    ``clazz`` defaults to ``type(obj)``. When the class is ``str`` the
    object is returned as-is. Otherwise the result is
    ``<class name>:<id>`` with the ID taken from ``get_entity_id``, or
    ``None`` when no ID is available.
    """
    kind = clazz or type(obj)
    if kind == str:
        return obj

    ident = get_entity_id(obj)
    if ident is None:
        return None
    return '%s:%s' % (kind.__name__, ident)
Esempio n. 9
0
def channel(obj, clazz=None):
    """Return the channel name for ``obj``.

    The class used is ``clazz`` or, if that is falsy, ``type(obj)``. An
    object whose class is ``str`` is handed back unchanged. Anything else
    becomes ``<class name>:<id>`` using ``get_entity_id``; ``None`` is
    returned if that yields no ID.
    """
    owner = clazz or type(obj)
    if owner == str:
        return obj

    entity_id = get_entity_id(obj)
    if entity_id is None:
        return None
    return '%s:%s' % (owner.__name__, entity_id)
Esempio n. 10
0
    def add(self, subject, canonical):
        """Record that ``subject`` belongs to the same cluster as ``canonical``.

        Both arguments are reduced to IDs via ``get_entity_id`` and
        ``Namespace.parse``. Nothing happens when either ID is missing or
        when both are identical. If ``subject`` already belongs to a
        cluster, that cluster's members are merged in. Afterwards every
        member of the resulting cluster maps to it in ``self.clusters``.
        """
        subject, _ = Namespace.parse(get_entity_id(subject))
        canonical, _ = Namespace.parse(get_entity_id(canonical))

        # Don't do no-ops.
        if subject is None or canonical is None:
            return
        if subject == canonical:
            return

        cluster = self.clusters.get(canonical)
        if cluster is None:
            cluster = Cluster(canonical, subject)
        else:
            # The cluster already registered for the canonical ID does not
            # contain the subject yet. Without adding it here, the subject
            # would never be registered in ``self.clusters`` below.
            cluster.update({subject})

        previous = self.clusters.get(subject)
        if previous is not None:
            cluster.update(previous.entities)

        for entity in cluster.entities:
            self.clusters[entity] = cluster
Esempio n. 11
0
 def clean(
     self,
     raw: Any,
     fuzzy: bool = False,
     format: Optional[str] = None,
     proxy: Optional["EntityProxy"] = None,
 ) -> Optional[str]:
     """Reduce ``raw`` to an entity ID and clean it with ``clean_text``.

     Returns ``None`` when ``get_entity_id`` yields no ID; otherwise the
     result of ``self.clean_text`` called with the same options.
     """
     entity_id = get_entity_id(raw)
     if entity_id is not None:
         return self.clean_text(
             entity_id, fuzzy=fuzzy, format=format, proxy=proxy
         )
     return None
Esempio n. 12
0
 def apply(self, proxy, shallow=False):
     """Return a clone of ``proxy`` whose IDs are signed for the namespace.

     The clone's own ID is always signed. Unless ``shallow`` is true, the
     values of every entity-typed property are signed as well.
     """
     signed = proxy.clone()
     signed.id = self.sign(proxy.id)
     if shallow:
         return signed

     for prop in proxy.iterprops():
         if prop.type != registry.entity:
             continue
         # pop() takes the current values off the clone; each one is
         # re-added in signed form.
         for reference in signed.pop(prop):
             signed.add(prop, self.sign(get_entity_id(reference)))
     return signed
Esempio n. 13
0
 def save(cls, session, subject, candidate,
          score=None, judgement=None, priority=None):
     """Create or update the record for ``subject`` and ``candidate``.

     An existing record is looked up with ``cls.by_id``; if there is none,
     a new one is created and its ID, subject and candidate are filled in.
     ``score`` (with the derived priority) and ``judgement`` are only
     written when given. The record is added to ``session`` and returned.
     """
     record = cls.by_id(session, subject, candidate)
     if record is None:
         record = cls()
         record.id = cls.make_id(subject, candidate)
         record.subject, _ = Namespace.parse(get_entity_id(subject))
         record.candidate, _ = Namespace.parse(get_entity_id(candidate))

     # A falsy priority (None or 0) falls back to the default.
     factor = priority or DEFAULT_PRIORITY
     if score is not None:
         record.score = score
         record.priority = score * factor
     if judgement is not None:
         record.judgement = judgement

     record.updated_at = now()
     session.add(record)
     return record
Esempio n. 14
0
 def __init__(self, subject, prop, value,
              weight=1.0, inverted=False, inferred=False):
     """Store the given fields on the instance.

     ``value`` is passed through ``get_entity_id`` before being stored;
     all other arguments are stored unchanged.
     """
     self.subject = subject
     self.prop = prop
     # Keep only the ID when an entity object is handed in.
     self.value = get_entity_id(value)
     self.weight = weight
     self.inverted = inverted
     self.inferred = inferred
Esempio n. 15
0
def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification.

    Only the parameters declared in ``event.params`` are kept, each passed
    through ``get_entity_id``. ``None`` channels are dropped and duplicates
    removed. The document ID is a hash of actor, event name, channels and
    parameters.
    """
    given = params or {}
    params = {
        name: get_entity_id(given.get(name)) for name in event.params.keys()
    }
    channels = list(set(c for c in channels if c is not None))
    document = {
        'actor_id': actor_id,
        'params': params,
        'event': event.name,
        'channels': channels,
        'created_at': datetime.utcnow(),
    }
    index = notifications_index()
    doc_id = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, doc_id, document, refresh=refresh_sync(sync))
Esempio n. 16
0
    def validate(self, data):
        """Validate that the data should be stored.

        Since the types system doesn't really have validation, this currently
        tries to normalize the value to see if it passes strict parsing.

        Returns the ``gettext`` message for the first problem found: a write
        to a stub property, or a value rejected by ``self.type.validate``.
        """
        values = []
        for val in data:
            # A stub property rejects any write, so the first value already
            # triggers the error (empty ``data`` passes through).
            if self.stub:
                return gettext('Property cannot be written')
            # Values are passed through get_entity_id before validation.
            val = get_entity_id(val)
            if not self.type.validate(val):
                return gettext('Invalid value')
            if val is not None:
                values.append(val)
        # NOTE(review): ``values`` is collected but not used in the visible
        # code, and nothing is returned on success -- this block may be
        # truncated; confirm against the full source.
Esempio n. 17
0
def publish(event, actor_id=None, params=None, channels=None):
    """Publish a notification for ``event`` to the given channels.

    Each channel is converted with ``channel_tag``. For every parameter
    declared on the event, the supplied value is passed through
    ``get_entity_id`` and stored along with the initiating actor.
    """
    assert isinstance(event, Event), event
    params = params or {}
    channels = [channel_tag(c) for c in ensure_list(channels)]
    outparams = {
        name: get_entity_id(params.get(name))
        for name, _ in event.params.items()
    }
    Notification.publish(
        event,
        actor_id=actor_id,
        params=outparams,
        channels=channels,
    )
    db.session.flush()
Esempio n. 18
0
    def add(self, subject, canonical):
        """Link ``subject`` to the resolved form of ``canonical``.

        Both arguments are reduced to IDs via ``get_entity_id``. Nothing
        happens when either ID is missing or both are identical. Existing
        linkages that point at the subject are re-pointed as well.
        """
        subject = get_entity_id(subject)
        canonical = get_entity_id(canonical)

        # Nothing to link: a missing ID or a self-reference.
        if subject is None or canonical is None or subject == canonical:
            return
        target = self.resolve(canonical)

        # Circular dependency: the canonical already resolves to the
        # subject, so link the smaller ID to the greater one instead.
        if target == subject:
            target, subject = max(subject, canonical), min(subject, canonical)

        # The subject and everything currently pointing at it get the
        # new target.
        sources = [subject]
        sources.extend(
            src for src, dst in self.linkages.items() if dst == subject
        )
        for source in sources:
            if source != target:
                self.linkages[source] = target
Esempio n. 19
0
def _normalize_data(data):
    """Turn entities in properties into entity ids"""
    entities = data['layout']['entities']
    for obj in entities:
        schema = model.get(obj.get('schema'))
        if schema is None:
            raise InvalidData("Invalid schema %s" % obj.get('schema'))
        properties = obj.get('properties', {})
        for name, values in list(properties.items()):
            prop = schema.get(name)
            if prop.type == registry.entity:
                properties[prop.name] = []
                for value in ensure_list(values):
                    entity_id = get_entity_id(value)
                    properties[prop.name].append(entity_id)
    return data
Esempio n. 20
0
    def apply(self, proxy):
        """Return a clone of ``proxy`` with all IDs signed for the namespace.

        The clone's own ID and every value of its entity-typed properties
        are signed. As a last step the clone's own (signed) ID is removed
        from its ``sameAs`` property.
        """
        signed = proxy.clone()
        signed.id = self.sign(proxy.id)
        for prop in proxy.iterprops():
            if prop.type != registry.entity:
                continue
            # pop() takes the current values off the clone; each one is
            # re-added in signed form.
            for reference in signed.pop(prop):
                signed.add(prop, self.sign(get_entity_id(reference)))
        signed.remove('sameAs', signed.id)
        return signed
Esempio n. 21
0
def publish(event, actor_id=None, params=None, channels=None):
    """Publish a notification to the given channels, while storing
    the parameters and initiating actor for the event.

    Besides the supplied ``channels``, the notification is sent to the
    channel of the actor (as a ``Role``) and to the channel of every
    parameter object declared on the event. Parameter values are stored
    as returned by ``get_entity_id``.
    """
    assert isinstance(event, Event), event
    params = params or {}
    outparams = {}
    # Copy into a fresh list: ensure_list() may hand back the caller's own
    # list (which the appends below would mutate) or another list-like
    # object without ``append``.
    channels = list(ensure_list(channels))
    channels.append(channel(actor_id, clazz=Role))
    for name, clazz in event.params.items():
        obj = params.get(name)
        outparams[name] = get_entity_id(obj)
        channels.append(channel(obj, clazz=clazz))
    Notification.publish(event,
                         actor_id=actor_id,
                         params=outparams,
                         channels=channels)
    db.session.flush()
Esempio n. 22
0
def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification.

    Parameter values are passed through ``get_entity_id`` and stored as
    strings; parameters that yield no ID are left out. ``None`` channels
    are dropped and duplicates removed.
    """
    params = params or {}
    resolved = (
        (name, get_entity_id(raw)) for name, raw in params.items()
    )
    stored_params = {
        name: str(ident) for name, ident in resolved if ident is not None
    }
    channels = list(set(c for c in channels if c is not None))
    document = {
        "actor_id": actor_id,
        "params": stored_params,
        "event": event.name,
        "channels": channels,
        "created_at": datetime.utcnow(),
    }
    index = notifications_index()
    # The document ID is derived from the parameters as passed in, not
    # from their stored string form.
    id_ = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, id_, document, sync=sync)
Esempio n. 23
0
 def resolve(self, subject):
     """Return the canonical ID to use for an entity or entity ID.

     An ID without a recorded linkage resolves to itself.
     """
     entity_id = get_entity_id(subject)
     return self.linkages.get(entity_id, entity_id)
Esempio n. 24
0
 def has(self, subject):
     """Tell whether a linkage is recorded for the given entity or ID."""
     return get_entity_id(subject) in self.linkages
Esempio n. 25
0
 def clean(self, text, **kwargs):
     """Return the entity ID of ``text`` if it validates, else ``None``.

     Extra keyword arguments are accepted but not used.
     """
     entity_id = get_entity_id(text)
     if not self.validate(entity_id):
         return None
     return entity_id
Esempio n. 26
0
 def __init__(self, subject, canonical, judgement):
     """Store the subject and canonical IDs along with a judgement.

     Both entities are passed through ``get_entity_id``. A falsy
     ``judgement`` is replaced by ``self.UNSURE``.
     """
     self.subject = get_entity_id(subject)
     self.canonical = get_entity_id(canonical)
     self.judgement = judgement if judgement else self.UNSURE
Esempio n. 27
0
 def entity(self, entity):
     """Store ``entity`` and keep ``entity_id`` in sync with it."""
     self._entity = entity
     # Derive the ID from the same object so both always agree.
     self.entity_id = get_entity_id(entity)
Esempio n. 28
0
 def canonical(self, entity):
     """Store ``entity`` as the canonical and keep ``id`` in sync with it."""
     self._canonical = entity
     # Derive the ID from the same object so both always agree.
     self.id = get_entity_id(entity)
Esempio n. 29
0
 def clean(self, text, **kwargs):
     """Return whatever ``get_entity_id`` yields for ``text``.

     Extra keyword arguments are accepted but not used.
     """
     entity_id = get_entity_id(text)
     return entity_id
Esempio n. 30
0
 def make_id(cls, subject, candidate):
     """Build an ID from the parsed subject and candidate IDs.

     Each argument is reduced via ``get_entity_id`` and
     ``Namespace.parse``; the two results are joined with a dot.
     """
     subject_id, _ = Namespace.parse(get_entity_id(subject))
     candidate_id, _ = Namespace.parse(get_entity_id(candidate))
     return '.'.join([subject_id, candidate_id])
Esempio n. 31
0
 def __init__(self, type_, value):
     """Store the type, the value's entity ID and the matching RDF term.

     ``value`` is passed through ``get_entity_id``; ``uri`` is what the
     type's ``rdf`` method returns for the stored value.
     """
     self.type = type_
     self.value = get_entity_id(value)
     # Computed from the stored value, i.e. after ID extraction.
     self.uri = self.type.rdf(self.value)
Esempio n. 32
0
 def has(self, subject):
     """Tell whether the given entity or ID belongs to a known cluster."""
     return get_entity_id(subject) in self.clusters