Beispiel #1
0
class Link(db.Model, UuidModel, SoftDeleteModel):
    """A typed link from ``source_id`` to ``target_id`` inside a collection."""

    type = db.Column(db.String(255), index=True)
    source_id = db.Column(db.String(254), index=True)
    target_id = db.Column(db.String(254), index=True)
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(
        db.Integer, db.ForeignKey('collection.id'), index=True)
    # The owning collection exposes its links as a dynamic ``links`` query.
    collection = db.relationship(
        Collection, backref=db.backref('links', lazy='dynamic'))

    @property
    def schema(self):
        """Return whatever ``schemata`` has registered for ``self.type``."""
        return schemata.get(self.type)

    def to_dict(self):
        """Return the parent ``to_dict()`` result extended with link fields."""
        result = super(Link, self).to_dict()
        result['schema'] = self.type
        result['data'] = self.data
        # A missing array is reported as an empty list.
        result['foreign_ids'] = self.foreign_ids or []
        result['collection_id'] = self.collection_id
        return result

    def __repr__(self):
        parts = (self.id, self.source_id, self.target_id)
        return '<Link(%r, %r, %r)>' % parts
Beispiel #2
0
class Reference(db.Model, IdModel, DatedModel):
    """Row tying an entity to a document, with an origin tag and a weight."""

    id = db.Column(db.Integer(), primary_key=True)
    document_id = db.Column(db.BigInteger, db.ForeignKey('document.id'))
    entity_id = db.Column(db.String(32), db.ForeignKey('entity.id'))
    origin = db.Column(db.String(128))
    weight = db.Column(db.Integer)

    # Both sides receive a dynamic ``references`` backref.
    entity = db.relationship(
        'Entity', backref=db.backref('references', lazy='dynamic'))
    document = db.relationship(
        'Document', backref=db.backref('references', lazy='dynamic'))

    def to_dict(self):
        """Return the weight, origin and a short summary of the entity."""
        entity = self.entity
        summary = {
            'id': entity.id,
            'name': entity.name,
            '$schema': entity.type,
        }
        return {
            'entity': summary,
            'weight': self.weight,
            'origin': self.origin,
        }

    def __repr__(self):
        ids = (self.document_id, self.entity_id)
        return '<Reference(%r, %r)>' % ids
class Match(db.Model, IdModel, DatedModel):
    """A scored match between items of two collections."""

    entity_id = db.Column(db.String(64))
    document_id = db.Column(db.BigInteger())
    collection_id = db.Column(
        db.Integer, db.ForeignKey('collection.id'), index=True)
    match_id = db.Column(db.String(64))
    match_collection_id = db.Column(
        db.Integer, db.ForeignKey('collection.id'), index=True)
    score = db.Column(db.Float(), nullable=True)

    @classmethod
    def find_by_collection(cls, collection_id, other_id):
        """Query matches of ``collection_id`` against ``other_id``.

        Only rows without a ``document_id`` are selected; results are
        ordered by descending score, then by id.
        """
        return (
            Match.all()
            .filter(Match.collection_id == collection_id)
            .filter(Match.document_id == None)  # noqa
            .filter(Match.match_collection_id == other_id)
            .order_by(Match.score.desc())
            .order_by(Match.id)
        )

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Bulk-delete every match that involves ``collection_id``.

        ``deleted_at`` is accepted but not used by this method.
        """
        involved = or_(cls.collection_id == collection_id,
                       cls.match_collection_id == collection_id)
        db.session.query(cls).filter(involved).delete(
            synchronize_session=False)

    @classmethod
    def group_by_collection(cls, collection_id, authz=None):
        """Query match counts of ``collection_id`` per matched collection.

        Each row carries the count, the parent collection id and the
        matched collection. When ``authz`` is given and is not an admin,
        the result is restricted to collections readable by
        ``authz.roles``.
        """
        from aleph.model import Collection, Permission
        match_count = func.count(Match.id).label('matches')
        parent = Match.collection_id.label('parent')
        other = aliased(Collection, name='collection')
        query = (
            db.session.query(match_count, parent)
            .filter(Match.collection_id == collection_id)
            .filter(Match.document_id == None)  # noqa
            .filter(Match.match_collection_id != collection_id)
            .join(other, Match.match_collection_id == other.id)
            .filter(other.deleted_at == None)  # noqa
        )
        if authz is not None and not authz.is_admin:
            same_collection = (
                Match.match_collection_id == Permission.collection_id)
            query = (
                query.join(Permission, same_collection)
                .filter(Permission.deleted_at == None)  # noqa
                .filter(Permission.read == True)  # noqa
                .filter(Permission.role_id.in_(authz.roles))
            )
        query = query.add_entity(other).group_by(other, parent)
        return query.order_by(match_count.desc()).order_by(parent.asc())

    def __repr__(self):
        fields = (self.entity_id, self.document_id,
                  self.match_id, self.score)
        return 'Match(%r, %r, %r, %r)' % fields
Beispiel #4
0
class EntityIdentity(db.Model, IdModel, DatedModel):
    """A judgement on whether ``match_id`` denotes the same thing as an entity."""

    CONFIRMED = 1
    REJECTED = 2
    UNDECIDED = 3

    JUDGEMENTS = [CONFIRMED, REJECTED, UNDECIDED]

    entity_id = db.Column(
        db.String(32), db.ForeignKey('entity.id'), index=True)
    entity = db.relationship(
        'Entity', backref=db.backref('identities', lazy='dynamic'))
    match_id = db.Column(db.String(254), index=True, nullable=False)
    judgement = db.Column(db.Integer(), nullable=False)
    judge_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)

    @classmethod
    def judgements_by_entity(cls, entity_id):
        """Return a ``{match_id: judgement}`` dict for ``entity_id``."""
        rows = db.session.query(cls.match_id, cls.judgement).filter(
            cls.entity_id == entity_id)
        return dict((match_id, judgement) for match_id, judgement in rows.all())

    @classmethod
    def entity_ids(cls, entity_id):
        """Return ``entity_id`` followed by all match ids confirmed for it."""
        confirmed = (
            db.session.query(cls.match_id)
            .filter(cls.entity_id == entity_id)
            .filter(cls.judgement == cls.CONFIRMED)
        )
        return [entity_id] + [match_id for match_id, in confirmed.all()]

    @classmethod
    def by_entity_match(cls, entity_id, match_id):
        """Return the first row for the (entity, match) pair, or None."""
        return (
            db.session.query(cls)
            .filter(cls.entity_id == entity_id)
            .filter(cls.match_id == match_id)
            .first()
        )

    @classmethod
    def save(cls, entity_id, match_id, judgement, judge=None):
        """Create or update the judgement for a pair and add it to the session."""
        identity = cls.by_entity_match(entity_id, match_id)
        if identity is None:
            identity = cls()
            identity.entity_id = entity_id
            identity.match_id = match_id
        identity.judgement = judgement
        # NOTE(review): only a ``judge_id`` column is declared in this class;
        # confirm that a ``judge`` attribute is mapped elsewhere, otherwise
        # this assignment is not persisted.
        identity.judge = judge
        db.session.add(identity)
        return identity

    def __repr__(self):
        fields = (self.entity_id, self.match_id, self.judgement)
        return 'EntityIdentity(%r, %r, %r)' % fields
Beispiel #5
0
class Notification(db.Model, IdModel, DatedModel):
    """A stored event, addressed to a list of channels.

    The event is persisted by name in the ``event`` column and resolved
    through ``Events`` on access; event parameters live in ``params``.
    """
    GLOBAL = 'Global'

    _event = db.Column('event', db.String(255), nullable=False)
    channels = db.Column(ARRAY(db.String(255)), index=True)
    params = db.Column(JSONB)

    actor_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    actor = db.relationship(Role)

    @hybrid_property
    def event(self):
        """Return ``Events.get`` of the stored event name."""
        return Events.get(self._event)

    @event.setter
    def event(self, event):
        """Store the ``name`` of the given event object."""
        self._event = event.name

    def iterparams(self):
        """Yield ``(name, class, value)`` triples for this notification.

        The actor (when set) is yielded first as ``('actor', Role, id)``,
        followed by every parameter declared by the event that has a
        non-None value in ``params``. Nothing beyond the actor is yielded
        when the event cannot be resolved.
        """
        if self.actor_id is not None:
            yield 'actor', Role, self.actor_id
        if self.event is None:
            return
        # The params column is nullable; treat a missing value as empty
        # instead of failing on ``None.get``.
        params = self.params or {}
        for name, clazz in self.event.params.items():
            value = params.get(name)
            if value is not None:
                yield name, clazz, value

    @classmethod
    def publish(cls, event, actor_id=None, channels=None, params=None):
        """Create a notification and add it to the session.

        :param event: event object; its ``name`` is stored.
        :param actor_id: optional id of the acting role.
        :param channels: iterable of channel names; None entries are
            dropped and duplicates removed. Defaults to no channels.
        :param params: dict of event parameters. Defaults to a new
            empty dict.
        :returns: the new, not yet flushed, notification.
        """
        notf = cls()
        notf.event = event
        notf.actor_id = actor_id
        # Defaults are created per call: a shared ``{}`` default would be
        # stored on (and could be mutated through) every notification.
        notf.params = {} if params is None else params
        notf.channels = list(set(c for c in (channels or ())
                                 if c is not None))
        db.session.add(notf)
        return notf

    @classmethod
    def by_role(cls, role):
        """Query notifications on channels that ``role`` is subscribed to.

        Notifications caused by the role itself are left out, as are
        those whose event name is not in ``Events.names()``. Newest
        first.
        """
        sq = db.session.query(Subscription.channel)
        sq = sq.filter(Subscription.deleted_at == None)  # noqa
        sq = sq.filter(Subscription.role_id == role.id)
        sq = sq.cte('sq')
        q = cls.all()
        # Keep rows without an actor: ``actor_id != x`` alone is NULL
        # (and so filtered out) for them.
        q = q.filter(or_(
            cls.actor_id != role.id,
            cls.actor_id == None  # noqa
        ))
        q = q.filter(cls.channels.any(sq.c.channel))
        q = q.filter(cls._event.in_(Events.names()))
        q = q.order_by(cls.created_at.desc())
        q = q.order_by(cls.id.desc())
        return q
Beispiel #6
0
class EntityOtherName(db.Model, EntityDetails):
    """An additional name record attached to an entity."""

    _schema = '/entity/other_name.json#'

    entity_id = db.Column(
        db.String(32), db.ForeignKey('entity.id'), index=True)
    # The join condition skips rows of this table that have ``deleted_at`` set.
    entity = db.relationship(
        'Entity',
        primaryjoin=(
            "and_(Entity.id == foreign(EntityOtherName.entity_id), "
            "EntityOtherName.deleted_at == None)"
        ),
        backref=db.backref(
            'other_names', lazy='dynamic', cascade='all, delete-orphan'),
    )
    name = db.Column(db.Unicode)
    note = db.Column(db.Unicode)
    family_name = db.Column(db.Unicode)
    given_name = db.Column(db.Unicode)
    additional_name = db.Column(db.Unicode)
    honorific_prefix = db.Column(db.Unicode)
    honorific_suffix = db.Column(db.Unicode)
    patronymic_name = db.Column(db.Unicode)
    start_date = db.Column(db.DateTime)
    end_date = db.Column(db.DateTime)

    @property
    def display_name(self):
        """Return ``name``, or an empty string when it is None."""
        return '' if self.name is None else self.name

    @property
    def terms(self):
        """Return a one-element list holding the display name."""
        return [self.display_name]

    def to_dict(self):
        """Return the parent ``to_dict()`` result plus ``display_name``."""
        result = super(EntityOtherName, self).to_dict()
        result['display_name'] = self.display_name
        return result
Beispiel #7
0
class UuidModel(object):
    """Mixin that adds a 32-character string primary key named ``id``."""

    id = db.Column(
        db.String(32),
        primary_key=True,
        default=make_textid,
        nullable=False,
        unique=False,
    )

    def to_dict(self):
        """Return the next class's ``to_dict()`` (if any) with ``id`` added."""
        parent = super(UuidModel, self)
        # Other bases in the MRO may or may not provide ``to_dict``.
        if hasattr(parent, 'to_dict'):
            result = parent.to_dict()
        else:
            result = {}
        result['id'] = self.id
        return result
Beispiel #8
0
class Reference(db.Model, IdModel, DatedModel):
    """Row tying an entity to a document, with an origin tag and a weight."""

    id = db.Column(db.Integer(), primary_key=True)
    origin = db.Column(db.String(128))
    weight = db.Column(db.Integer)

    document_id = db.Column(
        db.BigInteger, db.ForeignKey('document.id'), index=True)
    document = db.relationship(
        'Document', backref=db.backref('references', lazy='dynamic'))

    entity_id = db.Column(
        db.String(32), db.ForeignKey('entity.id'), index=True)
    entity = db.relationship(
        'Entity', backref=db.backref('references', lazy='dynamic'))

    @classmethod
    def index_references(cls, document_id):
        """Return ``(entity_id, collection_id)`` rows for a document.

        Only references to entities in the active state are included.
        """
        # cf. aleph.index.entities.generate_entities()
        from aleph.model.entity import Entity
        return (
            db.session.query(Reference.entity_id, Entity.collection_id)
            .filter(Reference.document_id == document_id)
            .filter(Entity.id == Reference.entity_id)
            .filter(Entity.state == Entity.STATE_ACTIVE)
            .all()
        )

    def to_dict(self):
        """Return the weight, origin and a short summary of the entity."""
        entity = self.entity
        summary = {
            'id': entity.id,
            'name': entity.name,
            '$schema': entity.type,
        }
        return {
            'entity': summary,
            'weight': self.weight,
            'origin': self.origin,
        }

    def __repr__(self):
        ids = (self.document_id, self.entity_id)
        return '<Reference(%r, %r)>' % ids
Beispiel #9
0
class EntityIdentifier(db.Model, EntityDetails):
    """An identifier, qualified by a scheme, attached to an entity."""

    _schema = '/entity/identifier.json#'
    __tablename__ = 'entity_identifier'

    entity_id = db.Column(
        db.String(32), db.ForeignKey('entity.id'), index=True)
    # The join condition skips rows of this table that have ``deleted_at`` set.
    entity = db.relationship(
        'Entity',
        primaryjoin=(
            "and_(Entity.id == foreign(EntityIdentifier.entity_id), "
            "EntityIdentifier.deleted_at == None)"
        ),
        backref=db.backref(
            'identifiers', lazy='dynamic', cascade='all, delete-orphan'),
    )
    identifier = db.Column(db.Unicode)
    scheme = db.Column(db.Unicode)
Beispiel #10
0
class EntityBuilding(EntityAsset):
    """Asset subtype that can reference an address record."""

    _schema = '/entity/building.json#'
    __mapper_args__ = {'polymorphic_identity': _schema}

    building_address_id = db.Column(
        db.String(32), db.ForeignKey('entity_address.id'))
    # The join condition skips address rows that have ``deleted_at`` set.
    building_address = db.relationship(
        'EntityAddress',
        primaryjoin=(
            "and_(EntityAddress.id == foreign(EntityBuilding.building_address_id), "  # noqa
            "EntityAddress.deleted_at == None)"
        ),
    )
Beispiel #11
0
class EntityLegalPerson(Entity):
    """Entity subtype with an image and a postal address."""

    # NOTE(review): this value has no leading '/', unlike other ``_schema``
    # values in this codebase (e.g. '/entity/person.json#') - confirm that
    # this is intended before changing it, since it is also the stored
    # polymorphic identity.
    _schema = 'entity/legal_person.json#'
    __mapper_args__ = {'polymorphic_identity': _schema}

    image = db.Column(db.Unicode, nullable=True)
    postal_address_id = db.Column(
        db.String(32), db.ForeignKey('entity_address.id'))
    # The join condition skips address rows that have ``deleted_at`` set.
    postal_address = db.relationship(
        'EntityAddress',
        primaryjoin=(
            "and_(EntityAddress.id == foreign(EntityLegalPerson.postal_address_id), "  # noqa
            "EntityAddress.deleted_at == None)"
        ),
    )
Beispiel #12
0
class Reference(db.Model, IdModel, DatedModel):
    """Row tying an entity to a document, with an origin tag and a weight."""

    id = db.Column(db.Integer(), primary_key=True)
    document_id = db.Column(db.BigInteger, db.ForeignKey('document.id'))
    entity_id = db.Column(db.String(32), db.ForeignKey('entity.id'))
    origin = db.Column(db.String(128))
    weight = db.Column(db.Integer)

    # Both sides receive a dynamic ``references`` backref.
    entity = db.relationship(
        Entity, backref=db.backref('references', lazy='dynamic'))
    document = db.relationship(
        Document, backref=db.backref('references', lazy='dynamic'))

    @classmethod
    def delete_document(cls, document_id, origin=None):
        """Bulk-delete the references of a document.

        When ``origin`` is given, only references with that origin are
        removed.
        """
        doomed = cls.all().filter_by(document_id=document_id)
        if origin is not None:
            doomed = doomed.filter_by(origin=origin)
        doomed.delete(synchronize_session='fetch')

    def __repr__(self):
        ids = (self.document_id, self.entity_id)
        return '<Reference(%r, %r)>' % ids
Beispiel #13
0
class EntityContactDetail(db.Model, EntityDetails):
    """A labelled contact detail attached to an entity."""

    _schema = '/entity/contact_detail.json#'

    entity_id = db.Column(
        db.String(32), db.ForeignKey('entity.id'), index=True)
    # NOTE(review): the relationship targets 'EntityLegalPerson' while the
    # join condition names ``Entity.id`` - confirm this pairing is intended.
    # The join condition skips rows of this table that have ``deleted_at`` set.
    entity = db.relationship(
        'EntityLegalPerson',
        primaryjoin=(
            "and_(Entity.id == foreign(EntityContactDetail.entity_id), "
            "EntityContactDetail.deleted_at == None)"
        ),
        backref=db.backref(
            'contact_details', lazy='dynamic', cascade='all, delete-orphan'),
    )

    label = db.Column(db.Unicode)
    type = db.Column(db.Unicode)
    note = db.Column(db.Unicode)
    valid_from = db.Column(db.DateTime)
    valid_until = db.Column(db.DateTime)
Beispiel #14
0
 def by_channels(cls, channels, since=None, exclude_actor_id=None):
     """Query notifications sent to any of the given channels.

     :param channels: channel names; a notification matches when its own
         channel array overlaps with them.
     :param since: when given, only notifications created at or after
         this time are returned.
     :param exclude_actor_id: when given, notifications caused by this
         actor are left out. Notifications without an actor are kept.
     :returns: a query ordered newest first (creation time, then id).
     """
     channels = cast(channels, ARRAY(db.String(255)))
     q = cls.all()
     q = q.filter(cls.channels.overlap(channels))
     # Skip notifications whose event name is not in ``Events.names()``.
     q = q.filter(cls._event.in_(Events.names()))
     if exclude_actor_id is not None:
         # In SQL, ``actor_id != x`` evaluates to NULL for rows without an
         # actor, which would drop them as well; keep those rows
         # explicitly.
         q = q.filter(
             (cls.actor_id != exclude_actor_id) |
             (cls.actor_id == None)  # noqa
         )
     if since is not None:
         q = q.filter(cls.created_at >= since)
     q = q.order_by(cls.created_at.desc())
     q = q.order_by(cls.id.desc())
     return q
Beispiel #15
0
class EntityPerson(EntityLegalPerson):
    """Legal-person subtype with gender, life dates and a residential address."""

    _schema = '/entity/person.json#'
    __mapper_args__ = {'polymorphic_identity': _schema}

    gender = db.Column(db.Unicode, nullable=True)
    # Dates are stored in text columns.
    birth_date = db.Column(db.Unicode, nullable=True)
    death_date = db.Column(db.Unicode, nullable=True)

    residential_address_id = db.Column(
        db.String(32),
        db.ForeignKey('entity_address.id'),
    )
    # The join condition skips address rows that have ``deleted_at`` set.
    residential_address = db.relationship(
        'EntityAddress',
        primaryjoin=(
            "and_(EntityAddress.id == foreign(EntityPerson.residential_address_id), "  # noqa
            "EntityAddress.deleted_at == None)"
        ),
    )
Beispiel #16
0
class EntityOrganization(EntityLegalPerson):
    """Legal-person subtype with registration details and two addresses."""

    _schema = '/entity/organization.json#'
    __mapper_args__ = {'polymorphic_identity': _schema}

    classification = db.Column(db.Unicode, nullable=True)
    # Dates are stored in text columns.
    founding_date = db.Column(db.Unicode, nullable=True)
    dissolution_date = db.Column(db.Unicode, nullable=True)
    current_status = db.Column(db.Unicode, nullable=True)

    # Both address joins skip address rows that have ``deleted_at`` set.
    registered_address_id = db.Column(
        db.String(32),
        db.ForeignKey('entity_address.id'),
    )
    registered_address = db.relationship(
        'EntityAddress',
        primaryjoin=(
            "and_(EntityAddress.id == foreign(EntityOrganization.registered_address_id), "  # noqa
            "EntityAddress.deleted_at == None)"
        ),
    )

    headquarters_address_id = db.Column(
        db.String(32),
        db.ForeignKey('entity_address.id'),
    )
    headquarters_address = db.relationship(
        'EntityAddress',
        primaryjoin=(
            "and_(EntityAddress.id == foreign(EntityOrganization.headquarters_address_id), "  # noqa
            "EntityAddress.deleted_at == None)"
        ),
    )
Beispiel #17
0
 def by_channels(cls, channels, role, since=None):
     """Query notifications for ``role`` on any of the given channels.

     Notifications caused by the role itself are left out. The lower
     time bound is ``since`` or ``role.notified_at``; when both are set,
     the later of the two is used. Results are ordered newest first.
     """
     wanted = cast(channels, ARRAY(db.String(255)))
     not_own = or_(
         cls.actor_id != role.id,
         cls.actor_id == None  # noqa
     )
     query = (
         cls.all()
         .filter(cls.channels.overlap(wanted))
         .filter(cls._event.in_(Events.names()))
         .filter(not_own)
     )
     since = since or role.notified_at
     if since is not None and role.notified_at is not None:
         since = max(since, role.notified_at)
     if since is not None:
         query = query.filter(cls.created_at >= since)
     return query.order_by(cls.created_at.desc())
Beispiel #18
0
class Subscription(db.Model, IdModel, SoftDeleteModel):
    """A role's subscription to a channel."""

    channel = db.Column(db.String(255), index=True)
    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    role = db.relationship(Role)

    @classmethod
    def find(cls, channel=None, role_id=None, deleted=False):
        """Return the first subscription matching the given filters, or None.

        ``deleted`` is passed through to ``cls.all``.
        """
        query = cls.all(deleted=deleted)
        if channel is not None:
            query = query.filter(cls.channel == channel)
        if role_id is not None:
            query = query.filter(cls.role_id == role_id)
        return query.first()

    @classmethod
    def subscribe(cls, role, channel):
        """Return the role's subscription to ``channel``, creating it if needed.

        The subscription is added to the session with ``deleted_at`` cleared.
        """
        existing = cls.find(channel=channel, role_id=role.id)
        subscription = cls() if existing is None else existing
        subscription.channel = channel
        subscription.role_id = role.id
        subscription.deleted_at = None
        db.session.add(subscription)
        return subscription

    @classmethod
    def unsubscribe(cls, role=None, channel=None, deleted_at=None):
        """Soft-delete subscriptions by role, by channel, or by both.

        At least one of ``role`` and ``channel`` must be given.
        ``deleted_at`` defaults to the current UTC time.
        """
        assert role is not None or channel is not None
        stamp = datetime.utcnow() if deleted_at is None else deleted_at
        query = db.session.query(cls)
        if role is not None:
            query = query.filter(cls.role_id == role.id)
        if channel is not None:
            query = query.filter(cls.channel == channel)
        query.update({cls.deleted_at: stamp}, synchronize_session=False)
Beispiel #19
0
class Entity(db.Model, DatedModel):
    """An entity row whose properties are kept in the JSONB ``data`` column."""

    THING = "Thing"
    LEGAL_ENTITY = "LegalEntity"

    id = db.Column(db.String(ENTITY_ID_LEN), primary_key=True,
                   default=make_textid, nullable=False, unique=False)
    schema = db.Column(db.String(255), index=True)
    data = db.Column("data", JSONB)

    role_id = db.Column(
        db.Integer, db.ForeignKey("role.id"), nullable=True)
    collection_id = db.Column(
        db.Integer, db.ForeignKey("collection.id"), index=True)
    collection = db.relationship(
        Collection, backref=db.backref("entities", lazy="dynamic"))

    @property
    def model(self):
        """Return what ``model`` has registered for this entity's schema."""
        return model.get(self.schema)

    def update(self, data, collection):
        """Replace this entity's schema and properties from ``data``.

        The incoming proxy is run through the collection namespace, and
        the entity id is signed with it. Checksum-typed properties are
        always taken from the currently stored values.
        """
        incoming = model.get_proxy(data, cleaned=False)
        incoming = collection.ns.apply(incoming)
        self.id = collection.ns.sign(self.id)
        self.schema = incoming.schema.name
        self.updated_at = datetime.utcnow()
        # Snapshot of the stored state, taken before ``self.data`` changes.
        stored = self.to_proxy()
        for prop in incoming.schema.properties.values():
            if not prop.type == registry.checksum:
                continue
            # Do not allow the user to overwrite hashes because this could
            # lead to a user accessing random objects.
            incoming.set(prop, stored.get(prop), cleaned=True, quiet=True)
        self.data = incoming.properties
        db.session.add(self)

    def to_proxy(self):
        """Build an entity proxy from the stored columns."""
        return model.get_proxy(
            {
                "id": self.id,
                "schema": self.schema,
                "properties": self.data,
                "created_at": iso_text(self.created_at),
                "updated_at": iso_text(self.updated_at),
                "role_id": self.role_id,
                "mutable": True,
            },
            cleaned=False,
        )

    @classmethod
    def create(cls, data, collection, role_id=None):
        """Create an entity in ``collection`` from ``data``.

        The id comes from ``data['id']`` or is newly generated; an
        ``InvalidData`` error is raised when it does not validate.
        """
        entity = cls()
        raw_id = data.get("id") or make_textid()
        if not registry.entity.validate(raw_id):
            raise InvalidData(gettext("Invalid entity ID"))
        entity.id = collection.ns.sign(raw_id)
        entity.collection_id = collection.id
        entity.role_id = role_id
        entity.update(data, collection)
        return entity

    @classmethod
    def by_id(cls, entity_id, collection=None):
        """Return the entity with this id (optionally within a collection)."""
        query = cls.all().filter(cls.id == entity_id)
        if collection is not None:
            query = query.filter(cls.collection_id == collection.id)
        return query.first()

    @classmethod
    def by_collection(cls, collection_id):
        """Query all entities of a collection, fetched in batches of 5000."""
        query = cls.all().filter(Entity.collection_id == collection_id)
        return query.yield_per(5000)

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete every entity row of a collection."""
        doomed = db.session.query(cls).filter(
            cls.collection_id == collection_id)
        doomed.delete(synchronize_session=False)

    def __repr__(self):
        return "<Entity(%r, %r)>" % (self.id, self.schema)
Beispiel #20
0
class Alert(db.Model, SoftDeleteModel):
    """A subscription to notifications on a given query."""

    __tablename__ = 'alert'

    id = db.Column(db.Integer, primary_key=True)
    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    custom_label = db.Column(db.Unicode, nullable=True)
    query_text = db.Column(db.Unicode, nullable=True)
    entity_id = db.Column(
        db.String(32), db.ForeignKey('entity.id'), nullable=True)
    entity = db.relationship(
        Entity, backref=db.backref('alerts', lazy='dynamic'))
    notified_at = db.Column(db.DateTime, nullable=True)

    @property
    def label(self):
        """Return the custom label, else the entity name, else the query text."""
        if self.custom_label is not None:
            return self.custom_label
        return self.entity.name if self.entity else self.query_text

    def delete(self, deleted_at=None):
        """Soft-delete the alert (defaulting to now, UTC) and flush."""
        self.deleted_at = deleted_at or datetime.utcnow()
        session = db.session
        session.add(self)
        session.flush()

    def update(self):
        """Set ``notified_at`` to the current UTC time and flush."""
        self.notified_at = datetime.utcnow()
        session = db.session
        session.add(self)
        session.flush()

    def is_same(self, other):
        """Tell whether ``other`` has the same role, entity and query text."""
        if other.role_id != self.role_id:
            return False
        if other.entity_id != self.entity_id:
            return False
        if other.query_text != self.query_text:
            return False
        return True

    @classmethod
    def by_id(cls, id, role=None):
        """Return the alert with this id, optionally restricted to ``role``."""
        query = cls.all().filter_by(id=id)
        if role is not None:
            query = query.filter(cls.role_id == role.id)
        return query.first()

    @classmethod
    def by_role(cls, role):
        """Query all alerts that belong to ``role``."""
        return cls.all().filter(cls.role_id == role.id)

    @classmethod
    def create(cls, data, role):
        """Validate ``data`` and create an alert for ``role``.

        The query text is stripped, and stored as None when blank. The
        new alert is added to the session and flushed.
        """
        validate(data, 'alert.json#')
        alert = cls()
        alert.role_id = role.id
        alert.query_text = data.get('query_text')
        if alert.query_text is not None:
            alert.query_text = alert.query_text.strip() or None
        alert.entity_id = data.get('entity_id') or None
        alert.custom_label = data.get('label')
        alert.update()
        return alert

    @classmethod
    def exists(cls, query, role):
        """Return the id of the role's alert matching ``query``, if any.

        ``query`` supplies the text under ``q`` and entity ids under
        ``entity``; an entity is only matched when exactly one is given.
        """
        q = cls.all_ids().filter(cls.role_id == role.id)
        text = query.get('q')
        if text is not None:
            # A blank query is treated the same as no query text.
            text = text.strip() or None
        q = q.filter(cls.query_text == text)
        entities = query.getlist('entity')
        if len(entities) == 1:
            q = q.filter(cls.entity_id == entities[0])
        else:
            q = q.filter(cls.entity_id == None)  # noqa
        return q.limit(1).scalar()

    @classmethod
    def dedupe(cls, entity_id):
        """Soft-delete alerts on an entity that duplicate one with a higher id."""
        alerts = cls.all().filter_by(entity_id=entity_id).all()
        for left in alerts:
            for right in alerts:
                # Compare each pair once; the lower id is the one removed.
                if left.id < right.id and left.is_same(right):
                    left.delete()

    def __repr__(self):
        return '<Alert(%r, %r)>' % (self.id, self.label)

    def to_query(self):
        """Return the alert as a ``MultiDict`` with ``q`` and ``entity`` keys."""
        args = {
            'q': self.query_text or '',
            'entity': self.entity_id
        }
        return MultiDict(args)

    def to_dict(self):
        """Return the alert's fields as a plain dict."""
        return {
            'id': self.id,
            'label': self.label,
            'role_id': self.role_id,
            'query_text': self.query_text,
            'entity_id': self.entity_id,
            'created_at': self.created_at,
            'notified_at': self.notified_at,
            'updated_at': self.updated_at
        }
Beispiel #21
0
class Entity(db.Model, UuidModel, SoftDeleteModel):
    """A named, typed entity that belongs to one collection.

    Additional properties are kept in the JSONB ``data`` column and
    external identifiers in the ``foreign_ids`` array. ``state`` defaults
    to ``STATE_ACTIVE``.
    """
    STATE_ACTIVE = 'active'
    STATE_PENDING = 'pending'
    STATE_DELETED = 'deleted'

    name = db.Column(db.Unicode)
    type = db.Column(db.String(255), index=True)
    state = db.Column(db.String(128),
                      nullable=True,
                      default=STATE_ACTIVE,
                      index=True)  # noqa
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    def delete_references(self, origin=None):
        """Bulk-delete references to this entity, optionally by origin."""
        pq = db.session.query(Reference)
        pq = pq.filter(Reference.entity_id == self.id)
        if origin is not None:
            pq = pq.filter(Reference.origin == origin)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_identities(self):
        """Bulk-delete the identity judgements recorded for this entity."""
        pq = db.session.query(EntityIdentity)
        pq = pq.filter(EntityIdentity.entity_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete this entity together with its dependent rows.

        References and identities are removed, the entity's alerts are
        deleted with the same timestamp, the state becomes
        ``STATE_DELETED`` and the parent class ``delete`` is invoked.
        ``deleted_at`` defaults to the current UTC time.
        """
        self.delete_references()
        self.delete_identities()
        deleted_at = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=deleted_at)
        self.state = self.STATE_DELETED
        super(Entity, self).delete(deleted_at=deleted_at)

    @classmethod
    def delete_dangling(cls, collection_id):
        """Delete dangling entities.

        Entities can dangle in pending state while they have no references
        pointing to them, thus making it impossible to enable them. This is
        a routine cleanup function.
        """
        q = db.session.query(cls)
        q = q.filter(cls.collection_id == collection_id)
        q = q.filter(cls.state == cls.STATE_PENDING)
        # Outer join so that entities without any reference still appear,
        # with a reference count of zero.
        q = q.outerjoin(Reference)
        q = q.group_by(cls)
        q = q.having(func.count(Reference.id) == 0)
        for entity in q.all():
            entity.delete()

    def merge(self, other):
        """Merge ``other`` into this entity, then delete ``other``.

        Data and foreign ids are combined, ``other``'s name is kept as an
        alias when it differs (ignoring case), and alerts and references
        that pointed at ``other`` are re-pointed at this entity. The
        session is committed.

        :raises ValueError: if ``other`` is this entity or belongs to a
            different collection.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        data = merge_data(self.data, other.data)
        if self.name.lower() != other.name.lower():
            data = merge_data(data, {'alias': [other.name]})

        self.data = data
        self.state = self.STATE_ACTIVE
        # Assign a new list rather than extending the loaded one in place:
        # in-place changes to a plain ARRAY value are not detected by
        # SQLAlchemy, so the merged ids might never be written out.
        self.foreign_ids = (list(self.foreign_ids or []) +
                            list(other.foreign_ids or []))
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        # update alerts
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({'entity_id': self.id})

        # update document references
        from aleph.model.reference import Reference
        q = db.session.query(Reference).filter(Reference.entity_id == other.id)
        q.update({'entity_id': self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        db.session.refresh(other)

    def update(self, entity):
        """Apply the submitted ``entity`` dict to this object.

        The data (including the name) is validated against the schema;
        foreign ids are normalised and de-duplicated. Note that 'state'
        is popped from ``entity``.
        """
        data = entity.get('data') or {}
        data['name'] = entity.get('name')
        self.data = self.schema.validate(data)
        # The name is validated as part of the data but stored separately.
        self.name = self.data.pop('name')
        fid = [string_value(f) for f in entity.get('foreign_ids') or []]
        self.foreign_ids = list(set([f for f in fid if f is not None]))
        self.state = entity.pop('state', self.STATE_ACTIVE)
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    @classmethod
    def save(cls, data, collection, merge=False):
        """Create or update the entity described by ``data``.

        An existing entity is looked up by ``data['id']``; otherwise a new
        one is created using ``data['schema']``. With ``merge`` set, the
        submitted data is merged with the entity's current dict form.

        :raises ValueError: if a new entity has no schema, or if no
            collection is given.
        """
        ent = cls.by_id(data.get('id'))
        if ent is None:
            ent = cls()
            ent.type = data.pop('schema', None)
            if ent.type is None:
                raise ValueError("No schema provided.")
            ent.id = make_textid()

        if merge:
            data = merge_data(data, ent.to_dict())

        if collection is None:
            raise ValueError("No collection specified.")

        ent.collection = collection
        ent.update(data)
        return ent

    @classmethod
    def filter_collections(cls, q, collections=None):
        """Restrict ``q`` to the given collections (objects or ids).

        The query is returned unchanged when ``collections`` is None.
        """
        if collections is None:
            return q
        collection_ids = []
        for collection in collections:
            if isinstance(collection, Collection):
                collection = collection.id
            collection_ids.append(collection)
        q = q.filter(Entity.collection_id.in_(collection_ids))
        return q

    @classmethod
    def by_id_set(cls, ids, collections=None):
        """Return an ``{id: entity}`` dict for the given ids."""
        if not len(ids):
            return {}
        q = cls.all()
        q = cls.filter_collections(q, collections=collections)
        q = q.options(joinedload('collection'))
        q = q.filter(cls.id.in_(ids))
        entities = {}
        for ent in q:
            entities[ent.id] = ent
        return entities

    @classmethod
    def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
        """Return an entity of the collection that carries ``foreign_id``.

        Returns None when the id normalises to None or nothing matches.
        Rows without a deletion time are preferred.
        """
        foreign_id = string_value(foreign_id)
        if foreign_id is None:
            return None
        q = cls.all(deleted=deleted)
        q = q.filter(Entity.collection_id == collection_id)
        foreign_id = func.cast([foreign_id], ARRAY(db.Unicode()))
        q = q.filter(cls.foreign_ids.contains(foreign_id))
        q = q.order_by(Entity.deleted_at.desc().nullsfirst())
        return q.first()

    @classmethod
    def latest(cls):
        """Return the most recent ``updated_at`` among active entities."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.state == cls.STATE_ACTIVE)
        return q.scalar()

    @property
    def schema(self):
        """Return what ``schemata`` has registered for ``self.type``."""
        return schemata.get(self.type)

    @property
    def terms(self):
        """Return the set made of the name and all non-empty aliases."""
        terms = set([self.name])
        for alias in ensure_list(self.data.get('alias')):
            if alias is not None and len(alias):
                terms.add(alias)
        return terms

    @property
    def regex_terms(self):
        """Return the normalised terms worth searching for.

        Terms shorter than 4 or longer than 120 characters are skipped.
        """
        # This is to find the shortest possible regex for each entity.
        # If, for example, an entity matches both "Al Qaeda" and
        # "Al Qaeda in Iraq, Syria and the Levant", it is useless to
        # search for the latter.
        terms = set([normalize_strong(t) for t in self.terms])
        regex_terms = set()
        for term in terms:
            if term is None or len(term) < 4 or len(term) > 120:
                continue
            # Stop scanning as soon as one other term is found inside it.
            contained = any(
                other is not None and other != term and other in term
                for other in terms)
            if not contained:
                regex_terms.add(term)
        return regex_terms

    def to_dict(self):
        """Return the parent ``to_dict()`` result extended with entity fields."""
        data = super(Entity, self).to_dict()
        data.update({
            'schema': self.type,
            'name': self.name,
            'state': self.state,
            'data': self.data,
            'foreign_ids': self.foreign_ids or [],
            'collection_id': self.collection_id
        })
        return data

    def to_index(self):
        """Return ``to_dict()`` plus a ``properties`` dict of value lists.

        ``properties`` always holds the name; data keys with no values are
        left out.
        """
        entity = self.to_dict()
        entity['properties'] = {'name': [self.name]}
        for k, v in self.data.items():
            v = ensure_list(v)
            if len(v):
                entity['properties'][k] = v
        return entity

    def to_ref(self):
        """Return a short reference dict (id, label, schema, collection)."""
        return {
            'id': self.id,
            'label': self.name,
            'schema': self.type,
            'collection_id': self.collection_id
        }

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return '<Entity(%r, %r)>' % (self.id, self.name)
Beispiel #22
0
class Entity(db.Model, UuidModel, SoftDeleteModel, SchemaModel):
    """Schema-driven entity that can be attached to several collections.

    Rows are polymorphic on the ``type`` column, with ``_schema`` as the
    identity of this base class.
    """
    _schema = '/entity/entity.json#'
    _schema_recurse = True

    STATE_ACTIVE = 'active'
    STATE_PENDING = 'pending'
    STATE_DELETED = 'deleted'

    name = db.Column(db.Unicode)
    type = db.Column('type', db.String(255), index=True)
    state = db.Column(db.String(128), nullable=True, default=STATE_ACTIVE)
    summary = db.Column(db.Unicode, nullable=True)
    description = db.Column(db.Unicode, nullable=True)
    jurisdiction_code = db.Column(db.Unicode, nullable=True)
    register_name = db.Column(db.Unicode, nullable=True)
    register_url = db.Column(db.Unicode, nullable=True)

    __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': _schema}

    collections = db.relationship(
        Collection,
        secondary=collection_entity_table,  # noqa
        backref=db.backref('entities', lazy='dynamic'))  # noqa

    def delete_references(self, origin=None):
        """Delete ``Reference`` rows that point at this entity.

        When ``origin`` is given only references with that origin are
        removed. The entity is refreshed from the session afterwards.
        """
        query = db.session.query(Reference).filter(
            Reference.entity_id == self.id)
        if origin is not None:
            query = query.filter(Reference.origin == origin)
        query.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete the entity together with its references and alerts.

        ``deleted_at`` defaults to the current UTC time and is passed on to
        every alert and to the parent class's ``delete``.
        """
        self.delete_references()
        when = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=when)
        self.state = self.STATE_DELETED
        super(Entity, self).delete(deleted_at=when)

    def update(self, data, merge=False):
        """Apply ``data`` to the entity through ``schema_update``."""
        self.schema_update(data, merge=merge)

    def merge(self, other):
        """Fold ``other`` into this entity; a no-op when both share an id."""
        if self.id == other.id:
            return

        # De-dupe todo:
        # 1. merge identifiers
        # 2. merge properties
        # 3. merge names, make merged names into a.k.a's
        # 4. merge collections
        # 5. update references
        # 6. update alerts
        # 7. delete source entities
        # 8. update source entities
        # 9. update target entity

        # Attach every collection of ``other`` that self was not part of
        # before the merge started.
        already_linked = list(self.collections)
        to_link = [c for c in other.collections if c not in already_linked]
        for collection in to_link:
            self.collections.append(collection)

        # Keep a differing name (case-insensitively) as an alternative name.
        if self.name.lower() != other.name.lower():
            other_name = EntityOtherName()
            other_name.update({'name': other.name})
            other_name.entity = self
            db.session.add(other_name)

        # Re-point alerts and references from ``other`` to self.
        from aleph.model.alert import Alert
        alerts = db.session.query(Alert)
        alerts = alerts.filter(Alert.entity_id == other.id)
        alerts.update({'entity_id': self.id})

        from aleph.model.reference import Reference
        references = db.session.query(Reference)
        references = references.filter(Reference.entity_id == other.id)
        references.update({'entity_id': self.id})
        db.session.commit()

        db.session.refresh(other)
        self.schema_merge(other)

    def schema_merge(self, other):
        """Attempt to merge other onto self via JSON schema.

        Every property of ``self.schema_visitor`` except ``id`` is visited.
        Values missing on self are copied from ``other``; associated objects
        and array items are cloned when ``_schema_recurse`` is set. Finally
        the timestamps are updated, ``other`` is deleted and the session is
        flushed.
        """
        # TODO: figure out if we want to change schema
        for prop in self.schema_visitor.properties:
            if prop.name == 'id':
                continue

            mine = getattr(self, prop.name) \
                if hasattr(self, prop.name) else None
            theirs = getattr(other, prop.name) \
                if hasattr(other, prop.name) else None

            if mine is None and theirs is None:
                continue

            if prop.is_value and mine is None:
                # Plain value missing locally: take it from the other side.
                setattr(self, prop.name, theirs)

            elif prop.is_object and self._schema_recurse:
                # Clone an associated object only when self has none yet.
                rel = self._get_relationship(prop.name, 'MANYTOONE')
                if mine is not None or theirs is None:
                    continue
                payload = theirs.to_dict()
                clone = type(theirs)()
                clone.update(payload)
                for local, remote in self._get_associations(clone, rel):
                    setattr(self, local, getattr(clone, remote))

            elif prop.is_array and self._schema_recurse \
                    and theirs is not None:
                # Merge array associations, skipping items that an existing
                # (or freshly cloned) item already matches.
                rel = self._get_relationship(prop.name, 'ONETOMANY')
                merged = list(mine)

                for item in theirs:
                    payload = item.to_dict()
                    matches = [o for o in merged if o.merge_compare(payload)]
                    if len(matches):
                        continue

                    clone = type(item)()
                    clone.update(payload)
                    for local, remote in self._get_associations(clone, rel):
                        setattr(clone, remote, getattr(self, local))
                    db.session.add(clone)
                    merged.append(clone)

        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()
        other.delete()
        db.session.flush()

    @classmethod
    def save(cls, data, collections, merge=False):
        """Create or update an entity from ``data`` within ``collections``.

        The entity is looked up by ``data['id']`` first and then by each
        entry of ``data['identifiers']``; if none matches a new instance of
        the class for ``data['$schema']`` is created. With ``merge`` set, the
        entity's current collections are appended to ``collections`` (the
        passed list is modified in place).

        Raises:
            AttributeError: if ``collections`` ends up empty.
        """
        ent = cls.by_id(data.get('id'))
        if 'state' not in data:
            data['state'] = cls.STATE_ACTIVE

        for identifier in data.get('identifiers', []):
            if ent is not None:
                break
            ent = cls.by_identifier(identifier.get('scheme'),
                                    identifier.get('identifier'),
                                    collections=collections)

        if ent is None:
            schema = data.get('$schema', cls._schema)
            cls = cls.get_schema_class(schema)
            ent = cls()
            ent.id = make_textid()

        if merge:
            known_ids = [c.id for c in collections]
            for collection in ent.collections:
                if collection.id in known_ids:
                    continue
                collections.append(collection)
                known_ids.append(collection.id)

        if not len(collections):
            raise AttributeError("No collection specified.")

        ent.collections = collections
        ent.update(data, merge=merge)
        return ent

    @classmethod
    def filter_collections(cls, q, collections=None):
        """Restrict ``q`` to entities in the given, non-deleted collections.

        ``collections`` may mix ``Collection`` objects and plain ids; with
        ``None`` the query is returned unchanged.
        """
        if collections is None:
            return q
        collection_ids = [
            item.id if isinstance(item, Collection) else item
            for item in collections
        ]
        coll = aliased(Collection)
        q = q.join(coll, Entity.collections)
        q = q.filter(coll.id.in_(collection_ids))
        return q.filter(coll.deleted_at == None)  # noqa

    @classmethod
    def by_identifier(cls, scheme, identifier, collections=None):
        """Return the first live entity carrying the given identifier."""
        ident = aliased(EntityIdentifier)
        q = db.session.query(Entity).filter(Entity.deleted_at == None)  # noqa
        q = cls.filter_collections(q, collections=collections)
        q = q.join(ident, Entity.identifiers)
        q = q.filter(ident.deleted_at == None)  # noqa
        q = q.filter(ident.scheme == scheme)
        q = q.filter(ident.identifier == identifier)
        return q.first()

    @classmethod
    def by_id_set(cls, ids, collections=None):
        """Return a dict mapping entity id to entity for the given ids."""
        if not len(ids):
            return {}
        q = cls.filter_collections(cls.all(), collections=collections)
        q = q.options(joinedload('collections'))
        q = q.filter(cls.id.in_(ids))
        return {ent.id: ent for ent in q}

    @classmethod
    def latest(cls):
        """Return the newest ``updated_at`` among active entities."""
        newest = db.session.query(func.max(cls.updated_at))
        return newest.filter(cls.state == cls.STATE_ACTIVE).scalar()

    @classmethod
    def all_by_document(cls, document_id):
        """Return a distinct query of active entities referenced by a document."""
        from aleph.model.reference import Reference
        q = cls.all().options(joinedload('collections'))
        q = q.filter(cls.state == cls.STATE_ACTIVE)
        q = q.join(Reference)
        q = q.filter(Reference.document_id == document_id)
        return q.distinct()

    @property
    def fingerprint(self):
        """Fingerprint of the entity name, as computed by ``make_fingerprint``."""
        return make_fingerprint(self.name)

    @property
    def terms(self):
        """Return the non-empty names of the entity and its other names."""
        collected = {self.name}
        for other_name in self.other_names:
            collected.update(other_name.terms)
        return [t for t in collected if t is not None and len(t)]

    @property
    def regex_terms(self):
        """Return the shortest set of normalized terms worth searching for.

        Terms are normalized with ``normalize_strong`` and padded with a
        space on each side before comparison, so length limits (4 to 120)
        apply to the padded form. A term containing another padded term is
        dropped: if an entity matches both "Al Qaeda" and "Al Qaeda in Iraq,
        Syria and the Levant", it is useless to search for the latter.
        """
        padded = [' %s ' % normalize_strong(t) for t in self.terms]
        shortest = set()
        for candidate in padded:
            if not 4 <= len(candidate) <= 120:
                continue
            redundant = any(
                other != candidate and other in candidate
                for other in padded)
            if not redundant:
                shortest.add(candidate.strip())
        return shortest

    def __repr__(self):
        """Return a debugging representation showing id and name."""
        parts = (self.id, self.name)
        return '<Entity(%r, %r)>' % parts

    def __unicode__(self):
        """Return the entity name as its text representation."""
        return self.name

    def to_dict(self):
        """Serialize via the parent class and add the list of collection ids."""
        result = super(Entity, self).to_dict()
        result['collection_id'] = [c.id for c in self.collections]
        return result

    def to_ref(self):
        """Return a compact reference: id, name, schema and collection ids."""
        ref = {}
        ref['id'] = self.id
        ref['name'] = self.name
        ref['$schema'] = self.type
        ref['collection_id'] = [c.id for c in self.collections]
        return ref
Beispiel #23
0
class Entity(db.Model, UuidModel, SoftDeleteModel):
    """Entity stored with a schema name and a JSONB property bag.

    Each entity belongs to exactly one collection and may carry several
    foreign ids.
    """
    THING = 'Thing'

    name = db.Column(db.Unicode)
    schema = db.Column(db.String(255), index=True)
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    @property
    def model(self):
        """Schema object looked up in ``model`` by ``self.schema``."""
        return model.get(self.schema)

    @property
    def terms(self):
        """Return the set of names: ``self.name`` plus non-empty aliases."""
        terms = {self.name}
        for alias in ensure_list(self.data.get('alias')):
            if alias is not None and len(alias):
                terms.add(alias)
        return terms

    @property
    def regex_terms(self):
        """Return the shortest set of match forms worth searching for.

        This is to find the shortest possible regex for each entity. If,
        for example, an entity matches both "Al Qaeda" and "Al Qaeda in
        Iraq, Syria and the Levant", it is useless to search for the latter.
        Terms whose match form is ``None``, shorter than 4 or longer than
        120 characters are skipped.
        """
        terms = {match_form(t) for t in self.terms}
        regex_terms = set()
        for term in terms:
            if term is None or len(term) < 4 or len(term) > 120:
                continue
            contained = any(
                other is not None and other != term and other in term
                for other in terms)
            if not contained:
                regex_terms.add(term)
        return regex_terms

    def delete_matches(self):
        """Delete ``Match`` rows where this entity is on either side."""
        pq = db.session.query(Match)
        pq = pq.filter(
            or_(Match.entity_id == self.id, Match.match_id == self.id))
        pq.delete(synchronize_session=False)
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete the entity, its matches and its alerts.

        ``deleted_at`` defaults to the current UTC time.
        """
        self.delete_matches()
        deleted_at = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=deleted_at)
        super(Entity, self).delete(deleted_at=deleted_at)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Bulk soft-delete every entity of a collection.

        Alerts on those entities get ``deleted_at`` set, matches involving
        them are removed, and the not-yet-deleted entities themselves get
        ``deleted_at`` set. ``deleted_at`` defaults to the current UTC time.
        """
        from aleph.model import Alert
        deleted_at = deleted_at or datetime.utcnow()

        entities = db.session.query(cls.id)
        entities = entities.filter(cls.collection_id == collection_id)
        entities = entities.subquery()

        pq = db.session.query(Alert)
        pq = pq.filter(Alert.entity_id.in_(entities))
        pq.update({Alert.deleted_at: deleted_at}, synchronize_session=False)

        pq = db.session.query(Match)
        pq = pq.filter(Match.entity_id.in_(entities))
        pq.delete(synchronize_session=False)

        pq = db.session.query(Match)
        pq = pq.filter(Match.match_id.in_(entities))
        pq.delete(synchronize_session=False)

        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq = pq.filter(cls.deleted_at == None)  # noqa
        pq.update({cls.deleted_at: deleted_at}, synchronize_session=False)

    def merge(self, other):
        """Merge ``other`` into this entity and delete ``other``.

        Schema, foreign ids, data and timestamps are combined; a differing
        name of ``other`` is kept as an alias; alerts on ``other`` are
        re-pointed to self. The session is committed.

        Raises:
            ValueError: if ``other`` is the same entity or belongs to a
                different collection.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        self.schema = model.precise_schema(self.schema, other.schema)
        # Union both sides' foreign ids. The previous code passed
        # self.foreign_ids twice, silently dropping the ids of ``other``.
        self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        data = merge_data(self.data, other.data)
        if self.name != other.name:
            data = merge_data(data, {'alias': [other.name]})
        self.data = data

        # update alerts
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({Alert.entity_id: self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        db.session.refresh(other)

    def update(self, entity):
        """Apply the ``schema``, ``properties`` and ``name`` of ``entity``.

        When ``properties`` is a mapping it is validated against the schema
        (with the name injected for validation, which modifies the passed
        mapping). The name is stored in its own column and removed from the
        data afterwards. The entity is added to the session.
        """
        self.schema = entity.get('schema')

        data = entity.get('properties')
        if is_mapping(data):
            data['name'] = [entity.get('name')]
            self.data = self.model.validate(data)
        elif self.data is None:
            self.data = {}

        self.data.pop('name', None)
        self.name = entity.get('name')

        # TODO: should this be mutable?
        # self.foreign_ids = string_set(entity.get('foreign_ids'))
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    @classmethod
    def create(cls, data, collection):
        """Create an entity, reusing one (even if deleted) by foreign id.

        The returned entity has ``deleted_at`` cleared and ``data`` applied.
        """
        foreign_ids = string_set(data.get('foreign_ids'))
        ent = cls.by_foreign_ids(foreign_ids, collection.id, deleted=True)
        if ent is None:
            ent = cls()
            ent.id = make_textid()
            ent.collection = collection
            ent.foreign_ids = foreign_ids
        ent.update(data)
        ent.deleted_at = None
        return ent

    @classmethod
    def by_foreign_ids(cls, foreign_ids, collection_id, deleted=False):
        """Return the first entity whose foreign ids contain ``foreign_ids``.

        Returns ``None`` for an empty ``foreign_ids``. Entities that are not
        deleted sort before deleted ones.
        """
        if not len(foreign_ids):
            return None
        q = cls.all(deleted=deleted)
        q = q.filter(Entity.collection_id == collection_id)
        foreign_id = func.cast(foreign_ids, ARRAY(db.Unicode()))
        q = q.filter(cls.foreign_ids.contains(foreign_id))
        q = q.order_by(Entity.deleted_at.desc().nullsfirst())
        return q.first()

    @classmethod
    def all_ids(cls, deleted=False, authz=None):
        """Query entity ids, limited to readable collections for non-admins."""
        q = super(Entity, cls).all_ids(deleted=deleted)
        if authz is not None and not authz.is_admin:
            q = q.join(Permission,
                       cls.collection_id == Permission.collection_id)
            q = q.filter(Permission.deleted_at == None)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            q = q.filter(Permission.role_id.in_(authz.roles))
        return q

    @classmethod
    def latest(cls):
        """Return the newest ``updated_at`` among non-deleted entities."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.deleted_at == None)  # noqa
        return q.scalar()

    def __repr__(self):
        """Return a debugging representation showing id and name."""
        return '<Entity(%r, %r)>' % (self.id, self.name)
Beispiel #24
0
class EntitySetItem(db.Model, SoftDeleteModel):
    """Membership of one entity in an entity set, with a judgement."""
    __tablename__ = "entityset_item"

    id = db.Column(db.Integer, primary_key=True)
    entityset_id = db.Column(
        db.String(ENTITY_ID_LEN), db.ForeignKey("entityset.id"), index=True)
    entity_id = db.Column(db.String(ENTITY_ID_LEN), index=True)
    collection_id = db.Column(
        db.Integer, db.ForeignKey("collection.id"), index=True)

    compared_to_entity_id = db.Column(db.String(ENTITY_ID_LEN))
    added_by_id = db.Column(db.Integer, db.ForeignKey("role.id"))
    judgement = db.Column(db.Enum(Judgement))

    entityset = db.relationship(EntitySet)
    collection = db.relationship(Collection)
    added_by = db.relationship(Role)

    @classmethod
    def by_entity_id(cls, entityset, entity_id):
        """Return the most recently created item for the entity in the set."""
        matching = cls.all().filter(cls.entityset_id == entityset.id)
        matching = matching.filter(cls.entity_id == entity_id)
        return matching.order_by(cls.created_at.desc()).first()

    @classmethod
    def save(cls,
             entityset,
             entity_id,
             judgement=None,
             collection_id=None,
             **data):
        """Record a judgement for ``entity_id`` within ``entityset``.

        ``judgement`` defaults to ``Judgement.POSITIVE``. An existing item
        with the same judgement is returned unchanged; one with a different
        judgement is deleted first. ``Judgement.NO_JUDGEMENT`` creates no
        item and returns ``None``. ``compared_to_entity_id`` and
        ``added_by_id`` are read from ``data``.
        """
        if judgement is None:
            verdict = Judgement.POSITIVE
        else:
            verdict = Judgement(judgement)

        previous = cls.by_entity_id(entityset, entity_id)
        if previous is not None:
            if previous.judgement == verdict:
                return previous
            previous.delete()

        if verdict == Judgement.NO_JUDGEMENT:
            return None

        fields = dict(
            entityset_id=entityset.id,
            entity_id=entity_id,
            judgement=verdict,
            compared_to_entity_id=data.get("compared_to_entity_id"),
            collection_id=collection_id or entityset.collection_id,
            added_by_id=data.get("added_by_id"),
        )
        item = cls(**fields)
        db.session.add(item)
        return item

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Delete items of a collection and items of that collection's sets."""
        # Items that belong to the collection themselves.
        own = db.session.query(cls).filter(
            cls.collection_id == collection_id)
        own.delete(synchronize_session=False)

        # Items that hang off an entity set of the collection.
        via_set = db.session.query(cls)
        via_set = via_set.filter(EntitySet.collection_id == collection_id)
        via_set = via_set.filter(EntitySet.id == cls.entityset_id)
        via_set.delete(synchronize_session=False)

    @classmethod
    def delete_by_entity(cls, entity_id):
        """Delete every item that refers to ``entity_id``."""
        items = db.session.query(cls).filter(cls.entity_id == entity_id)
        items.delete(synchronize_session=False)

    def to_dict(self):
        """Serialize the item; ``judgement`` is included only when set."""
        result = self.to_dict_dates()
        result["entityset_id"] = self.entityset_id
        result["entity_id"] = self.entity_id
        result["collection_id"] = self.collection_id
        result["added_by_id"] = self.added_by_id
        result["compared_to_entity_id"] = self.compared_to_entity_id
        if self.judgement:
            result["judgement"] = self.judgement.value
        return result

    def __repr__(self):
        """Return a debugging representation showing set and entity ids."""
        parts = (self.entityset_id, self.entity_id)
        return "<EntitySetItem(%r, %r)>" % parts
Beispiel #25
0
class Notification(db.Model, IdModel, DatedModel):
    """An event published on one or more channels, optionally by an actor."""
    GLOBAL = 'Global'

    _event = db.Column('event', db.String(255), nullable=False)
    channels = db.Column(ARRAY(db.String(255)), index=True)
    params = db.Column(JSONB)

    actor_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    actor = db.relationship(Role)

    @hybrid_property
    def event(self):
        """Event object looked up in ``Events`` by the stored event name."""
        return Events.get(self._event)

    @event.setter
    def event(self, event):
        """Store the name of ``event``."""
        self._event = event.name

    def iterparams(self):
        """Yield ``(name, class, value)`` for the actor and each set param.

        The actor is yielded first when ``actor_id`` is set. Parameters are
        those declared by the event; nothing more is yielded when the event
        is unknown. A missing ``params`` value is treated as empty.
        """
        if self.actor_id is not None:
            yield 'actor', Role, self.actor_id
        if self.event is None:
            return
        # Guard against a NULL params column instead of raising.
        params = self.params or {}
        for name, clazz in self.event.params.items():
            value = params.get(name)
            if value is not None:
                yield name, clazz, value

    def to_dict(self):
        """Serialize the notification including its dates."""
        data = self.to_dict_dates()
        data.update({
            'id': self.id,
            'actor_id': self.actor_id,
            'event': self._event,
            'params': self.params
        })
        return data

    @classmethod
    def publish(cls, event, actor_id=None, channels=None, params=None):
        """Create a notification and add it to the session.

        ``channels`` is de-duplicated and stripped of ``None`` entries.
        Both ``channels`` and ``params`` default to fresh empty containers;
        the former mutable defaults were shared between calls, so every
        notification published without params stored the same dict object.
        """
        channels = [] if channels is None else channels
        params = {} if params is None else params
        notf = cls()
        notf.event = event
        notf.actor_id = actor_id
        notf.params = params
        notf.channels = list({c for c in channels if c is not None})
        db.session.add(notf)
        return notf

    @classmethod
    def by_channels(cls, channels, role, since=None):
        """Query notifications on ``channels`` that are relevant to ``role``.

        Only known events are included and notifications caused by the role
        itself are left out. Results start at the later of ``since`` and
        ``role.notified_at`` (when available), newest first.
        """
        channels = cast(channels, ARRAY(db.String(255)))
        q = cls.all()
        q = q.filter(cls.channels.overlap(channels))
        q = q.filter(cls._event.in_(Events.names()))
        q = q.filter(or_(
            cls.actor_id != role.id,
            cls.actor_id == None  # noqa
        ))
        since = since or role.notified_at
        if since is not None and role.notified_at is not None:
            since = max(since, role.notified_at)
        if since is not None:
            q = q.filter(cls.created_at >= since)
        q = q.order_by(cls.created_at.desc())
        return q

    @classmethod
    def delete_by_channel(cls, channel):
        """Delete every notification published on ``channel``."""
        q = cls.all()
        q = q.filter(cls.channels.any(channel))
        q.delete(synchronize_session=False)
Beispiel #26
0
class Entity(db.Model, SoftDeleteModel):
    """Entity stored as a schema name plus a JSONB property bag.

    Each entity belongs to one collection and may carry one foreign id.
    """
    THING = 'Thing'
    LEGAL_ENTITY = 'LegalEntity'

    id = db.Column(db.String(ENTITY_ID_LEN),
                   primary_key=True,
                   default=make_textid,
                   nullable=False,
                   unique=False)
    name = db.Column(db.Unicode)
    schema = db.Column(db.String(255), index=True)
    foreign_id = db.Column(db.Unicode)
    data = db.Column('data', JSONB)

    collection_id = db.Column(
        db.Integer, db.ForeignKey('collection.id'), index=True)  # noqa
    collection = db.relationship(
        Collection, backref=db.backref('entities', lazy='dynamic'))  # noqa

    @property
    def model(self):
        """Schema object looked up in ``model`` by ``self.schema``."""
        schema_name = self.schema
        return model.get(schema_name)

    @property
    def signed_id(self):
        """Entity id signed with the namespace of its collection."""
        namespace = self.collection.ns
        return namespace.sign(self.id)

    def delete_matches(self):
        """Delete ``Match`` rows where this entity is on either side."""
        involved = or_(Match.entity_id == self.id, Match.match_id == self.id)
        db.session.query(Match).filter(involved).delete(
            synchronize_session=False)
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete the entity and its matches.

        ``deleted_at`` defaults to the current UTC time.
        """
        self.delete_matches()
        when = deleted_at or datetime.utcnow()
        super(Entity, self).delete(deleted_at=when)

    def update(self, entity):
        """Validate ``entity`` and store its schema and properties.

        Checksum properties are reset to the values currently stored, so
        they cannot be changed through this method. The entity is added to
        the session.
        """
        proxy = model.get_proxy(entity)
        proxy.schema.validate(entity)
        self.schema = proxy.schema.name
        stored = self.to_proxy()
        for prop in proxy.iterprops():
            if not prop.type == registry.checksum:
                continue
            # Do not allow the user to overwrite hashes because this could
            # lead to a user accessing random objects.
            proxy.set(prop, stored.get(prop), cleaned=True, quiet=True)
        self.data = proxy.properties
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    def to_proxy(self):
        """Return a proxy built from the id, schema, data, name and
        ``updated_at`` of this entity."""
        payload = {
            'id': self.id,
            'schema': self.schema,
            'properties': self.data
        }
        proxy = model.get_proxy(payload)
        proxy.add('name', self.name)
        proxy.set('indexUpdatedAt', self.updated_at)
        return proxy

    @classmethod
    def create(cls, data, collection):
        """Create an entity, reusing one (even if deleted) by foreign id.

        ``deleted_at`` is cleared before ``data`` is applied.
        """
        foreign_id = data.get('foreign_id')
        entity = cls.by_foreign_id(foreign_id, collection.id, deleted=True)
        if entity is None:
            entity = cls()
            entity.id = make_textid()
            entity.collection = collection
            entity.foreign_id = foreign_id
            entity.data = {}
        entity.deleted_at = None
        entity.update(data)
        return entity

    @classmethod
    def by_id(cls, entity_id, collection_id=None):
        """Return the entity for a (possibly signed) id, or ``None``.

        ``collection_id`` is accepted but not used by the lookup.
        """
        plain_id, _ = Namespace.parse(entity_id)
        return cls.all().filter(cls.id == plain_id).first()

    @classmethod
    def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
        """Return the first entity with ``foreign_id`` in the collection.

        Returns ``None`` when ``foreign_id`` is ``None``. Entities that are
        not deleted sort before deleted ones.
        """
        if foreign_id is None:
            return None
        candidates = cls.all(deleted=deleted)
        candidates = candidates.filter(Entity.collection_id == collection_id)
        candidates = candidates.filter(cls.foreign_id == foreign_id)
        candidates = candidates.order_by(
            Entity.deleted_at.desc().nullsfirst())
        return candidates.first()

    @classmethod
    def by_collection(cls, collection_id):
        """Query all entities of a collection."""
        in_collection = Entity.collection_id == collection_id
        return cls.all().filter(in_collection)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Bulk soft-delete every entity of a collection.

        Matches involving those entities are removed and the not-yet-deleted
        entities get ``deleted_at`` set (current UTC time by default).
        """
        when = deleted_at or datetime.utcnow()

        entity_ids = db.session.query(cls.id)
        entity_ids = entity_ids.filter(cls.collection_id == collection_id)
        entity_ids = entity_ids.subquery()

        # Remove matches in which the entities appear on the left ...
        left = db.session.query(Match)
        left = left.filter(Match.entity_id.in_(entity_ids))
        left.delete(synchronize_session=False)

        # ... and on the right side.
        right = db.session.query(Match)
        right = right.filter(Match.match_id.in_(entity_ids))
        right.delete(synchronize_session=False)

        live = db.session.query(cls)
        live = live.filter(cls.collection_id == collection_id)
        live = live.filter(cls.deleted_at == None)  # noqa
        live.update({cls.deleted_at: when}, synchronize_session=False)

    def __repr__(self):
        """Return a debugging representation showing id and name."""
        parts = (self.id, self.name)
        return '<Entity(%r, %r)>' % parts
Beispiel #27
0
class EntitySet(db.Model, SoftDeleteModel):
    """A typed, labelled set of entities owned by a role in a collection."""
    __tablename__ = "entityset"

    # set types
    LIST = "list"
    DIAGRAM = "diagram"
    TIMELINE = "timeline"
    PROFILE = "profile"

    TYPES = frozenset([LIST, DIAGRAM, TIMELINE, PROFILE])

    id = db.Column(db.String(ENTITY_ID_LEN), primary_key=True)
    label = db.Column(db.Unicode)
    type = db.Column(db.String(10), index=True, default=LIST)
    summary = db.Column(db.Unicode, nullable=True)
    layout = db.Column("layout", JSONB, nullable=True)

    role_id = db.Column(db.Integer, db.ForeignKey("role.id"), index=True)
    role = db.relationship(Role)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True)
    collection = db.relationship(Collection)

    parent_id = db.Column(db.String(ENTITY_ID_LEN),
                          db.ForeignKey("entityset.id"))
    parent = db.relationship("EntitySet", backref="children", remote_side=[id])

    @property
    def entities(self):
        """Ids of entities with a positive, non-deleted item in this set."""
        q = db.session.query(EntitySetItem.entity_id)
        q = q.filter(EntitySetItem.entityset_id == self.id)
        q = q.filter(EntitySetItem.judgement == Judgement.POSITIVE)
        q = q.filter(EntitySetItem.deleted_at == None)  # noqa
        return [entity_id for entity_id, in q.all()]

    @classmethod
    def create(cls, data, collection, authz):
        """Create an entity set in ``collection`` owned by ``authz.id``."""
        entityset = cls()
        entityset.id = make_textid()
        entityset.layout = {}
        entityset.role_id = authz.id
        entityset.collection_id = collection.id
        entityset.update(data)
        return entityset

    @classmethod
    def by_authz(cls, authz, types=None, prefix=None):
        """Query sets in collections readable by ``authz``.

        Optionally restricted by ``types`` and by a label ``prefix``.
        """
        ids = authz.collections(authz.READ)
        q = cls.by_type(types)
        q = q.filter(cls.collection_id.in_(ids))
        if prefix is not None:
            q = q.filter(query_like(cls.label, prefix))
        return q

    @classmethod
    def by_type(cls, types):
        """Returns EntitySets of a particular type.

        No type filter is applied when ``types`` is empty or names every
        known type.
        """
        q = EntitySet.all()
        types = ensure_list(types)
        # Compare as sets: a list never equals the TYPES frozenset, so the
        # former ``types != cls.TYPES`` check was always true.
        if len(types) and frozenset(types) != cls.TYPES:
            q = q.filter(EntitySet.type.in_(types))
        return q

    @classmethod
    def by_collection_id(cls, collection_id, types=None):
        """Returns EntitySets within a given collection_id"""
        q = cls.by_type(types)
        q = q.filter(EntitySet.collection_id == collection_id)
        return q

    @classmethod
    def by_entity_id(cls,
                     entity_id,
                     collection_ids=None,
                     judgements=None,
                     types=None,
                     labels=None):
        """Returns EntitySets that include EntitySetItems with the provided entity_id.

        NOTE: This only considers EntitySetItems who haven't been deleted
        """
        q = cls.by_type(types)
        if labels is not None:
            q = q.filter(EntitySet.label.in_(ensure_list(labels)))
        q = q.join(EntitySetItem)
        q = q.filter(EntitySetItem.deleted_at == None)  # NOQA
        q = q.filter(EntitySetItem.entity_id == entity_id)
        if collection_ids:
            q = q.filter(EntitySet.collection_id.in_(collection_ids))
        if judgements is not None:
            q = q.filter(EntitySetItem.judgement.in_(ensure_list(judgements)))
        return q

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at):
        """Delete the items of a collection and soft-delete its sets."""
        EntitySetItem.delete_by_collection(collection_id)

        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq = pq.filter(cls.deleted_at == None)  # noqa
        pq.update({cls.deleted_at: deleted_at}, synchronize_session=False)

    def items(self, authz=None, deleted=False):
        """Query the items of this set, oldest first.

        With ``authz`` given, only items in readable collections are kept.
        """
        q = EntitySetItem.all(deleted=deleted)
        if authz is not None:
            ids = authz.collections(authz.READ)
            q = q.filter(EntitySetItem.collection_id.in_(ids))
        q = q.filter(EntitySetItem.entityset_id == self.id)
        q = q.order_by(EntitySetItem.created_at.asc())
        return q

    def profile(self, judgements=None, deleted=False):
        """Query the items of this set, optionally limited to ``judgements``."""
        q = self.items(deleted=deleted)
        if judgements is not None:
            q = q.filter(EntitySetItem.judgement.in_(judgements))
        return q

    def merge(self, other, merged_by_id):
        """Merge two entity_sets into each other and return the survivor.

        This tries to retain a state where there is only one judgement
        between a set and an entity: items only present in the other set are
        moved over, duplicates are dropped, and conflicting judgements are
        replaced by a combined item attributed to ``merged_by_id``.

        NOTE(review): the set with the later ``created_at`` is the one that
        survives. The original docstring said the older one is retained;
        confirm which is intended.
        """
        if other.id == self.id:
            return self
        if other.created_at > self.created_at:
            return other.merge(self, merged_by_id)

        local_items = {i.entity_id: i for i in self.items()}
        for remote in other.items():
            local = local_items.get(remote.entity_id)
            if local is None:
                remote.entityset_id = self.id
                remote.updated_at = datetime.utcnow()
                db.session.add(remote)
                continue
            # Fixed attribute typo (was ``local.judgment``), which raised
            # AttributeError whenever both sets contained the same entity.
            judgement = local.judgement + remote.judgement
            if judgement == local.judgement:
                remote.delete()
                continue

            origin = local.compared_to_entity_id or remote.compared_to_entity_id
            combined = EntitySetItem(
                entityset_id=self.id,
                entity_id=local.entity_id,
                collection_id=local.collection_id,
                added_by_id=merged_by_id,
                judgement=judgement,
                compared_to_entity_id=origin,
            )
            db.session.add(combined)
            local.delete()
            remote.delete()
        # Write the moved items out before the bulk soft-delete below so it
        # cannot touch them, whatever the session's autoflush setting.
        db.session.flush()
        other.delete()
        self.updated_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()
        return self

    def update(self, data):
        """Apply label, type, summary and layout from ``data``.

        Keys missing from ``data`` keep their current value. ``deleted_at``
        is cleared and the set is added to the session.
        """
        self.label = data.get("label", self.label)
        self.type = data.get("type", self.type)
        self.summary = data.get("summary", self.summary)
        self.layout = data.get("layout", self.layout)
        self.updated_at = datetime.utcnow()
        self.deleted_at = None
        db.session.add(self)

    def delete(self, deleted_at=None):
        """Soft-delete the set and its items and detach its mappings.

        ``deleted_at`` defaults to the current UTC time.
        """
        # Resolve the timestamp first: the bulk update below previously
        # wrote the raw argument, i.e. NULL by default, leaving every item
        # un-deleted.
        deleted_at = deleted_at or datetime.utcnow()

        pq = db.session.query(EntitySetItem)
        pq = pq.filter(EntitySetItem.entityset_id == self.id)
        pq = pq.filter(EntitySetItem.deleted_at == None)  # noqa
        pq.update({EntitySetItem.deleted_at: deleted_at},
                  synchronize_session=False)

        for mapping in self.mappings:
            mapping.entityset_id = None
            db.session.add(mapping)

        self.deleted_at = deleted_at
        db.session.add(self)

    def to_dict(self):
        """Serialize the set; ids are passed through ``stringify``."""
        data = self.to_dict_dates()
        data.update({
            "id": stringify(self.id),
            "type": self.type,
            "label": self.label,
            "summary": self.summary,
            "layout": self.layout,
            "role_id": stringify(self.role_id),
            "collection_id": stringify(self.collection_id),
        })
        return data

    def __repr__(self):
        """Return a debugging representation showing id and collection id."""
        return "<EntitySet(%r, %r)>" % (self.id, self.collection_id)
Beispiel #28
0
class Mapping(db.Model, DatedModel):
    """Stored mapping query used to load entities from a table.

    A row links the mapping query to a role, a collection and a source
    table id, and carries the status and error message columns for its
    last run.
    """

    __tablename__ = "mapping"

    # Values used for ``last_run_status``.
    FAILED = "failed"
    SUCCESS = "success"
    PENDING = "pending"
    # Translatable label for each status value.
    STATUS = {
        SUCCESS: lazy_gettext("success"),
        FAILED: lazy_gettext("failed"),
        PENDING: lazy_gettext("pending"),
    }

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column("query", JSONB)

    role_id = db.Column(db.Integer, db.ForeignKey("role.id"), index=True)
    role = db.relationship(
        Role,
        backref=db.backref("mappings", lazy="dynamic"),
    )  # noqa

    collection_id = db.Column(
        db.Integer,
        db.ForeignKey("collection.id"),
        index=True,
    )
    collection = db.relationship(
        Collection,
        backref=db.backref("mappings", lazy="dynamic"),
    )

    table_id = db.Column(db.String(ENTITY_ID_LEN), index=True)

    disabled = db.Column(db.Boolean, nullable=True)
    last_run_status = db.Column(db.Unicode, nullable=True)
    last_run_err_msg = db.Column(db.Unicode, nullable=True)

    def get_proxy_context(self):
        """Return the metadata dict to be added to each generated entity."""
        return dict(
            created_at=iso_text(self.created_at),
            updated_at=iso_text(self.updated_at),
            role_id=self.role_id,
            mutable=True,
        )

    def update(self, query=None, table_id=None):
        """Refresh ``updated_at`` and store any truthy ``query``/``table_id``.

        Falsy arguments leave the corresponding column untouched. The
        object is added to the session; nothing is committed here.
        """
        replacements = (("query", query), ("table_id", table_id))
        for attribute, value in replacements:
            if value:
                setattr(self, attribute, value)
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    def set_status(self, status, error=None):
        """Record the last-run status and error message (``None`` by default)."""
        self.last_run_status, self.last_run_err_msg = status, error
        db.session.add(self)

    def to_dict(self):
        """Return a dict representation of this mapping.

        ``last_run_status`` is reported as the matching ``STATUS`` label,
        or ``None`` when the stored value has no entry there.
        """
        data = self.to_dict_dates()
        data["id"] = stringify(self.id)
        data["query"] = dict(self.query)
        data["role_id"] = stringify(self.role_id)
        data["collection_id"] = stringify(self.collection_id)
        data["table_id"] = self.table_id
        data["last_run_status"] = self.STATUS.get(self.last_run_status)
        data["last_run_err_msg"] = self.last_run_err_msg
        return data

    @classmethod
    def by_collection(cls, collection_id, table_id=None):
        """Query mappings of a collection, optionally limited to one table."""
        criteria = [cls.collection_id == collection_id]
        if table_id is not None:
            criteria.append(cls.table_id == table_id)
        return cls.all().filter(*criteria)

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete every mapping row of the given collection."""
        matching = db.session.query(cls).filter(
            cls.collection_id == collection_id
        )
        matching.delete(synchronize_session=False)

    @classmethod
    def delete_by_table(cls, entity_id):
        """Bulk-delete every mapping row whose ``table_id`` is ``entity_id``."""
        matching = db.session.query(cls).filter(cls.table_id == entity_id)
        matching.delete(synchronize_session=False)

    @classmethod
    def create(cls, query, table_id, collection, role_id):
        """Build a new mapping, add it to the session and return it."""
        mapping = cls()
        mapping.query, mapping.table_id = query, table_id
        mapping.role_id = role_id
        mapping.collection_id = collection.id
        # update() stamps ``updated_at`` and adds the object to the session.
        mapping.update()
        return mapping

    def __repr__(self):
        """Debug representation showing the mapping id and its table id."""
        identity = (self.id, self.table_id)
        return "<Mapping(%r, %r)>" % identity
Beispiel #29
0
class Mapping(db.Model, SoftDeleteModel):
    """Stored mapping query used to load entities from a table.

    This variant is soft-deleted via ``deleted_at`` and its mutating
    instance methods commit the session themselves.
    """
    __tablename__ = 'mapping'

    # Values used for ``last_run_status``.
    FAILED = 'failed'
    SUCCESS = 'success'
    # Translatable label for each status value.
    STATUS = {
        SUCCESS: lazy_gettext('success'),
        FAILED: lazy_gettext('failed'),
    }

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column('query', JSONB)

    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    role = db.relationship(
        Role,
        backref=db.backref('mappings', lazy='dynamic'),
    )  # noqa

    collection_id = db.Column(
        db.Integer,
        db.ForeignKey('collection.id'),
        index=True,
    )  # noqa
    collection = db.relationship(
        Collection,
        backref=db.backref('mappings', lazy='dynamic'),
    )  # noqa

    table_id = db.Column(db.String(ENTITY_ID_LEN), index=True)

    last_run_status = db.Column(db.Unicode, nullable=True)
    last_run_err_msg = db.Column(db.Unicode, nullable=True)

    def _add_and_commit(self):
        """Add this object to the session and commit immediately."""
        db.session.add(self)
        db.session.commit()

    def update(self, query=None, table_id=None):
        """Refresh ``updated_at``, store truthy arguments, and commit.

        Falsy ``query``/``table_id`` values leave the column untouched.
        """
        self.updated_at = datetime.utcnow()
        for attribute, value in (('query', query), ('table_id', table_id)):
            if value:
                setattr(self, attribute, value)
        self._add_and_commit()

    def set_status(self, status, error=None):
        """Record the last-run status and error message, then commit."""
        self.last_run_status, self.last_run_err_msg = status, error
        self._add_and_commit()

    def delete(self, deleted_at=None):
        """Soft-delete this mapping and commit.

        ``deleted_at`` defaults to the current UTC time when falsy.
        """
        self.deleted_at = deleted_at or datetime.utcnow()
        self._add_and_commit()

    def to_dict(self):
        """Return a dict representation of this mapping.

        ``last_run_status`` is reported as the matching ``STATUS`` label,
        or ``None`` when the stored value has no entry there.
        """
        data = self.to_dict_dates()
        data['id'] = stringify(self.id)
        data['query'] = dict(self.query)
        data['role_id'] = stringify(self.role_id)
        data['collection_id'] = stringify(self.collection_id)
        data['table_id'] = self.table_id
        data['last_run_status'] = self.STATUS.get(self.last_run_status)
        data['last_run_err_msg'] = self.last_run_err_msg
        return data

    @classmethod
    def by_collection(cls, collection_id, table_id=None):
        """Query mappings of a collection, optionally limited to one table."""
        criteria = [cls.collection_id == collection_id]
        if table_id is not None:
            criteria.append(cls.table_id == table_id)
        return cls.all().filter(*criteria)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Soft-delete all not-yet-deleted mappings of a collection.

        ``deleted_at`` defaults to the current UTC time when falsy. The
        bulk update is issued on the session but not committed here.
        """
        stamp = deleted_at or datetime.utcnow()
        live = db.session.query(cls).filter(
            cls.collection_id == collection_id,
            cls.deleted_at == None,  # noqa
        )
        live.update({cls.deleted_at: stamp}, synchronize_session=False)

    @classmethod
    def create(cls, query, table_id, collection, role_id):
        """Build a new mapping, persist it via ``update()`` and return it."""
        mapping = cls()
        mapping.query, mapping.table_id = query, table_id
        mapping.role_id = role_id
        mapping.collection_id = collection.id
        # update() stamps ``updated_at``, adds to the session and commits.
        mapping.update()
        return mapping

    def __repr__(self):
        """Debug representation showing the mapping id and its table id."""
        identity = (self.id, self.table_id)
        return '<Mapping(%r, %r)>' % identity
Beispiel #30
0
class Document(db.Model, DatedModel):
    """Database record for a document within a collection.

    Descriptive metadata is kept in the ``meta`` JSONB column and the
    record can be converted into an entity proxy with ``to_proxy()``.
    """
    # Schema names assigned to ``schema``.
    SCHEMA = 'Document'
    SCHEMA_FOLDER = 'Folder'
    SCHEMA_TABLE = 'Table'

    id = db.Column(db.BigInteger, primary_key=True)
    content_hash = db.Column(db.Unicode(65), nullable=True, index=True)
    foreign_id = db.Column(db.Unicode, unique=False, nullable=True, index=True)
    schema = db.Column(db.String(255), nullable=False)
    meta = db.Column(JSONB, default={})

    uploader_id = db.Column(db.Integer,
                            db.ForeignKey('role.id'),
                            nullable=True)  # noqa
    parent_id = db.Column(db.BigInteger,
                          db.ForeignKey('document.id'),
                          nullable=True,
                          index=True)  # noqa
    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              nullable=False,
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('documents',
                                                    lazy='dynamic'))  # noqa

    def __init__(self, **kw):
        """Create a document with an empty ``meta`` dict unless ``kw`` sets one."""
        # Assign before delegating so that a ``meta`` keyword still wins.
        self.meta = {}
        super(Document, self).__init__(**kw)

    @property
    def model(self):
        """The schema object that ``model.get`` returns for ``self.schema``."""
        return model.get(self.schema)

    @property
    def ancestors(self):
        """List of ancestor document ids, ending with the direct parent.

        Returns an empty list for documents without a parent. The result is
        looked up in the cache first, then derived from the parent's cached
        list, and only as a last resort computed by loading the parent. It
        is written back to the cache only when this document's schema is a
        folder schema.
        """
        if self.parent_id is None:
            return []
        key = cache.key('ancestors', self.id)
        ancestors = cache.get_list(key)
        if len(ancestors):
            return ancestors
        parent_key = cache.key('ancestors', self.parent_id)
        ancestors = cache.get_list(parent_key)
        if not len(ancestors):
            ancestors = []
            parent = Document.by_id(self.parent_id)
            if parent is not None:
                ancestors = parent.ancestors
        ancestors.append(self.parent_id)
        if self.model.is_a(model.get(self.SCHEMA_FOLDER)):
            cache.set_list(key, ancestors, expire=cache.EXPIRE)
        return ancestors

    def update(self, data):
        """Copy the known metadata properties from ``data`` into ``meta``.

        Properties absent from ``data`` keep their current value (or become
        ``None`` when they were not set before).
        """
        props = ('title', 'summary', 'author', 'crawler', 'source_url',
                 'file_name', 'mime_type', 'headers', 'date', 'authored_at',
                 'modified_at', 'published_at', 'retrieved_at', 'languages',
                 'countries', 'keywords')
        for prop in props:
            self.meta[prop] = data.get(prop, self.meta.get(prop))
        # In-place changes to the dict must be flagged explicitly.
        flag_modified(self, 'meta')

    def delete(self, deleted_at=None):
        """Delete this row via the session; ``deleted_at`` is not used."""
        db.session.delete(self)

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete every document row of the given collection."""
        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq.delete(synchronize_session=False)

    @classmethod
    def save(cls,
             collection,
             parent=None,
             foreign_id=None,
             content_hash=None,
             meta=None,
             uploader_id=None):
        """Find a matching document in the collection or create a new one.

        The lookup is scoped to ``collection`` (and ``parent`` when given)
        and keyed on ``foreign_id`` if provided, otherwise on
        ``content_hash``.

        :param collection: collection the document belongs to.
        :param parent: optional parent document.
        :param foreign_id: optional foreign identifier.
        :param content_hash: optional content hash; when ``None`` the
            document's schema is set to the folder schema.
        :param meta: optional metadata dict applied through ``update()``.
        :param uploader_id: role id stored on newly created documents only.
        :returns: the document, added to the session but not committed.
        :raises ValueError: if neither ``foreign_id`` nor ``content_hash``
            is given.
        """
        q = cls.all()
        q = q.filter(Document.collection_id == collection.id)

        if parent is not None:
            q = q.filter(Document.parent_id == parent.id)
        if foreign_id is not None:
            q = q.filter(Document.foreign_id == foreign_id)
        elif content_hash is not None:
            q = q.filter(Document.content_hash == content_hash)
        else:
            raise ValueError("No unique criterion for document.")

        document = q.first()
        if document is None:
            document = cls()
            document.schema = cls.SCHEMA
            document.collection_id = collection.id
            document.uploader_id = uploader_id

        if parent is not None:
            document.parent_id = parent.id

        if foreign_id is not None:
            document.foreign_id = foreign_id

        document.content_hash = content_hash
        if content_hash is None:
            document.schema = cls.SCHEMA_FOLDER

        if meta is not None:
            document.update(meta)

        db.session.add(document)
        return document

    @classmethod
    def by_id(cls, id, collection_id=None):
        """Return the document with the given id, or ``None``.

        ``id`` is coerced with ``int()``; a value that cannot be converted
        yields ``None`` instead of raising. When ``collection_id`` is given
        the document must also belong to that collection.
        """
        try:
            id = int(id)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures mean "no such document"; anything
            # else is an actual error and should surface.
            return None
        q = cls.all()
        q = q.filter(cls.id == id)
        if collection_id is not None:
            q = q.filter(cls.collection_id == collection_id)
        return q.first()

    @classmethod
    def by_collection(cls, collection_id=None):
        """Query the documents whose ``collection_id`` matches."""
        q = cls.all()
        q = q.filter(cls.collection_id == collection_id)
        return q

    @classmethod
    def cleanup_deleted(cls):
        """Bulk-delete documents belonging to soft-deleted collections."""
        q = db.session.query(Collection.id)
        q = q.filter(Collection.deleted_at != None)  # noqa
        collection_ids = [c for (c, ) in q.all()]
        if not collection_ids:
            # Nothing to clean up; avoid a DELETE with an empty IN () list.
            return
        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id.in_(collection_ids))
        pq.delete(synchronize_session=False)

    def to_proxy(self):
        """Build an entity proxy from this document and its metadata.

        The file name and MIME type fall back to the ``content_disposition``
        and ``content_type`` headers when ``meta`` does not provide them.
        """
        proxy = model.get_proxy({
            'id': str(self.id),
            'schema': self.model,
            'properties': {}
        })
        # Work on a copy so popping the headers leaves ``self.meta`` intact.
        meta = dict(self.meta)
        headers = meta.pop('headers', {}) or {}
        headers = {slugify(k, sep='_'): v for k, v in headers.items()}
        proxy.set('contentHash', self.content_hash)
        proxy.set('parent', self.parent_id)
        proxy.set('ancestors', self.ancestors)
        proxy.set('crawler', meta.get('crawler'))
        proxy.set('sourceUrl', meta.get('source_url'))
        proxy.set('title', meta.get('title'))
        proxy.set('fileName', meta.get('file_name'))
        if not proxy.has('fileName'):
            disposition = headers.get('content_disposition')
            if disposition is not None:
                # NOTE(review): the ``cgi`` module is deprecated (PEP 594);
                # plan a replacement for this header parsing.
                _, attrs = cgi.parse_header(disposition)
                proxy.set('fileName', attrs.get('filename'))
        proxy.set('mimeType', meta.get('mime_type'))
        if not proxy.has('mimeType'):
            proxy.set('mimeType', headers.get('content_type'))
        proxy.set('language', meta.get('languages'))
        proxy.set('country', meta.get('countries'))
        proxy.set('keywords', meta.get('keywords'))
        proxy.set('headers', registry.json.pack(headers), quiet=True)
        proxy.set('authoredAt', meta.get('authored_at'))
        proxy.set('modifiedAt', meta.get('modified_at'))
        proxy.set('publishedAt', meta.get('published_at'))
        proxy.set('retrievedAt', meta.get('retrieved_at'))
        proxy.set('indexUpdatedAt', self.created_at)
        # 'sourceUrl' was previously set a second time here with the same
        # value; the redundant call has been dropped.
        return proxy

    def __repr__(self):
        """Debug representation showing the document id and schema name."""
        return '<Document(%r,%r)>' % (self.id, self.schema)