コード例 #1
0
ファイル: reference.py プロジェクト: adamchainz/aleph
class Reference(db.Model, IdModel, DatedModel):
    """Row linking one document to one entity.

    Besides the two foreign keys, each row carries an ``origin`` label and
    an integer ``weight``.
    """

    id = db.Column(db.Integer(), primary_key=True)
    document_id = db.Column(db.BigInteger, db.ForeignKey('document.id'))
    entity_id = db.Column(db.String(32), db.ForeignKey('entity.id'))
    origin = db.Column(db.String(128))
    weight = db.Column(db.Integer)

    # Each relationship also installs a dynamically loaded ``references``
    # backref on the target model.
    entity = db.relationship(
        'Entity',
        backref=db.backref('references', lazy='dynamic'),
    )
    document = db.relationship(
        'Document',
        backref=db.backref('references', lazy='dynamic'),
    )

    def to_dict(self):
        """Return a dict holding an entity summary, the weight and origin."""
        entity = self.entity
        entity_summary = {
            'id': entity.id,
            'name': entity.name,
            '$schema': entity.type,
        }
        return {
            'entity': entity_summary,
            'weight': self.weight,
            'origin': self.origin,
        }

    def __repr__(self):
        key = (self.document_id, self.entity_id)
        return '<Reference(%r, %r)>' % key
コード例 #2
0
ファイル: entity_details.py プロジェクト: stefanw/aleph
class EntityOtherName(db.Model, EntityDetails):
    """An additional name record attached to an entity."""

    _schema = '/entity/other_name.json#'

    entity_id = db.Column(db.String(32),
                          db.ForeignKey('entity.id'),
                          index=True)
    # The join condition only matches rows whose ``deleted_at`` is NULL.
    entity = db.relationship(
        'Entity',
        primaryjoin="and_(Entity.id == foreign(EntityOtherName.entity_id), "  # noqa
                    "EntityOtherName.deleted_at == None)",
        backref=db.backref('other_names',
                           lazy='dynamic',
                           cascade='all, delete-orphan'),
    )
    name = db.Column(db.Unicode)
    note = db.Column(db.Unicode)
    family_name = db.Column(db.Unicode)
    given_name = db.Column(db.Unicode)
    additional_name = db.Column(db.Unicode)
    honorific_prefix = db.Column(db.Unicode)
    honorific_suffix = db.Column(db.Unicode)
    patronymic_name = db.Column(db.Unicode)
    start_date = db.Column(db.DateTime)
    end_date = db.Column(db.DateTime)

    @property
    def display_name(self):
        """The stored ``name``, or an empty string when it is unset."""
        return '' if self.name is None else self.name

    @property
    def terms(self):
        """Single-element list containing the display name."""
        label = self.display_name
        return [label]

    def to_dict(self):
        """Extend the parent's dict with a ``display_name`` entry."""
        result = super(EntityOtherName, self).to_dict()
        result['display_name'] = self.display_name
        return result
コード例 #3
0
class DocumentRecord(db.Model):
    """A row of tabular data (sheet + row id) belonging to a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    sheet = db.Column(db.Integer, nullable=False)
    row_id = db.Column(db.Integer, nullable=False)
    data = db.Column(JSONB)
    document_id = db.Column(db.Integer(), db.ForeignKey('document.id'))
    # Records are exposed on the document as ``records`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        Document,
        backref=db.backref('records', cascade='all, delete-orphan'),
    )

    @property
    def tid(self):
        """Hex SHA-1 digest over document id, sheet and row id, in order."""
        digest = sha1(str(self.document_id))
        for part in (self.sheet, self.row_id):
            digest.update(str(part))
        return digest.hexdigest()

    @property
    def text(self):
        """Distinct non-``None`` values of ``data`` as a list.

        Returns an empty list when ``data`` is unset.
        """
        if self.data is None:
            return []
        distinct = set([cell for cell in self.data.values()
                        if cell is not None])
        return list(distinct)

    def __repr__(self):
        key = (self.document_id, self.row_id)
        return '<DocumentRecord(%r,%r)>' % key
コード例 #4
0
ファイル: document.py プロジェクト: maquchizi/aleph
class DocumentPage(db.Model):
    """One numbered page of text belonging to a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    number = db.Column(db.Integer(), nullable=False)
    text = db.Column(db.Unicode(), nullable=False)
    document_id = db.Column(
        db.Integer(),
        db.ForeignKey('document.id'),
        index=True,
    )
    # Pages are exposed on the document as ``pages`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        Document,
        backref=db.backref('pages', cascade='all, delete-orphan'),
    )

    @property
    def tid(self):
        """Hex SHA-1 digest over the document id followed by the page id."""
        digest = sha1(str(self.document_id))
        digest.update(str(self.id))
        return digest.hexdigest()

    def __repr__(self):
        key = (self.document_id, self.number)
        return '<DocumentPage(%r,%r)>' % key

    def text_parts(self):
        """Yield the page text if ``string_value`` accepts it.

        The raw ``text`` attribute is yielded, not the value returned by
        ``string_value``.
        """
        if string_value(self.text) is not None:
            yield self.text

    def to_dict(self):
        """Return the page's columns as a plain dict."""
        result = {}
        result['id'] = self.id
        result['number'] = self.number
        result['text'] = self.text
        result['document_id'] = self.document_id
        return result
コード例 #5
0
ファイル: document.py プロジェクト: maquchizi/aleph
class DocumentRecord(db.Model):
    """A row of tabular data (sheet + row id) belonging to a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    sheet = db.Column(db.Integer, nullable=False)
    row_id = db.Column(db.Integer, nullable=False)
    data = db.Column(JSONB)
    document_id = db.Column(db.Integer(),
                            db.ForeignKey('document.id'),
                            index=True)  # noqa
    # Records are exposed on the document as ``records`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        Document, backref=db.backref('records',
                                     cascade='all, delete-orphan'))  # noqa

    @property
    def tid(self):
        """Hex SHA-1 digest over document id, sheet and row id, in order."""
        tid = sha1(str(self.document_id))
        tid.update(str(self.sheet))
        tid.update(str(self.row_id))
        return tid.hexdigest()

    def text_parts(self):
        """Utility method to get all text snippets in a record.

        Yields each value of ``data`` for which ``string_value`` returns
        something other than ``None``. Yields nothing when ``data`` is unset.
        """
        # ``data`` is a nullable column; without this guard an unset value
        # would raise AttributeError on ``.values()``.
        if self.data is None:
            return
        for value in self.data.values():
            if string_value(value) is not None:
                # The raw value is yielded, not the ``string_value`` result.
                yield value

    def __repr__(self):
        return '<DocumentRecord(%r,%r)>' % (self.document_id, self.row_id)
コード例 #6
0
class Link(db.Model, UuidModel, SoftDeleteModel):
    """A typed connection from ``source_id`` to ``target_id``.

    Each link belongs to a collection and carries a JSON ``data`` payload
    plus an optional list of foreign ids.
    """

    type = db.Column(db.String(255), index=True)
    source_id = db.Column(db.String(254), index=True)
    target_id = db.Column(db.String(254), index=True)
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(
        db.Integer,
        db.ForeignKey('collection.id'),
        index=True,
    )
    # Installs a dynamically loaded ``links`` backref on the collection.
    collection = db.relationship(
        Collection,
        backref=db.backref('links', lazy='dynamic'),
    )

    @property
    def schema(self):
        """Look up this link's ``type`` in ``schemata``."""
        return schemata.get(self.type)

    def to_dict(self):
        """Extend the parent's dict with link-specific fields.

        ``foreign_ids`` falls back to an empty list when unset or empty.
        """
        result = super(Link, self).to_dict()
        extra = {
            'schema': self.type,
            'data': self.data,
            'foreign_ids': self.foreign_ids or [],
            'collection_id': self.collection_id,
        }
        result.update(extra)
        return result

    def __repr__(self):
        key = (self.id, self.source_id, self.target_id)
        return '<Link(%r, %r, %r)>' % key
コード例 #7
0
class Selector(db.Model):
    """A text selector for an entity, stored raw and in normalized form."""

    id = db.Column(db.Integer, primary_key=True)
    # The raw text lives in the ``text`` column; it is exposed through the
    # ``text`` hybrid property below so that writes also refresh
    # ``normalized``.
    _text = db.Column('text', db.Unicode, index=True)
    normalized = db.Column(db.Unicode, index=True)

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    entity_id = db.Column(db.Unicode(50), db.ForeignKey('entity.id'))
    entity = db.relationship(
        'Entity',
        backref=db.backref('selectors',
                           lazy='dynamic',
                           cascade='all, delete-orphan'),
    )

    @hybrid_property
    def text(self):
        """The raw selector text."""
        return self._text

    @text.setter
    def text(self, text):
        # Keep the normalized column in sync with every assignment.
        self._text = text
        self.normalized = self.normalize(text)

    @classmethod
    def normalize(cls, text):
        """Delegate to the module-level ``normalize`` function."""
        return normalize(text)

    def __repr__(self):
        key = (self.entity_id, self.text)
        return '<Selector(%r, %r)>' % key

    def __unicode__(self):
        return self.text
コード例 #8
0
class DocumentPage(db.Model):
    """One numbered page of text belonging to a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    number = db.Column(db.Integer(), nullable=False)
    text = db.Column(db.Unicode(), nullable=False)
    document_id = db.Column(db.Integer(), db.ForeignKey('document.id'))
    # Pages are exposed on the document as ``pages`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        Document,
        backref=db.backref('pages', cascade='all, delete-orphan'),
    )

    def __repr__(self):
        key = (self.document_id, self.number)
        return '<DocumentPage(%r,%r)>' % key

    def text_parts(self):
        """Yield the page text unless it is ``None`` or has zero length."""
        if self.text is None:
            return
        if len(self.text):
            yield self.text

    def to_dict(self):
        """Return the page's columns as a plain dict."""
        result = {}
        result['id'] = self.id
        result['number'] = self.number
        result['text'] = self.text
        result['document_id'] = self.document_id
        return result
コード例 #9
0
ファイル: document_tag.py プロジェクト: roukdanus/aleph
class DocumentTag(db.Model, IdModel):
    """A record reflects an entity or tag extracted from a document."""
    TEXT_LENGTH = 1024

    TYPE_PHONE = 'phone'
    TYPE_EMAIL = 'email'
    TYPE_PERSON = 'person'
    TYPE_ORGANIZATION = 'organization'
    TYPE_LOCATION = 'location'
    TYPE_IP = 'ip'
    TYPE_IBAN = 'iban'

    # Maps each tag type to an ``exactitude`` type object.
    TYPES = {
        TYPE_PERSON: exactitude.names,
        TYPE_ORGANIZATION: exactitude.names,
        TYPE_EMAIL: exactitude.emails,
        TYPE_PHONE: exactitude.phones,
        TYPE_LOCATION: exactitude.addresses,
        TYPE_IP: exactitude.ips,
        TYPE_IBAN: exactitude.ibans,
    }

    id = db.Column(db.BigInteger, primary_key=True)
    origin = db.Column(db.Unicode(255), nullable=False, index=True)
    type = db.Column(db.Unicode(16), nullable=False)
    weight = db.Column(db.Integer, default=1)
    text = db.Column(db.Unicode(TEXT_LENGTH), nullable=True)

    document_id = db.Column(
        db.Integer(),
        db.ForeignKey('document.id'),
        index=True,
    )
    # Tags are exposed on the document as ``tags`` and are removed together
    # with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        "Document",
        backref=db.backref('tags', cascade='all, delete-orphan'),
    )

    @property
    def field(self):
        """Return the second item of the first matching ``TYPES`` pair.

        Returns ``None`` implicitly when no pair matches.
        """
        wanted = self.TYPES[self.type]
        # NOTE(review): the bare ``TYPES`` below is a module-level name, not
        # the ``TYPES`` class attribute above; it presumably holds
        # ``(type, field)`` pairs. Confirm it is defined in this module.
        for (candidate, invert) in TYPES.values():
            if candidate == wanted:
                return invert

    @classmethod
    def delete_by(cls, document_id=None, origin=None, type=None):
        """Delete all tags matching the given criteria, then flush.

        At least one criterion must be truthy (enforced with ``assert``);
        every criterion that is not ``None`` narrows the delete.
        """
        pq = db.session.query(cls)
        assert document_id or origin or type
        criteria = (
            (cls.document_id, document_id),
            (cls.origin, origin),
            (cls.type, type),
        )
        for column, wanted in criteria:
            if wanted is not None:
                pq = pq.filter(column == wanted)
        pq.delete()
        db.session.flush()

    def __repr__(self):
        key = (self.document_id, self.text)
        return '<DocumentTag(%r,%r)>' % key
コード例 #10
0
ファイル: document_tag.py プロジェクト: vishalbelsare/aleph
class DocumentTag(db.Model, IdModel):
    """A record reflects an entity or tag extracted from a document."""
    TEXT_LENGTH = 1024

    TYPE_PHONE = 'phone'
    TYPE_EMAIL = 'email'
    TYPE_PERSON = 'person'
    TYPE_ORGANIZATION = 'organization'
    TYPE_LOCATION = 'location'
    TYPE_IP = 'ip'
    TYPE_IBAN = 'iban'
    TYPE_COUNTRY = 'country'
    TYPE_LANGUAGE = 'language'

    # Maps each tag type to a field name.
    MAPPING = {
        TYPE_PERSON: 'namesMentioned',
        TYPE_ORGANIZATION: 'namesMentioned',
        TYPE_EMAIL: 'emailMentioned',
        TYPE_PHONE: 'phoneMentioned',
        TYPE_LOCATION: 'locationMentioned',
        TYPE_IP: 'ipMentioned',
        TYPE_IBAN: 'ibanMentioned',
        TYPE_COUNTRY: 'country',
        TYPE_LANGUAGE: 'language',
    }

    id = db.Column(db.BigInteger, primary_key=True)
    origin = db.Column(db.Unicode(255), nullable=False, index=True)
    type = db.Column(db.Unicode(16), nullable=False)
    weight = db.Column(db.Integer, default=1)
    text = db.Column(db.Unicode(TEXT_LENGTH), nullable=True)

    document_id = db.Column(
        db.Integer(),
        db.ForeignKey('document.id'),
        index=True,
    )
    # Tags are exposed on the document as ``tags`` and are removed together
    # with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        "Document",
        backref=db.backref('tags', cascade='all, delete-orphan'),
    )

    @property
    def field(self):
        """The ``group`` of this tag's registry type, or ``None``.

        ``None`` results when the type is not in ``registry`` or its
        ``group`` is itself ``None``.
        """
        type_ = registry.get(self.type)
        if type_ is None:
            return None
        return type_.group

    @classmethod
    def delete_by(cls, document_id=None, origin=None, type=None):
        """Delete all tags matching the given criteria, then flush.

        At least one criterion must be truthy (enforced with ``assert``);
        every criterion that is not ``None`` narrows the delete.
        """
        pq = db.session.query(cls)
        assert document_id or origin or type
        criteria = (
            (cls.document_id, document_id),
            (cls.origin, origin),
            (cls.type, type),
        )
        for column, wanted in criteria:
            if wanted is not None:
                pq = pq.filter(column == wanted)
        pq.delete()
        db.session.flush()

    def __repr__(self):
        key = (self.document_id, self.text)
        return '<DocumentTag(%r,%r)>' % key
コード例 #11
0
ファイル: reference.py プロジェクト: stefanw/aleph
class Reference(db.Model, IdModel, DatedModel):
    """Row linking one document to one entity, with an integer weight."""

    id = db.Column(db.Integer(), primary_key=True)
    document_id = db.Column(db.BigInteger, db.ForeignKey('document.id'))
    entity_id = db.Column(db.String(32), db.ForeignKey('entity.id'))
    weight = db.Column(db.Integer)

    # Each relationship also installs a dynamically loaded ``references``
    # backref on the target model.
    entity = db.relationship(
        Entity,
        backref=db.backref('references', lazy='dynamic'),
    )
    document = db.relationship(
        Document,
        backref=db.backref('references', lazy='dynamic'),
    )

    @classmethod
    def delete_document(cls, document_id):
        """Bulk-delete every reference row for the given document id."""
        matching = cls.all().filter_by(document_id=document_id)
        matching.delete(synchronize_session='fetch')

    def __repr__(self):
        key = (self.document_id, self.entity_id)
        return '<Reference(%r, %r)>' % key
コード例 #12
0
ファイル: reference.py プロジェクト: wilbrodn/aleph
class Reference(db.Model, IdModel, DatedModel):
    """Row linking one document to one entity.

    Besides the two indexed foreign keys, each row carries an ``origin``
    label and an integer ``weight``.
    """

    id = db.Column(db.Integer(), primary_key=True)
    origin = db.Column(db.String(128))
    weight = db.Column(db.Integer)

    document_id = db.Column(
        db.BigInteger,
        db.ForeignKey('document.id'),
        index=True,
    )
    # Installs a dynamically loaded ``references`` backref on the document.
    document = db.relationship(
        'Document',
        backref=db.backref('references', lazy='dynamic'),
    )

    entity_id = db.Column(
        db.String(32),
        db.ForeignKey('entity.id'),
        index=True,
    )
    # Installs a dynamically loaded ``references`` backref on the entity.
    entity = db.relationship(
        'Entity',
        backref=db.backref('references', lazy='dynamic'),
    )

    @classmethod
    def index_references(cls, document_id):
        """Helper function to get reference data for indexing.

        Returns ``(entity_id, collection_id)`` rows for the document's
        references whose entity is in the active state.
        """
        # cf. aleph.index.entities.generate_entities()
        # Imported locally rather than at module level.
        from aleph.model.entity import Entity
        rows = (
            db.session.query(Reference.entity_id, Entity.collection_id)
            .filter(Reference.document_id == document_id)
            .filter(Entity.id == Reference.entity_id)
            .filter(Entity.state == Entity.STATE_ACTIVE)
        )
        return rows.all()

    def to_dict(self):
        """Return a dict holding an entity summary, the weight and origin."""
        entity = self.entity
        entity_summary = {
            'id': entity.id,
            'name': entity.name,
            '$schema': entity.type,
        }
        return {
            'entity': entity_summary,
            'weight': self.weight,
            'origin': self.origin,
        }

    def __repr__(self):
        key = (self.document_id, self.entity_id)
        return '<Reference(%r, %r)>' % key
コード例 #13
0
ファイル: entity_details.py プロジェクト: stefanw/aleph
class EntityIdentifier(db.Model, EntityDetails):
    """An identifier (value plus scheme) attached to an entity.

    Stored in the ``entity_identifier`` table.
    """
    _schema = '/entity/identifier.json#'
    __tablename__ = 'entity_identifier'

    entity_id = db.Column(db.String(32), db.ForeignKey('entity.id'), index=True)
    # The join condition only matches rows whose ``deleted_at`` is NULL.
    # The entity gets a dynamically loaded ``identifiers`` backref with an
    # ``all, delete-orphan`` cascade.
    entity = db.relationship('Entity', primaryjoin="and_(Entity.id == foreign(EntityIdentifier.entity_id), "  # noqa
                                                        "EntityIdentifier.deleted_at == None)",  # noqa
                             backref=db.backref('identifiers', lazy='dynamic', cascade='all, delete-orphan'))  # noqa
    identifier = db.Column(db.Unicode)
    scheme = db.Column(db.Unicode)
コード例 #14
0
ファイル: entity_identity.py プロジェクト: wilbrodn/aleph
class EntityIdentity(db.Model, IdModel, DatedModel):
    """A judgement on whether ``match_id`` is the same thing as an entity."""

    CONFIRMED = 1
    REJECTED = 2
    UNDECIDED = 3

    JUDGEMENTS = [CONFIRMED, REJECTED, UNDECIDED]

    entity_id = db.Column(
        db.String(32),
        db.ForeignKey('entity.id'),
        index=True,
    )
    # Installs a dynamically loaded ``identities`` backref on the entity.
    entity = db.relationship(
        'Entity',
        backref=db.backref('identities', lazy='dynamic'),
    )
    match_id = db.Column(db.String(254), index=True, nullable=False)
    judgement = db.Column(db.Integer(), nullable=False)
    judge_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)

    @classmethod
    def judgements_by_entity(cls, entity_id):
        """Return a ``{match_id: judgement}`` dict for the given entity."""
        rows = db.session.query(cls.match_id, cls.judgement)
        rows = rows.filter(cls.entity_id == entity_id)
        return dict((match_id, judgement)
                    for match_id, judgement in rows.all())

    @classmethod
    def entity_ids(cls, entity_id):
        """Return the entity id followed by all confirmed match ids."""
        confirmed = (
            db.session.query(cls.match_id)
            .filter(cls.entity_id == entity_id)
            .filter(cls.judgement == cls.CONFIRMED)
        )
        # Each result row is a one-element tuple.
        return [entity_id] + [mapped_id for mapped_id, in confirmed.all()]

    @classmethod
    def by_entity_match(cls, entity_id, match_id):
        """Return the first record for this entity/match pair, or ``None``."""
        return (
            db.session.query(cls)
            .filter(cls.entity_id == entity_id)
            .filter(cls.match_id == match_id)
            .first()
        )

    @classmethod
    def save(cls, entity_id, match_id, judgement, judge=None):
        """Create or update the judgement for an entity/match pair.

        The object is added to the session but not flushed or committed.
        """
        record = cls.by_entity_match(entity_id, match_id)
        if record is None:
            record = cls()
            record.entity_id = entity_id
            record.match_id = match_id
        record.judgement = judgement
        # NOTE(review): only a ``judge_id`` column is declared in this class;
        # ``judge`` presumably comes from a relationship or backref defined
        # elsewhere - confirm that this assignment is persisted.
        record.judge = judge
        db.session.add(record)
        return record

    def __repr__(self):
        key = (self.entity_id, self.match_id, self.judgement)
        return 'EntityIdentity(%r, %r, %r)' % key
コード例 #15
0
class Alert(db.Model, DatedModel):
    """A subscription to notifications on a given query."""

    __tablename__ = "alert"

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column(db.Unicode, nullable=True)
    notified_at = db.Column(db.DateTime, nullable=True)

    role_id = db.Column(db.Integer, db.ForeignKey("role.id"), index=True)
    # Installs a dynamically loaded ``alerts`` backref on the role.
    role = db.relationship(
        Role,
        backref=db.backref("alerts", lazy="dynamic"),
    )

    def update(self):
        """Stamp ``notified_at`` with the current UTC time and flush."""
        self.notified_at = datetime.utcnow()
        session = db.session
        session.add(self)
        session.flush()

    def to_dict(self):
        """Return the date fields plus id, query, role id and notified_at."""
        payload = self.to_dict_dates()
        fields = {
            "id": stringify(self.id),
            "query": self.query,
            "role_id": stringify(self.role_id),
            "notified_at": self.notified_at,
        }
        payload.update(fields)
        return payload

    @classmethod
    def by_id(cls, id, role_id=None):
        """Return the alert with this id, or ``None``.

        When ``role_id`` is given, the alert must also belong to that role.
        """
        candidates = cls.all().filter_by(id=id)
        if role_id is None:
            return candidates.first()
        return candidates.filter(cls.role_id == role_id).first()

    @classmethod
    def by_role_id(cls, role_id):
        """Return a query of the role's alerts, newest first.

        Ordering is by ``created_at`` descending, then ``id`` descending.
        """
        return (
            cls.all()
            .filter(cls.role_id == role_id)
            .order_by(cls.created_at.desc())
            .order_by(cls.id.desc())
        )

    @classmethod
    def create(cls, data, role_id):
        """Build an alert from ``data["query"]`` for the role and flush it."""
        created = cls()
        created.role_id = role_id
        created.query = stringify(data.get("query"))
        created.update()
        return created

    def __repr__(self):
        key = (self.id, self.query)
        return "<Alert(%r, %r)>" % key
コード例 #16
0
ファイル: entity_details.py プロジェクト: stefanw/aleph
class EntityContactDetail(db.Model, EntityDetails):
    """A labelled, typed contact detail with an optional validity window."""
    _schema = '/entity/contact_detail.json#'

    entity_id = db.Column(db.String(32), db.ForeignKey('entity.id'), index=True)
    # The join condition only matches rows whose ``deleted_at`` is NULL.
    # The target gets a dynamically loaded ``contact_details`` backref with
    # an ``all, delete-orphan`` cascade.
    # NOTE(review): the relationship targets 'EntityLegalPerson' while the
    # join condition is written against ``Entity.id`` - presumably
    # EntityLegalPerson is an Entity subclass; confirm.
    entity = db.relationship('EntityLegalPerson', primaryjoin="and_(Entity.id == foreign(EntityContactDetail.entity_id), "  # noqa
                                                              "EntityContactDetail.deleted_at == None)",  # noqa
                             backref=db.backref('contact_details', lazy='dynamic', cascade='all, delete-orphan'))  # noqa

    label = db.Column(db.Unicode)
    type = db.Column(db.Unicode)
    note = db.Column(db.Unicode)
    valid_from = db.Column(db.DateTime)
    valid_until = db.Column(db.DateTime)
コード例 #17
0
class DocumentRecord(db.Model):
    """A record reflects a row or page of a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    sheet = db.Column(db.Integer, nullable=True)
    index = db.Column(db.Integer, nullable=True, index=True)
    text = db.Column(db.Unicode, nullable=True)
    data = db.Column(JSONB, nullable=True)

    document_id = db.Column(
        db.Integer(),
        db.ForeignKey('document.id'),
        index=True,
    )
    # Records are exposed on the document as ``records`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        "Document",
        backref=db.backref('records', cascade='all, delete-orphan'),
    )

    def text_parts(self):
        """Yield the ``string_value`` form of each data value, then of text.

        Anything for which ``string_value`` returns ``None`` is skipped.
        """
        cells = self.data.values() if self.data is not None else ()
        for cell in cells:
            snippet = string_value(cell)
            if snippet is None:
                continue
            yield snippet
        snippet = string_value(self.text)
        if snippet is not None:
            yield snippet

    @classmethod
    def find_records(cls, document_id, ids):
        """Return a query for the document's records with the given ids.

        An empty ``ids`` short-circuits to an empty list.
        """
        if len(ids) == 0:
            return []
        return (
            db.session.query(cls)
            .filter(cls.document_id == document_id)
            .filter(cls.id.in_(ids))
        )

    def to_dict(self):
        """Return the record's columns as a plain dict."""
        result = {}
        result['id'] = self.id
        result['sheet'] = self.sheet
        result['index'] = self.index
        result['data'] = self.data
        result['text'] = self.text
        result['document_id'] = self.document_id
        return result

    def __repr__(self):
        key = (self.document_id, self.index)
        return '<DocumentRecord(%r,%r)>' % key
コード例 #18
0
ファイル: document_record.py プロジェクト: renesugar/aleph
class DocumentRecord(db.Model):
    """A record reflects a row or page of a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    sheet = db.Column(db.Integer, nullable=True)
    index = db.Column(db.Integer, nullable=True, index=True)
    text = db.Column(db.Unicode, nullable=True)
    data = db.Column(JSONB, nullable=True)

    document_id = db.Column(db.Integer(),
                            db.ForeignKey('document.id'),
                            index=True)  # noqa
    # Records are exposed on the document as ``records`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        "Document",
        backref=db.backref('records', cascade='all, delete-orphan'))  # noqa

    @property
    def texts(self):
        """Utility method to get all text snippets in a record.

        Yields every value of ``data`` (when set) followed by ``text``;
        values are not filtered, so ``None`` entries are yielded as-is.
        """
        if self.data is not None:
            for value in self.data.values():
                yield value
        yield self.text

    @classmethod
    def find_records(cls, ids):
        """Return a query for the records with the given ids.

        An empty ``ids`` short-circuits to an empty list.
        """
        if not len(ids):
            return []
        q = db.session.query(cls)
        q = q.filter(cls.id.in_(ids))
        return q

    @classmethod
    def by_index(cls, document_id, index):
        """Return the first record of the document at ``index``, or None."""
        # The query used to be built twice in a row, the second assignment
        # discarding the first; a single construction is sufficient.
        q = db.session.query(cls)
        q = q.filter(cls.document_id == document_id)
        q = q.filter(cls.index == index)
        return q.first()

    def __repr__(self):
        return '<DocumentRecord(%r,%r)>' % (self.document_id, self.index)
コード例 #19
0
class DocumentPage(db.Model):
    """One numbered page of text belonging to a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    number = db.Column(db.Integer(), nullable=False)
    text = db.Column(db.Unicode(), nullable=False)
    document_id = db.Column(db.Integer(), db.ForeignKey('document.id'))
    # Pages are exposed on the document as ``pages`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        Document,
        backref=db.backref('pages', cascade='all, delete-orphan'),
    )

    def __repr__(self):
        key = (self.document_id, self.number)
        return '<DocumentPage(%r,%r)>' % key

    def to_dict(self):
        """Return the page's columns as a plain dict."""
        result = {}
        result['id'] = self.id
        result['number'] = self.number
        result['text'] = self.text
        result['document_id'] = self.document_id
        return result
コード例 #20
0
class EntityTag(db.Model):
    """Marks an entity as tagged within a (collection, package) pair."""

    id = db.Column(db.Integer(), primary_key=True)
    collection = db.Column(db.Unicode(100))
    package_id = db.Column(db.Unicode(100))

    entity_id = db.Column(db.Unicode(50), db.ForeignKey('entity.id'))
    # Installs a dynamically loaded ``tags`` backref on the entity.
    entity = db.relationship(
        Entity,
        backref=db.backref('tags', lazy='dynamic'),
    )

    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    @classmethod
    def delete_set(cls, collection, package_id):
        """Bulk-delete every tag of the given collection/package pair."""
        matching = (
            db.session.query(cls)
            .filter_by(collection=collection)
            .filter_by(package_id=package_id)
        )
        matching.delete()

    @classmethod
    def by_package(cls, collection, package_id):
        """Return one dict per tagged entity of the collection/package.

        Each dict holds the entity id (under both ``id`` and ``entity``),
        its label, category and list id.
        """
        tag = aliased(cls)
        tagged = aliased(Entity)
        rows = db.session.query(tag.entity_id, tagged.label,
                                tagged.category, tagged.list_id)
        rows = rows.join(tagged, tagged.id == tag.entity_id)
        rows = rows.filter(tag.collection == collection)
        rows = rows.filter(tag.package_id == package_id)
        return [
            {
                'id': entity_id,
                'entity': entity_id,
                'label': label,
                'category': category,
                'list': lst,
            }
            for entity_id, label, category, lst in rows.all()
        ]

    def __repr__(self):
        key = (self.package_id, self.entity_id)
        return '<EntityTag(%r, %r)>' % key
コード例 #21
0
class DocumentRecord(db.Model):
    """A row of tabular data (sheet + row id) belonging to a document."""

    id = db.Column(db.BigInteger, primary_key=True)
    sheet = db.Column(db.Integer, nullable=False)
    row_id = db.Column(db.Integer, nullable=False)
    data = db.Column(JSONB)
    document_id = db.Column(db.Integer(),
                            db.ForeignKey('document.id'),
                            index=True)  # noqa
    # Records are exposed on the document as ``records`` and are removed
    # together with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        Document, backref=db.backref('records',
                                     cascade='all, delete-orphan'))  # noqa

    @property
    def tid(self):
        """Hex SHA-1 digest over document id, sheet and row id, in order."""
        tid = sha1(str(self.document_id))
        tid.update(str(self.sheet))
        tid.update(str(self.row_id))
        return tid.hexdigest()

    def text_parts(self):
        """Utility method to get all text snippets in a record.

        Yields each value of ``data`` for which ``string_value`` returns
        something other than ``None``. Yields nothing when ``data`` is unset.
        """
        # ``data`` is a nullable column; without this guard an unset value
        # would raise AttributeError on ``.values()``.
        if self.data is None:
            return
        for value in self.data.values():
            if string_value(value) is not None:
                # The raw value is yielded, not the ``string_value`` result.
                yield value

    @classmethod
    def find_rows(cls, document_id, rows):
        """Return a query for the document's records matching ``rows``.

        ``rows`` is a sequence whose items provide the sheet at index 0 and
        the row id at index 1. An empty sequence short-circuits to an empty
        list.
        """
        if not len(rows):
            return []
        q = db.session.query(cls)
        q = q.filter(cls.document_id == document_id)
        # One (sheet AND row_id) clause per requested row, OR-ed together.
        clauses = [and_(cls.sheet == r[0], cls.row_id == r[1]) for r in rows]
        q = q.filter(or_(*clauses))
        return q

    def __repr__(self):
        return '<DocumentRecord(%r,%r)>' % (self.document_id, self.row_id)
コード例 #22
0
ファイル: document_tag.py プロジェクト: Ro9ueAdmin/aleph
class DocumentTag(db.Model, IdModel):
    """A record reflects an entity or tag extracted from a document."""

    TYPE_PHONE = 'phone'
    TYPE_EMAIL = 'email'
    TYPE_PERSON = 'person'
    TYPE_ORGANIZATION = 'organization'
    TYPE_LOCATION = 'location'

    id = db.Column(db.BigInteger, primary_key=True)
    origin = db.Column(db.Unicode(255), nullable=False, index=True)
    type = db.Column(db.Unicode(16), nullable=False)
    weight = db.Column(db.Integer, default=1)
    key = db.Column(db.Unicode(1024), nullable=False, index=True)
    text = db.Column(db.Unicode(1024), nullable=True)

    document_id = db.Column(
        db.Integer(),
        db.ForeignKey('document.id'),
        index=True,
    )
    # Tags are exposed on the document as ``tags`` and are removed together
    # with it (``all, delete-orphan`` cascade).
    document = db.relationship(
        "Document",
        backref=db.backref('tags', cascade='all, delete-orphan'),
    )

    @classmethod
    def delete_by(cls, document_id=None, origin=None, type=None):
        """Delete all tags matching the given criteria, then flush.

        At least one criterion must be truthy (enforced with ``assert``);
        every criterion that is not ``None`` narrows the delete.
        """
        pq = db.session.query(cls)
        assert document_id or origin or type
        criteria = (
            (cls.document_id, document_id),
            (cls.origin, origin),
            (cls.type, type),
        )
        for column, wanted in criteria:
            if wanted is not None:
                pq = pq.filter(column == wanted)
        pq.delete()
        db.session.flush()

    def __repr__(self):
        key = (self.document_id, self.key)
        return '<DocumentTag(%r,%r)>' % key
コード例 #23
0
ファイル: mapping.py プロジェクト: x0rzkov/aleph
class Mapping(db.Model, SoftDeleteModel):
    """A mapping to load entities from a table"""
    __tablename__ = 'mapping'

    FAILED = 'failed'
    SUCCESS = 'success'
    # Translatable labels for the stored run status values.
    STATUS = {
        SUCCESS: lazy_gettext('success'),
        FAILED: lazy_gettext('failed'),
    }

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column('query', JSONB)

    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    # Installs a dynamically loaded ``mappings`` backref on the role.
    role = db.relationship(
        Role,
        backref=db.backref('mappings', lazy='dynamic'),
    )

    collection_id = db.Column(
        db.Integer,
        db.ForeignKey('collection.id'),
        index=True,
    )
    # Installs a dynamically loaded ``mappings`` backref on the collection.
    collection = db.relationship(
        Collection,
        backref=db.backref('mappings', lazy='dynamic'),
    )

    table_id = db.Column(db.String(ENTITY_ID_LEN), index=True)

    last_run_status = db.Column(db.Unicode, nullable=True)
    last_run_err_msg = db.Column(db.Unicode, nullable=True)

    def _persist(self):
        # Add this object to the session and commit right away.
        db.session.add(self)
        db.session.commit()

    def update(self, query=None, table_id=None):
        """Refresh ``updated_at`` and commit.

        ``query`` and ``table_id`` are only overwritten when truthy.
        """
        self.updated_at = datetime.utcnow()
        if query:
            self.query = query
        if table_id:
            self.table_id = table_id
        self._persist()

    def set_status(self, status, error=None):
        """Record the outcome of the last run and commit."""
        self.last_run_status = status
        self.last_run_err_msg = error
        self._persist()

    def delete(self, deleted_at=None):
        """Soft-delete by stamping ``deleted_at`` (default: now) and commit."""
        if not deleted_at:
            deleted_at = datetime.utcnow()
        self.deleted_at = deleted_at
        self._persist()

    def to_dict(self):
        """Return the date fields plus the mapping's own fields.

        ``last_run_status`` is the ``STATUS`` label for the stored value, or
        ``None`` when the stored value has no label.
        """
        result = self.to_dict_dates()
        status_label = self.STATUS.get(self.last_run_status)
        fields = {
            'id': stringify(self.id),
            'query': dict(self.query),
            'role_id': stringify(self.role_id),
            'collection_id': stringify(self.collection_id),
            'table_id': self.table_id,
            'last_run_status': status_label,
            'last_run_err_msg': self.last_run_err_msg,
        }
        result.update(fields)
        return result

    @classmethod
    def by_collection(cls, collection_id, table_id=None):
        """Return a query of the collection's mappings.

        When ``table_id`` is given the query is narrowed to that table.
        """
        found = cls.all().filter(cls.collection_id == collection_id)
        if table_id is None:
            return found
        return found.filter(cls.table_id == table_id)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Soft-delete the collection's not-yet-deleted mappings in bulk.

        Issues a bulk UPDATE; the session is neither synchronized nor
        committed here.
        """
        if not deleted_at:
            deleted_at = datetime.utcnow()
        pending = (
            db.session.query(cls)
            .filter(cls.collection_id == collection_id)
            .filter(cls.deleted_at == None)  # noqa
        )
        pending.update({cls.deleted_at: deleted_at},
                       synchronize_session=False)

    @classmethod
    def create(cls, query, table_id, collection, role_id):
        """Build a mapping for the collection/table and commit it."""
        created = cls()
        created.role_id = role_id
        created.query = query
        created.collection_id = collection.id
        created.table_id = table_id
        created.update()
        return created

    def __repr__(self):
        key = (self.id, self.table_id)
        return '<Mapping(%r, %r)>' % key
コード例 #24
0
class Entity(db.Model, SoftDeleteModel):
    """Soft-deletable entity row belonging to a collection.

    The schema name lives in ``schema`` and the property values in the
    JSONB ``data`` column; ``to_proxy`` and ``update`` translate between
    the row and a proxy object obtained from ``model.get_proxy``.
    """
    THING = 'Thing'
    LEGAL_ENTITY = 'LegalEntity'

    id = db.Column(
        db.String(ENTITY_ID_LEN),
        primary_key=True,
        default=make_textid,
        nullable=False,
        unique=False,
    )
    name = db.Column(db.Unicode)
    schema = db.Column(db.String(255), index=True)
    foreign_id = db.Column(db.Unicode)
    data = db.Column('data', JSONB)

    collection_id = db.Column(
        db.Integer,
        db.ForeignKey('collection.id'),
        index=True,
    )
    collection = db.relationship(
        Collection,
        backref=db.backref('entities', lazy='dynamic'),
    )

    @property
    def model(self):
        """Whatever ``model.get`` returns for this row's schema name."""
        return model.get(self.schema)

    @property
    def signed_id(self):
        """This row's ID after signing with the collection's namespace."""
        return self.collection.ns.sign(self.id)

    def delete_matches(self):
        """Bulk-delete every ``Match`` row naming this entity on either side.

        The session is not synchronised with the bulk delete; the entity
        itself is refreshed from the database afterwards.
        """
        involves_self = or_(Match.entity_id == self.id,
                            Match.match_id == self.id)
        matches = db.session.query(Match).filter(involves_self)
        matches.delete(synchronize_session=False)
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Remove this entity's matches, then soft-delete it.

        :param deleted_at: timestamp handed to the parent ``delete``; when
            falsy, the current UTC time is used.
        """
        self.delete_matches()
        if not deleted_at:
            deleted_at = datetime.utcnow()
        super(Entity, self).delete(deleted_at=deleted_at)

    def update(self, entity):
        """Validate ``entity`` and copy its schema and properties to this row.

        Checksum-typed properties are reset to the values already stored,
        whatever the caller sent. The row is added to the session; no
        commit happens here.

        :param entity: entity data accepted by ``model.get_proxy``.
        """
        incoming = model.get_proxy(entity)
        incoming.schema.validate(entity)
        self.schema = incoming.schema.name
        stored = self.to_proxy()
        for prop in incoming.iterprops():
            # Users must not be able to overwrite hashes: doing so could
            # let them reach arbitrary stored objects.
            if prop.type == registry.checksum:
                kept = stored.get(prop)
                incoming.set(prop, kept, cleaned=True, quiet=True)
        self.data = incoming.properties
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    def to_proxy(self):
        """Build a proxy from this row's ID, schema and stored properties.

        The row's ``name`` is added to the proxy's ``name`` property and
        ``indexUpdatedAt`` is set to ``updated_at``.
        """
        payload = {
            'id': self.id,
            'schema': self.schema,
            'properties': self.data,
        }
        proxy = model.get_proxy(payload)
        proxy.add('name', self.name)
        proxy.set('indexUpdatedAt', self.updated_at)
        return proxy

    @classmethod
    def create(cls, data, collection):
        """Create an entity, or reuse the row sharing its foreign ID.

        Looks for a row (deleted ones included) with the same
        ``foreign_id`` in the collection. If none exists, a fresh row with
        a new text ID and empty data is built. Either way ``deleted_at`` is
        cleared and ``update(data)`` is applied.

        :return: the created or reused entity.
        """
        foreign_id = data.get('foreign_id')
        entity = cls.by_foreign_id(foreign_id, collection.id, deleted=True)
        if entity is None:
            entity = cls()
            entity.id = make_textid()
            entity.collection = collection
            entity.foreign_id = foreign_id
            entity.data = {}
        entity.deleted_at = None
        entity.update(data)
        return entity

    @classmethod
    def by_id(cls, entity_id, collection_id=None):
        """Return the first row whose ID equals the parsed ``entity_id``.

        The ID is run through ``Namespace.parse`` and only the first
        element of its result is used. ``collection_id`` is accepted but
        plays no part in the lookup.
        """
        plain_id, _ = Namespace.parse(entity_id)
        return cls.all().filter(cls.id == plain_id).first()

    @classmethod
    def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
        """Return the first row with this foreign ID in the collection.

        Rows with a NULL ``deleted_at`` sort first, then the rest by
        ``deleted_at`` descending.

        :param deleted: forwarded to ``cls.all``.
        :return: the row, or ``None`` when ``foreign_id`` is ``None`` or
            nothing matches.
        """
        if foreign_id is None:
            return None
        query = cls.all(deleted=deleted).filter(
            Entity.collection_id == collection_id)
        query = query.filter(cls.foreign_id == foreign_id)
        ordering = Entity.deleted_at.desc().nullsfirst()
        return query.order_by(ordering).first()

    @classmethod
    def by_collection(cls, collection_id):
        """Query for the rows of ``cls.all()`` in the given collection."""
        in_collection = Entity.collection_id == collection_id
        return cls.all().filter(in_collection)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Drop the matches of a collection's entities and soft-delete them.

        ``Match`` rows referencing any entity of the collection (via
        ``entity_id`` first, then ``match_id``) are bulk-deleted; then the
        entities whose ``deleted_at`` is still NULL are stamped. None of
        the bulk statements synchronise the session.

        :param deleted_at: timestamp to write; when falsy, the current UTC
            time is used.
        """
        stamp = deleted_at or datetime.utcnow()

        member_ids = db.session.query(cls.id).filter(
            cls.collection_id == collection_id).subquery()

        for column in (Match.entity_id, Match.match_id):
            stale = db.session.query(Match).filter(column.in_(member_ids))
            stale.delete(synchronize_session=False)

        live_rows = db.session.query(cls).filter(
            cls.collection_id == collection_id)
        live_rows = live_rows.filter(cls.deleted_at == None)  # noqa
        live_rows.update({cls.deleted_at: stamp}, synchronize_session=False)

    def __repr__(self):
        """Debug representation showing the ID and the name."""
        template = '<Entity(%r, %r)>'
        return template % (self.id, self.name)
コード例 #25
0
class Entity(db.Model, DatedModel):
    """Entity row belonging to a collection, with namespace-signed IDs.

    The schema name lives in ``schema`` and the property values in the
    JSONB ``data`` column; ``role_id`` optionally references a role.
    """
    THING = "Thing"
    LEGAL_ENTITY = "LegalEntity"

    id = db.Column(db.String(ENTITY_ID_LEN),
                   primary_key=True,
                   default=make_textid,
                   nullable=False,
                   unique=False)
    schema = db.Column(db.String(255), index=True)
    data = db.Column("data", JSONB)

    role_id = db.Column(
        db.Integer,
        db.ForeignKey("role.id"),
        nullable=True,
    )
    collection_id = db.Column(
        db.Integer,
        db.ForeignKey("collection.id"),
        index=True,
    )
    collection = db.relationship(
        Collection,
        backref=db.backref("entities", lazy="dynamic"),
    )

    @property
    def model(self):
        """Whatever ``model.get`` returns for this row's schema name."""
        return model.get(self.schema)

    def update(self, data, collection):
        """Copy schema and properties from ``data`` onto this row.

        The incoming proxy is passed through the collection namespace's
        ``apply``, and this row's ID is (re-)signed with that namespace.
        Checksum-typed properties are reset to the values already stored.
        The row is added to the session; no commit happens here.

        :param data: entity data accepted by ``model.get_proxy``.
        :param collection: object exposing the ``ns`` namespace.
        """
        incoming = collection.ns.apply(model.get_proxy(data, cleaned=False))
        self.id = collection.ns.sign(self.id)
        self.schema = incoming.schema.name
        self.updated_at = datetime.utcnow()
        # to_proxy() reads the id, schema and updated_at assigned just
        # above, together with the data as stored before this update.
        stored = self.to_proxy()
        for prop in incoming.schema.properties.values():
            # Users must not be able to overwrite hashes: doing so could
            # let them reach arbitrary stored objects.
            if prop.type == registry.checksum:
                incoming.set(prop, stored.get(prop), cleaned=True, quiet=True)
        self.data = incoming.properties
        db.session.add(self)

    def to_proxy(self):
        """Build a proxy describing this row.

        Besides ID, schema and properties, the payload carries the
        ``iso_text`` form of both timestamps, the role ID and
        ``mutable=True``.
        """
        payload = {}
        payload["id"] = self.id
        payload["schema"] = self.schema
        payload["properties"] = self.data
        payload["created_at"] = iso_text(self.created_at)
        payload["updated_at"] = iso_text(self.updated_at)
        payload["role_id"] = self.role_id
        payload["mutable"] = True
        return model.get_proxy(payload, cleaned=False)

    @classmethod
    def create(cls, data, collection, role_id=None):
        """Build a new entity in ``collection`` from ``data``.

        The ID is taken from ``data["id"]`` or, when that is missing or
        falsy, freshly generated. It must pass ``registry.entity.validate``
        and is stored signed with the collection namespace.

        :raises InvalidData: when the entity ID fails validation.
        :return: the new entity, after ``update`` has been applied.
        """
        instance = cls()
        raw_id = data.get("id") or make_textid()
        if not registry.entity.validate(raw_id):
            raise InvalidData(gettext("Invalid entity ID"))
        instance.id = collection.ns.sign(raw_id)
        instance.collection_id = collection.id
        instance.role_id = role_id
        instance.update(data, collection)
        return instance

    @classmethod
    def by_id(cls, entity_id, collection=None):
        """Return the first row with this ID.

        :param collection: when not ``None``, the row must also belong to
            this collection.
        :return: the row, or ``None`` when nothing matches.
        """
        query = cls.all().filter(cls.id == entity_id)
        if collection is None:
            return query.first()
        return query.filter(cls.collection_id == collection.id).first()

    @classmethod
    def by_collection(cls, collection_id):
        """Query for a collection's rows, fetched in batches of 5000."""
        in_collection = Entity.collection_id == collection_id
        return cls.all().filter(in_collection).yield_per(5000)

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete all rows of a collection without syncing the session."""
        doomed = db.session.query(cls).filter(
            cls.collection_id == collection_id)
        doomed.delete(synchronize_session=False)

    def __repr__(self):
        """Debug representation showing the ID and the schema name."""
        template = "<Entity(%r, %r)>"
        return template % (self.id, self.schema)
コード例 #26
0
class Alert(db.Model, SoftDeleteModel):
    """A subscription to notifications on a given query."""
    __tablename__ = 'alert'

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column(db.Unicode, nullable=True)
    notified_at = db.Column(db.DateTime, nullable=True)

    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    role = db.relationship(Role, backref=db.backref('alerts',
                                                    lazy='dynamic'))  # noqa

    @property
    def normalized(self):
        """The stored query text passed through ``normalize``."""
        return normalize(self.query)

    def delete(self, deleted_at=None):
        """Soft-delete this alert by stamping ``deleted_at`` and flushing.

        :param deleted_at: timestamp to record; when falsy, the current
            UTC time is used. The session is flushed, not committed.
        """
        self.deleted_at = deleted_at or datetime.utcnow()
        db.session.add(self)
        db.session.flush()

    def update(self):
        """Set ``notified_at`` to the current UTC time and flush."""
        self.notified_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()

    def is_same(self, other):
        """Tell whether ``other`` has the same role and normalized query."""
        if other.role_id != self.role_id:
            return False
        if other.normalized != self.normalized:
            return False
        return True

    @classmethod
    def by_id(cls, id, role_id=None):
        """Return the first alert with this ID.

        :param role_id: when not ``None``, the alert must also have this
            ``role_id``.
        :return: the alert, or ``None`` when nothing matches.
        """
        q = cls.all().filter_by(id=id)
        if role_id is not None:
            q = q.filter(cls.role_id == role_id)
        return q.first()

    @classmethod
    def by_role_id(cls, role_id):
        """Query a role's alerts, newest first (``created_at``, then ``id``)."""
        q = cls.all()
        q = q.filter(cls.role_id == role_id)
        q = q.order_by(cls.created_at.desc())
        q = q.order_by(cls.id.desc())
        return q

    @classmethod
    def create(cls, data, role_id):
        """Create an alert for ``role_id`` from ``data['query']``.

        The query is passed through ``stringify``; ``update()`` then stamps
        ``notified_at`` and flushes the new row.

        :return: the new alert.
        """
        alert = cls()
        alert.role_id = role_id
        alert.query = stringify(data.get('query'))
        alert.update()
        return alert

    @classmethod
    def dedupe(cls):
        """Soft-delete duplicate alerts, keeping the highest ID per group.

        Alerts are grouped by the same pair that ``is_same`` compares:
        role ID and normalized query. A single pass over the alerts
        replaces the former walk over every ordered pair, so each query is
        normalised once and each redundant alert is deleted once.
        """
        survivors = {}
        # Materialise the result first: delete() flushes the session, which
        # should not happen while the query is still being iterated.
        for alert in list(cls.all()):
            key = (alert.role_id, alert.normalized)
            kept = survivors.get(key)
            if kept is None:
                survivors[key] = alert
            elif kept.id < alert.id:
                kept.delete()
                survivors[key] = alert
            else:
                alert.delete()

    def __repr__(self):
        """Debug representation showing the ID and the query."""
        return '<Alert(%r, %r)>' % (self.id, self.query)
コード例 #27
0
ファイル: alert.py プロジェクト: tomjie/aleph
class Alert(db.Model, SoftDeleteModel):
    """A subscription to notifications on a given query."""

    __tablename__ = 'alert'

    id = db.Column(db.Integer, primary_key=True)
    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    custom_label = db.Column(db.Unicode, nullable=True)
    query_text = db.Column(db.Unicode, nullable=True)
    entity_id = db.Column(
        db.String(32),
        db.ForeignKey('entity.id'),
        nullable=True,
    )
    entity = db.relationship(
        Entity,
        backref=db.backref('alerts', lazy='dynamic'),
    )
    notified_at = db.Column(db.DateTime, nullable=True)

    @property
    def label(self):
        """Display label: custom label, else entity name, else query text."""
        custom = self.custom_label
        if custom is not None:
            return custom
        entity = self.entity
        return entity.name if entity else self.query_text

    def delete(self, deleted_at=None):
        """Soft-delete this alert by stamping ``deleted_at`` and flushing.

        :param deleted_at: timestamp to record; when falsy, the current
            UTC time is used. The session is flushed, not committed.
        """
        if not deleted_at:
            deleted_at = datetime.utcnow()
        self.deleted_at = deleted_at
        session = db.session
        session.add(self)
        session.flush()

    def update(self):
        """Set ``notified_at`` to the current UTC time and flush."""
        self.notified_at = datetime.utcnow()
        session = db.session
        session.add(self)
        session.flush()

    def is_same(self, other):
        """Tell whether ``other`` has the same role, entity and query text."""
        if other.role_id != self.role_id:
            return False
        if other.entity_id != self.entity_id:
            return False
        if other.query_text != self.query_text:
            return False
        return True

    @classmethod
    def by_id(cls, id, role=None):
        """Return the first alert with this ID.

        :param role: when not ``None``, the alert must also belong to
            this role.
        :return: the alert, or ``None`` when nothing matches.
        """
        query = cls.all().filter_by(id=id)
        if role is None:
            return query.first()
        return query.filter(cls.role_id == role.id).first()

    @classmethod
    def by_role(cls, role):
        """Query for the alerts belonging to ``role``."""
        owned = cls.role_id == role.id
        return cls.all().filter(owned)

    @classmethod
    def create(cls, data, role):
        """Validate ``data`` against ``alert.json#`` and create an alert.

        The query text is stripped, and an empty result is stored as
        ``None``; a falsy ``entity_id`` is likewise stored as ``None``.
        ``update()`` then stamps ``notified_at`` and flushes.

        :return: the new alert.
        """
        validate(data, 'alert.json#')
        alert = cls()
        alert.role_id = role.id
        text = data.get('query_text')
        if text is not None:
            text = text.strip() or None
        alert.query_text = text
        alert.entity_id = data.get('entity_id') or None
        alert.custom_label = data.get('label')
        alert.update()
        return alert

    @classmethod
    def exists(cls, query, role):
        """Look up an alert of ``role`` that matches the search ``query``.

        The ``q`` value is stripped and an empty result is treated as
        ``None``. When exactly one ``entity`` value is present it must
        match ``entity_id``; otherwise ``entity_id`` must be NULL.

        :param query: object offering ``get`` and ``getlist``.
        :return: the scalar result of the lookup, limited to one row.
        """
        text = query.get('q')
        if text is not None:
            text = text.strip() or None
        entities = query.getlist('entity')
        wanted_entity = entities[0] if len(entities) == 1 else None

        lookup = cls.all_ids().filter(cls.role_id == role.id)
        lookup = lookup.filter(cls.query_text == text)
        lookup = lookup.filter(cls.entity_id == wanted_entity)
        return lookup.limit(1).scalar()

    @classmethod
    def dedupe(cls, entity_id):
        """Soft-delete older duplicates among one entity's alerts.

        For every pair of alerts that ``is_same`` considers identical,
        the one with the lower ID is deleted.
        """
        candidates = cls.all().filter_by(entity_id=entity_id).all()
        for older in candidates:
            for newer in candidates:
                if older.id < newer.id and older.is_same(newer):
                    older.delete()

    def __repr__(self):
        """Debug representation showing the ID and the label."""
        template = '<Alert(%r, %r)>'
        return template % (self.id, self.label)

    def to_query(self):
        """Express this alert as a ``MultiDict`` with ``q`` and ``entity``."""
        params = {
            'q': self.query_text or '',
            'entity': self.entity_id,
        }
        return MultiDict(params)

    def to_dict(self):
        """Serialise the alert's fields and label into a plain dictionary."""
        result = {}
        result['id'] = self.id
        result['label'] = self.label
        result['role_id'] = self.role_id
        result['query_text'] = self.query_text
        result['entity_id'] = self.entity_id
        result['created_at'] = self.created_at
        result['notified_at'] = self.notified_at
        result['updated_at'] = self.updated_at
        return result
コード例 #28
0
class Entity(db.Model, UuidModel, SoftDeleteModel):
    """Soft-deletable entity with a name, a state and a list of foreign IDs.

    The schema name is kept in ``type``, free-form properties in the JSONB
    ``data`` column, and ``state`` is one of the ``STATE_*`` constants.
    """
    STATE_ACTIVE = 'active'
    STATE_PENDING = 'pending'
    STATE_DELETED = 'deleted'

    name = db.Column(db.Unicode)
    type = db.Column(db.String(255), index=True)
    state = db.Column(db.String(128),
                      nullable=True,
                      default=STATE_ACTIVE,
                      index=True)  # noqa
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    def delete_references(self, origin=None):
        """Bulk-delete the ``Reference`` rows pointing at this entity.

        :param origin: when not ``None``, only references with this
            ``origin`` are removed.

        The entity is refreshed from the database afterwards.
        """
        pq = db.session.query(Reference)
        pq = pq.filter(Reference.entity_id == self.id)
        if origin is not None:
            pq = pq.filter(Reference.origin == origin)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_identities(self):
        """Bulk-delete this entity's ``EntityIdentity`` rows, then refresh."""
        pq = db.session.query(EntityIdentity)
        pq = pq.filter(EntityIdentity.entity_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Soft-delete the entity along with its dependent rows.

        References and identities are removed, every alert on the entity
        is deleted with the same timestamp, the state becomes
        ``STATE_DELETED`` and the parent ``delete`` is invoked.

        :param deleted_at: timestamp to use; when falsy, the current UTC
            time is used.
        """
        self.delete_references()
        self.delete_identities()
        deleted_at = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=deleted_at)
        self.state = self.STATE_DELETED
        super(Entity, self).delete(deleted_at=deleted_at)

    @classmethod
    def delete_dangling(cls, collection_id):
        """Delete dangling entities.

        Entities can dangle in pending state while they have no references
        pointing to them, thus making it impossible to enable them. This is
        a routine cleanup function.
        """
        q = db.session.query(cls)
        q = q.filter(cls.collection_id == collection_id)
        q = q.filter(cls.state == cls.STATE_PENDING)
        q = q.outerjoin(Reference)
        q = q.group_by(cls)
        q = q.having(func.count(Reference.id) == 0)
        for entity in q.all():
            entity.delete()

    def merge(self, other):
        """Fold ``other`` into this entity, delete ``other`` and commit.

        Data is combined with ``merge_data``; when the names differ
        (ignoring case) the other name is added as an alias. Foreign IDs
        are concatenated, the earlier ``created_at`` is kept, and alerts
        and references of ``other`` are re-pointed at this entity.

        :param other: the entity to absorb; it must be a different entity
            from the same collection.
        :raises ValueError: when ``other`` is this entity, or belongs to a
            different collection.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        data = merge_data(self.data, other.data)
        if self.name.lower() != other.name.lower():
            data = merge_data(data, {'alias': [other.name]})

        self.data = data
        self.state = self.STATE_ACTIVE
        # Assign a brand-new list rather than extending the loaded one in
        # place: a plain ARRAY column does not track in-place mutation, so
        # ``+=`` on the existing list could leave the merged IDs out of the
        # UPDATE statement.
        merged_ids = list(self.foreign_ids or [])
        merged_ids.extend(other.foreign_ids or [])
        self.foreign_ids = merged_ids
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        # update alerts
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({'entity_id': self.id})

        # update document references
        from aleph.model.reference import Reference
        q = db.session.query(Reference).filter(Reference.entity_id == other.id)
        q.update({'entity_id': self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        db.session.refresh(other)

    def update(self, entity):
        """Apply the submitted ``entity`` dictionary to this row.

        The ``data`` sub-dictionary (with the name inserted into it) is
        validated by the schema; the name is then moved back out into its
        own column. Foreign IDs are passed through ``string_value``, with
        ``None`` results and duplicates dropped. The row is added to the
        session; no commit happens here.

        Note that ``entity`` is modified: ``state`` is popped from it and
        ``name`` is written into its ``data`` dictionary when one is present.
        """
        data = entity.get('data') or {}
        data['name'] = entity.get('name')
        self.data = self.schema.validate(data)
        self.name = self.data.pop('name')
        fid = [string_value(f) for f in entity.get('foreign_ids') or []]
        self.foreign_ids = list(set([f for f in fid if f is not None]))
        self.state = entity.pop('state', self.STATE_ACTIVE)
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    @classmethod
    def save(cls, data, collection, merge=False):
        """Create or update an entity from ``data`` inside ``collection``.

        The entity is looked up with ``cls.by_id(data.get('id'))``; when
        none is found a new one is built, taking its type from the
        ``schema`` key (which is popped from ``data``).

        :param merge: when true, ``data`` is first combined with the
            entity's current ``to_dict()`` using ``merge_data``.
        :raises ValueError: when a new entity has no schema, or when
            ``collection`` is ``None``.
        :return: the saved entity.
        """
        ent = cls.by_id(data.get('id'))
        if ent is None:
            ent = cls()
            ent.type = data.pop('schema', None)
            if ent.type is None:
                raise ValueError("No schema provided.")
            ent.id = make_textid()

        if merge:
            data = merge_data(data, ent.to_dict())

        if collection is None:
            raise ValueError("No collection specified.")

        ent.collection = collection
        ent.update(data)
        return ent

    @classmethod
    def filter_collections(cls, q, collections=None):
        """Restrict ``q`` to entities from the given collections.

        :param collections: iterable of ``Collection`` objects and/or
            collection IDs; ``None`` leaves the query untouched.
        :return: the (possibly filtered) query.
        """
        if collections is None:
            return q
        collection_ids = []
        for collection in collections:
            if isinstance(collection, Collection):
                collection = collection.id
            collection_ids.append(collection)
        q = q.filter(Entity.collection_id.in_(collection_ids))
        return q

    @classmethod
    def by_id_set(cls, ids, collections=None):
        """Load several entities at once, keyed by their ID.

        :param ids: sized collection of entity IDs; an empty one yields
            ``{}`` without touching the database.
        :param collections: optional restriction, see
            ``filter_collections``.
        :return: dict mapping entity ID to entity; collections are
            joined-loaded.
        """
        if not len(ids):
            return {}
        q = cls.all()
        q = cls.filter_collections(q, collections=collections)
        q = q.options(joinedload('collection'))
        q = q.filter(cls.id.in_(ids))
        entities = {}
        for ent in q:
            entities[ent.id] = ent
        return entities

    @classmethod
    def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
        """Return the first entity of the collection carrying a foreign ID.

        The ID is passed through ``string_value``; the match is an array
        containment test on ``foreign_ids``. Rows with a NULL
        ``deleted_at`` sort first, then the rest by ``deleted_at``
        descending.

        :param deleted: forwarded to ``cls.all``.
        :return: the entity, or ``None`` when the normalised ID is
            ``None`` or nothing matches.
        """
        foreign_id = string_value(foreign_id)
        if foreign_id is None:
            return None
        q = cls.all(deleted=deleted)
        q = q.filter(Entity.collection_id == collection_id)
        foreign_id = func.cast([foreign_id], ARRAY(db.Unicode()))
        q = q.filter(cls.foreign_ids.contains(foreign_id))
        q = q.order_by(Entity.deleted_at.desc().nullsfirst())
        return q.first()

    @classmethod
    def latest(cls):
        """Return the greatest ``updated_at`` among active entities."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.state == cls.STATE_ACTIVE)
        return q.scalar()

    @property
    def schema(self):
        """Whatever ``schemata.get`` returns for this entity's type."""
        return schemata.get(self.type)

    @property
    def terms(self):
        """Set made of the name plus every non-empty ``alias`` value."""
        terms = set([self.name])
        for alias in ensure_list(self.data.get('alias')):
            if alias is not None and len(alias):
                terms.add(alias)
        return terms

    @property
    def regex_terms(self):
        """Normalised terms, minus those that contain another term.

        Terms that normalise to ``None``, or to fewer than 4 or more than
        120 characters, are skipped as results.
        """
        # This is to find the shortest possible regex for each entity.
        # If, for example, an entity matches both "Al Qaeda" and
        # "Al Qaeda in Iraq, Syria and the Levant", it is useless to
        # search for the latter.
        terms = set([normalize_strong(t) for t in self.terms])
        regex_terms = set()
        for term in terms:
            if term is None or len(term) < 4 or len(term) > 120:
                continue
            contained = False
            for other in terms:
                if other is None or other == term:
                    continue
                if other in term:
                    contained = True
            if not contained:
                regex_terms.add(term)
        return regex_terms

    def to_dict(self):
        """Extend the parent's ``to_dict()`` with this entity's own fields."""
        data = super(Entity, self).to_dict()
        data.update({
            'schema': self.type,
            'name': self.name,
            'state': self.state,
            'data': self.data,
            'foreign_ids': self.foreign_ids or [],
            'collection_id': self.collection_id
        })
        return data

    def to_index(self):
        """Return ``to_dict()`` plus a ``properties`` mapping of value lists.

        ``properties`` always holds the name; each ``data`` entry is added
        as a list (via ``ensure_list``) unless that list is empty.
        """
        entity = self.to_dict()
        entity['properties'] = {'name': [self.name]}
        for k, v in self.data.items():
            v = ensure_list(v)
            if len(v):
                entity['properties'][k] = v
        return entity

    def to_ref(self):
        """Return a short reference: ID, label, schema and collection ID."""
        return {
            'id': self.id,
            'label': self.name,
            'schema': self.type,
            'collection_id': self.collection_id
        }

    def __unicode__(self):
        """Return the entity's name."""
        return self.name

    def __repr__(self):
        """Debug representation showing the ID and the name."""
        return '<Entity(%r, %r)>' % (self.id, self.name)
コード例 #29
0
class Export(db.Model, IdModel, DatedModel):
    """A data export run in the background. The data is stored in a cloud
    storage bucket and the user is given a link to download the data. The link
    expires after a fixed duration and the exported data is deleted."""

    DEFAULT_EXPIRATION = timedelta(days=30)  # After 30 days

    label = db.Column(db.Unicode)
    operation = db.Column(db.Unicode)
    creator_id = db.Column(db.Integer, db.ForeignKey("role.id"))
    creator = db.relationship(Role,
                              backref=db.backref("exports", lazy="dynamic"))
    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True,
                              nullable=True)
    collection = db.relationship(Collection,
                                 backref=db.backref("exports", lazy="dynamic"))

    expires_at = db.Column(db.DateTime, default=None, nullable=True)
    deleted = db.Column(db.Boolean, default=False)
    status = db.Column("export_status", db.Unicode, default=Status.DEFAULT)

    content_hash = db.Column(db.Unicode(65), index=True, nullable=True)
    file_size = db.Column(db.BigInteger, nullable=True)  # In bytes
    file_name = db.Column(db.Unicode, nullable=True)
    mime_type = db.Column(db.Unicode)
    # A callable default builds a fresh dict for every row; a literal ``{}``
    # would be one object shared by all rows that fall back to the default.
    meta = db.Column(JSONB, default=dict)

    def to_dict(self):
        """Serialise the export into a plain dictionary.

        Starts from ``to_dict_dates()``; ``id`` and ``creator_id`` are
        passed through ``stringify`` and ``status`` is translated with
        ``Status.LABEL.get``.
        """
        data = self.to_dict_dates()
        data.update({
            "id": stringify(self.id),
            "label": self.label,
            "operation": self.operation,
            "creator_id": stringify(self.creator_id),
            "collection_id": self.collection_id,
            "expires_at": self.expires_at,
            "deleted": self.deleted,
            "status": Status.LABEL.get(self.status),
            "content_hash": self.content_hash,
            "file_size": self.file_size,
            "file_name": self.file_name,
            "mime_type": self.mime_type,
            "meta": self.meta,
        })
        return data

    @classmethod
    def create(cls,
               operation,
               role_id,
               label,
               collection=None,
               mime_type=None,
               meta=None):
        """Create an export and add it to the session (no flush or commit).

        :param operation: stored in ``operation``.
        :param role_id: stored in ``creator_id``.
        :param label: stored in ``label``.
        :param collection: when given, its ``id`` becomes ``collection_id``.
        :param mime_type: stored in ``mime_type``.
        :param meta: stored in ``meta``; a falsy value becomes ``{}``.
        :return: the new export, expiring ``DEFAULT_EXPIRATION`` from now.
        """
        export = cls()
        export.creator_id = role_id
        export.operation = operation
        export.label = label
        if collection is not None:
            export.collection_id = collection.id
        export.mime_type = mime_type
        export.expires_at = datetime.utcnow() + cls.DEFAULT_EXPIRATION
        export.meta = meta or {}
        db.session.add(export)
        return export

    @property
    def namespace(self):
        """Key built by ``make_key`` from ``"role"`` and the creator ID."""
        return make_key("role", self.creator_id)

    def set_status(self, status):
        """Store ``status`` and add the export to the session."""
        self.status = status
        db.session.add(self)

    def should_delete_publication(self):
        """Check whether the published export should be deleted from the archive

        Since we store exports by contenthash, there may be other non-expired exports
        that point to the same file in the archive"""
        q = (Export.all().filter(
            Export.content_hash == self.content_hash).filter(
                Export.deleted.isnot(True)).filter(Export.id != self.id))
        return q.first() is None

    @classmethod
    def get_expired(cls, deleted=False):
        """Query for exports whose ``expires_at`` is now or in the past.

        :param deleted: when falsy, an extra filter comparing the
            ``deleted`` column with this value is applied; when truthy,
            no filter on ``deleted`` is applied.
        """
        now = datetime.utcnow()
        q = cls.all()
        q = q.filter(cls.expires_at <= now)
        if not deleted:
            q = q.filter(cls.deleted == deleted)
        return q

    @classmethod
    def get_pending(cls):
        """Query for exports in ``Status.PENDING`` that are not deleted."""
        q = cls.all()
        q = q.filter(cls.status == Status.PENDING)
        q = q.filter(cls.deleted == False)  # noqa
        return q

    @classmethod
    def by_id(cls, id, role_id=None, deleted=False):
        """Return the first export with this ID.

        :param role_id: when not ``None``, the export must have been
            created by this role.
        :param deleted: when falsy, exports flagged as deleted are
            excluded.
        :return: the export, or ``None`` when nothing matches.
        """
        q = cls.all().filter_by(id=id)
        if role_id is not None:
            q = q.filter(cls.creator_id == role_id)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        return q.first()

    @classmethod
    def by_role_id(cls, role_id, deleted=False):
        """Query a role's exports, most recently created first.

        :param deleted: when falsy, exports flagged as deleted are
            excluded.
        """
        q = cls.all()
        q = q.filter(cls.creator_id == role_id)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        q = q.order_by(cls.created_at.desc())
        return q

    @classmethod
    def by_content_hash(cls, content_hash, deleted=False):
        """Query for the exports sharing a content hash.

        :param deleted: when falsy, exports flagged as deleted are
            excluded.
        """
        q = cls.all()
        q = q.filter(cls.content_hash == content_hash)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        return q

    def __repr__(self):
        """Debug representation showing ID, creator ID and label."""
        return "<Export(%r, %r, %r)>" % (self.id, self.creator_id, self.label)
コード例 #30
0
ファイル: entity.py プロジェクト: kkrbalam/aleph
class Entity(db.Model, UuidModel, SoftDeleteModel):
    """Database model for a named entity that belongs to a collection.

    Each row carries a display name, a schema identifier, a list of foreign
    IDs and a JSONB ``data`` document, and references its collection via
    ``collection_id``.
    """
    # NOTE(review): presumably the name of the most generic schema - confirm.
    THING = 'Thing'

    # Primary display name of the entity.
    name = db.Column(db.Unicode)
    # Schema identifier (indexed); looked up through ``model.get`` elsewhere
    # in this class.
    schema = db.Column(db.String(255), index=True)
    # Array of unicode identifiers; used by ``by_foreign_ids`` for lookups.
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    # JSONB document with the entity's remaining properties (e.g. 'alias').
    data = db.Column('data', JSONB)

    # Foreign key to the owning collection (indexed).
    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    # Relationship to the collection; exposes a dynamic ``entities`` backref
    # on the Collection side.
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    @property
    def model(self):
        """Return what ``model.get`` yields for this entity's ``schema``."""
        schema_name = self.schema
        return model.get(schema_name)

    @property
    def terms(self):
        """Collect the entity's name together with its non-empty aliases.

        Aliases are read from ``self.data['alias']`` (normalised through
        ``ensure_list``); ``None`` and zero-length aliases are dropped. The
        name itself is always included, whatever its value.
        """
        names = {self.name}
        aliases = ensure_list(self.data.get('alias'))
        names.update(a for a in aliases if a is not None and len(a))
        return names

    @property
    def regex_terms(self):
        """Return the minimal set of match forms worth searching for.

        Every term is passed through ``match_form``. A form is kept only if
        it is between 4 and 120 characters long and no *other* form is a
        substring of it: if an entity matches both "Al Qaeda" and
        "Al Qaeda in Iraq, Syria and the Levant", searching for the longer
        one is useless because the shorter one already covers it.
        """
        forms = {match_form(t) for t in self.terms}

        def covered_by_shorter(term):
            # True when some other (non-None) form occurs inside ``term``.
            return any(other is not None and other != term and other in term
                       for other in forms)

        return {
            form for form in forms
            if form is not None
            and 4 <= len(form) <= 120
            and not covered_by_shorter(form)
        }

    def delete_matches(self):
        """Bulk-delete every ``Match`` row that references this entity.

        Rows are removed when the entity appears on either side of the match
        (``entity_id`` or ``match_id``). The delete bypasses session
        synchronisation, and ``self`` is refreshed from the database
        afterwards.
        """
        involves_self = or_(Match.entity_id == self.id,
                            Match.match_id == self.id)
        doomed = db.session.query(Match).filter(involves_self)
        doomed.delete(synchronize_session=False)
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete this entity together with its matches and alerts.

        Matches are removed first, then each object in ``self.alerts`` and
        finally the entity itself (via the parent class ``delete``) are
        deleted with one shared timestamp.

        :param deleted_at: timestamp to record; defaults to
            ``datetime.utcnow()``.
        """
        self.delete_matches()
        stamp = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=stamp)
        super(Entity, self).delete(deleted_at=stamp)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Bulk soft-delete all entities of a collection and clean up after them.

        In order: alerts pointing at the collection's entities get their
        ``deleted_at`` set, ``Match`` rows referencing those entities on
        either side are deleted, and finally the collection's entities that
        have no ``deleted_at`` yet are stamped. All statements are bulk
        operations issued with ``synchronize_session=False``.

        :param collection_id: id of the collection whose entities go away.
        :param deleted_at: timestamp to record; defaults to
            ``datetime.utcnow()``.
        """
        # Imported locally, as in the original code (likely to avoid an
        # import cycle - confirm).
        from aleph.model import Alert
        stamp = deleted_at or datetime.utcnow()
        session = db.session

        # Subquery selecting the ids of every entity in the collection.
        member_ids = (session.query(cls.id)
                      .filter(cls.collection_id == collection_id)
                      .subquery())

        alerts = session.query(Alert).filter(Alert.entity_id.in_(member_ids))
        alerts.update({Alert.deleted_at: stamp}, synchronize_session=False)

        # Drop matches where a member entity is on either side.
        for side in (Match.entity_id, Match.match_id):
            stale = session.query(Match).filter(side.in_(member_ids))
            stale.delete(synchronize_session=False)

        live = session.query(cls)
        live = live.filter(cls.collection_id == collection_id)
        live = live.filter(cls.deleted_at == None)  # noqa
        live.update({cls.deleted_at: stamp}, synchronize_session=False)

    def merge(self, other):
        """Fold ``other`` into this entity and delete ``other``.

        The surviving entity (``self``) receives the more precise schema of
        the two, the union of both entities' foreign IDs, the earlier
        ``created_at`` and the merged ``data``. If the names differ,
        ``other.name`` is recorded as an alias. Alerts that pointed at
        ``other`` are re-pointed at ``self``. The session is committed
        before returning.

        :param other: the entity to merge into this one; it is deleted.
        :raises ValueError: if ``other`` is this same entity, or if the two
            entities belong to different collections.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        self.schema = model.precise_schema(self.schema, other.schema)
        # Combine the foreign IDs of *both* entities. The previous code
        # passed ``self.foreign_ids`` twice, silently dropping the IDs of
        # the entity being merged in.
        self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        data = merge_data(self.data, other.data)
        if self.name != other.name:
            # Keep the losing entity's name searchable as an alias.
            data = merge_data(data, {'alias': [other.name]})
        self.data = data

        # update alerts
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({Alert.entity_id: self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        # Reload ``other`` so the in-memory object reflects the committed
        # state.
        db.session.refresh(other)

    def update(self, entity):
        """Apply the values of an entity mapping to this row.

        ``entity`` is read via ``.get`` for the keys ``'schema'``,
        ``'properties'`` and ``'name'``. When ``properties`` is a mapping it
        is given a ``'name'`` entry (note: the passed mapping is modified in
        place) and validated through ``self.model.validate``; the result
        becomes ``self.data``. The ``'name'`` key is then removed from
        ``self.data`` again, since the name lives in its own column.

        ``foreign_ids`` are intentionally left untouched here; whether they
        should be mutable is an open TODO. The row is added to the session
        but not committed.
        """
        self.schema = entity.get('schema')
        name = entity.get('name')

        properties = entity.get('properties')
        if is_mapping(properties):
            properties['name'] = [name]
            self.data = self.model.validate(properties)
        elif self.data is None:
            self.data = {}

        self.data.pop('name', None)
        self.name = name

        # TODO: should foreign_ids be mutable? If so:
        # self.foreign_ids = string_set(entity.get('foreign_ids'))
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    @classmethod
    def create(cls, data, collection):
        """Create an entity in ``collection``, or revive/update a matching one.

        An existing entity in the collection (deleted ones included) that
        matches the given foreign IDs is reused; otherwise a new instance
        with a fresh text id is built. In both cases ``data`` is applied via
        ``update`` and ``deleted_at`` is cleared.

        :param data: mapping describing the entity; ``'foreign_ids'`` is
            read here, the rest is handled by ``update``.
        :param collection: the collection object the entity belongs to.
        :return: the new or reused entity.
        """
        foreign_ids = string_set(data.get('foreign_ids'))
        entity = cls.by_foreign_ids(foreign_ids, collection.id, deleted=True)
        is_new = entity is None
        if is_new:
            entity = cls()
            entity.id = make_textid()
            entity.collection = collection
            entity.foreign_ids = foreign_ids
        entity.update(data)
        entity.deleted_at = None
        return entity

    @classmethod
    def by_foreign_ids(cls, foreign_ids, collection_id, deleted=False):
        """Find an entity in a collection whose foreign IDs contain the given ones.

        Returns ``None`` straight away when ``foreign_ids`` is empty.
        Candidates are ordered so that rows without a ``deleted_at`` come
        first, followed by the most recently deleted ones; the first row is
        returned.

        :param foreign_ids: sized collection of identifiers to look for.
        :param collection_id: id of the collection to search in.
        :param deleted: forwarded to ``cls.all``.
        """
        if len(foreign_ids) == 0:
            return None
        # Cast explicitly so the containment test compares unicode arrays.
        wanted = func.cast(foreign_ids, ARRAY(db.Unicode()))
        candidates = (
            cls.all(deleted=deleted)
            .filter(Entity.collection_id == collection_id)
            .filter(cls.foreign_ids.contains(wanted))
            .order_by(Entity.deleted_at.desc().nullsfirst())
        )
        return candidates.first()

    @classmethod
    def all_ids(cls, deleted=False, authz=None):
        """Build the id query of the parent class, optionally limited by access.

        When ``authz`` is given and is not an admin, the query is joined to
        ``Permission`` on the collection id and restricted to non-deleted
        permissions that grant ``read`` to one of ``authz.roles``.

        :param deleted: forwarded to the parent class ``all_ids``.
        :param authz: optional authorization object exposing ``is_admin``
            and ``roles``.
        """
        ids = super(Entity, cls).all_ids(deleted=deleted)
        if authz is None or authz.is_admin:
            # No restriction requested, or admins see everything.
            return ids

        same_collection = cls.collection_id == Permission.collection_id
        ids = ids.join(Permission, same_collection)
        ids = ids.filter(Permission.deleted_at == None)  # noqa
        ids = ids.filter(Permission.read == True)  # noqa
        ids = ids.filter(Permission.role_id.in_(authz.roles))
        return ids

    @classmethod
    def latest(cls):
        """Return the greatest ``updated_at`` among entities without ``deleted_at``."""
        newest = db.session.query(func.max(cls.updated_at))
        newest = newest.filter(cls.deleted_at == None)  # noqa
        return newest.scalar()

    def __repr__(self):
        """Debug representation showing the entity's id and name."""
        shown = (self.id, self.name)
        return '<Entity(%r, %r)>' % shown