Beispiel #1
0
class Hit(db.Model):
    """One row of the ``hit`` table: a timestamped entry linked to a `View`.

    NOTE(review): presumably one row is inserted per individual view event;
    confirm against the code that creates `Hit` instances.
    """
    __tablename__ = "hit"

    #: surrogate primary key, auto-incremented, never NULL
    id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)

    #: foreign key to the `View` row this hit belongs to
    view_id = db.Column(db.Integer, db.ForeignKey(View.id))

    #: time of view; defaults to `datetime.utcnow` (the callable is passed, so
    #: it is evaluated per row, not at import time). NULL is allowed.
    viewed_at = Column(DateTime, default=datetime.utcnow, nullable=True)
Beispiel #2
0
class Document(BaseContent, PathAndSecurityIndexable):
    """A document, in the CMIS sense.

    On top of the content blob handled by :class:`BaseContent`, a document
    keeps derived blobs (extracted text, PDF rendition, preview), antivirus
    bookkeeping stored in the content blob's ``meta``, and a per-document lock
    stored in ``self.meta``.
    """
    __tablename__ = None  # type: str

    __indexable__ = True

    # Build this class' indexation arguments: start from a copy of
    # BaseContent's, then concatenate the 'index_to' tuples of both parents
    # and add the 'text' field.  Note that `setdefault` also creates an empty
    # 'index_to' entry on a parent's dict when it has none.
    __indexation_args__ = {}
    __indexation_args__.update(BaseContent.__indexation_args__)
    index_to = tuple()
    index_to += BaseContent.__indexation_args__.setdefault('index_to', ())
    index_to += PathAndSecurityIndexable.__indexation_args__.setdefault(
        'index_to', ())
    index_to += (('text', ('text', )), )
    __indexation_args__['index_to'] = index_to
    # temporary name only: do not leave it behind as a class attribute
    del index_to

    # Explicitly pick the implementation from PathAndSecurityIndexable.
    _indexable_roles_and_users = (
        PathAndSecurityIndexable._indexable_roles_and_users)

    #: parent folder; the reverse side is exposed as `Folder.documents`,
    #: eagerly joined, ordered by title, with delete-orphan cascade.
    parent = relationship("Folder",
                          primaryjoin=(foreign(
                              CmisObject._parent_id) == remote(Folder.id)),
                          backref=backref('documents',
                                          lazy="joined",
                                          order_by='Document.title',
                                          cascade='all, delete-orphan'))

    #: default size passed to the converter when no size is requested
    PREVIEW_SIZE = 700

    @property
    def preview_size(self):
        """Return the default preview size (:attr:`PREVIEW_SIZE`)."""
        return self.PREVIEW_SIZE

    def has_preview(self, size=None, index=0):
        """Ask the converter whether an image exists for this content.

        :param size: requested size; defaults to :attr:`PREVIEW_SIZE`.
        :param index: index forwarded to ``converter.has_image``.
        :returns: whatever ``converter.has_image`` returns.
        """
        if size is None:
            size = self.PREVIEW_SIZE

        return converter.has_image(self.content_digest, self.content_type,
                                   index, size)

    @property
    def digest(self):
        """Alias for content_digest."""
        return self.content_digest

    # Derived blobs: extracted text, PDF rendition and preview.
    _text_id = Column(Integer, db.ForeignKey(Blob.id), info=NOT_AUDITABLE)
    text_blob = relationship(Blob,
                             cascade='all, delete',
                             foreign_keys=[_text_id])

    _pdf_id = Column(Integer, db.ForeignKey(Blob.id), info=NOT_AUDITABLE)
    pdf_blob = relationship(Blob,
                            cascade='all, delete',
                            foreign_keys=[_pdf_id])

    _preview_id = Column(Integer, db.ForeignKey(Blob.id), info=NOT_AUDITABLE)
    preview_blob = relationship(Blob,
                                cascade='all, delete',
                                foreign_keys=[_preview_id])

    language = Column(Text,
                      info=dict(searchable=True,
                                index_to=[('language', wf.ID(stored=True))]))
    size = Column(Integer)
    page_num = Column(Integer, default=1)

    # FIXME: use Entity.meta instead
    #: Stores extra metadata as a JSON column
    extra_metadata_json = Column(UnicodeText, info=dict(auditable=False))

    sbe_type = 'cmis:document'

    # antivirus status
    def ensure_antivirus_scheduled(self):
        """Make sure an antivirus run is scheduled if one is required.

        :returns: `True` when no scan is required, when Celery runs eagerly
            (the conversion is then triggered directly), or when a previously
            registered task has not failed.  When a new task is scheduled the
            method falls through and returns `None`.
        """
        if not self.antivirus_required:
            return True

        if current_app.config.get('CELERY_ALWAYS_EAGER', False):
            async_conversion(self)
            return True

        task_id = self.content_blob.meta.get('antivirus_task_id')
        if task_id is not None:
            res = tasks.process_document.AsyncResult(task_id)
            if not res.failed():
                # success, or pending or running
                return True

        # schedule a new task
        # NOTE(review): this path returns None (falsy) unlike the others;
        # kept as is because callers may rely on it.
        self.content_blob.meta['antivirus_task_id'] = str(uuid.uuid4())
        async_conversion(self)

    @property
    def antivirus_scanned(self):
        """
        True if antivirus task was run, even if antivirus didn't return a result

        When there is no content blob, the (falsy) blob value itself is
        returned.
        """
        return self.content_blob and 'antivirus' in self.content_blob.meta

    @property
    def antivirus_status(self):
        """
        True: antivirus has scanned file: no virus
        False: antivirus has scanned file: virus detected
        None: antivirus task was run, but antivirus didn't return a result

        The value is read from the 'antivirus' key of the content blob's
        meta; a missing key also yields `None`.
        """
        return self.content_blob and self.content_blob.meta.get('antivirus')

    @property
    def antivirus_required(self):
        """True if antivirus needs to be run.

        That is: the 'ANTIVIRUS_CHECK_REQUIRED' setting is enabled and either
        no scan was recorded yet or the recorded status is `None`.
        """
        required = current_app.config['ANTIVIRUS_CHECK_REQUIRED']
        return required and (not self.antivirus_scanned
                             or self.antivirus_status is None)

    @property
    def antivirus_ok(self):
        """True if user can safely access document content.

        When the check is required only an explicit `True` status is
        accepted; otherwise anything but an explicit `False` is accepted.
        """
        required = current_app.config['ANTIVIRUS_CHECK_REQUIRED']
        if required:
            return self.antivirus_status is True

        return self.antivirus_status is not False

    # R/W properties
    @BaseContent.content.setter
    def content(self, value):
        """Set the content, then reset everything derived from it."""
        BaseContent.content.fset(self, value)
        # fresh task id registered on the newly created content blob
        self.content_blob.meta['antivirus_task_id'] = str(uuid.uuid4())
        # derived renditions are stale now
        self.pdf_blob = None
        self.text_blob = None

    def set_content(self, content, content_type=None):
        """Store `content` (see `BaseContent.set_content`), then trigger
        the asynchronous conversion.
        """
        super(Document, self).set_content(content, content_type)
        async_conversion(self)

    @property
    def pdf(self):
        """PDF bytes, or the falsy `pdf_blob` value when there is none."""
        if self.pdf_blob:
            assert isinstance(self.pdf_blob.value, bytes)
        return self.pdf_blob and self.pdf_blob.value

    @pdf.setter
    def pdf(self, value):
        assert isinstance(value, bytes)
        self.pdf_blob = Blob()
        self.pdf_blob.value = value

    # `text` is an Unicode value.
    @property
    def text(self):
        """Text decoded from `text_blob` as UTF-8; empty when no blob is set."""
        return (self.text_blob.value.decode("utf8")
                if self.text_blob is not None else u'')

    @text.setter
    def text(self, value):
        assert isinstance(value, text_type)
        self.text_blob = Blob()
        self.text_blob.value = value.encode("utf8")

    @property
    def extra_metadata(self):
        """Extra metadata decoded from :attr:`extra_metadata_json`.

        The decoded value is cached on the instance as `_extra_metadata`.

        :returns: the decoded JSON value, or `None` when the column is NULL.
        """
        if not hasattr(self, '_extra_metadata'):
            # The cache attribute does not exist yet at this point, so the
            # decision must be made on the persisted column (reading
            # `self._extra_metadata` here would raise AttributeError).
            if self.extra_metadata_json is not None:
                self._extra_metadata = json.loads(self.extra_metadata_json)
            else:
                self._extra_metadata = None
        return self._extra_metadata

    @extra_metadata.setter
    def extra_metadata(self, extra_metadata):
        # keep the cache and the persisted JSON column in sync
        self._extra_metadata = extra_metadata
        self.extra_metadata_json = text_type(json.dumps(extra_metadata))

    # TODO: or use SQLAlchemy alias?
    @property
    def file_name(self):
        """Alias for `title`."""
        return self.title

    def __repr__(self):
        return "<Document id=%r name=%r path=%r content_length=%d at 0x%x>" % (
            self.id,
            self.title,
            self.path,
            self.content_length,
            id(self),
        )

    # locking management; used for checkin/checkout - this could be generalized to
    # any entity
    @property
    def lock(self):
        """
        :returns: `None` if there is no lock or the current lock has expired;
        otherwise the current valid :class:`Lock` instance.
        """
        lock = self.meta.setdefault('abilian.sbe.documents', {}).get('lock')
        if lock:
            # the lock is stored as a plain dict (see `set_lock`)
            lock = Lock(**lock)
            if lock.expired:
                lock = None

        return lock

    @lock.setter
    def lock(self, user):
        """Allow to do `document.lock = user` to set a lock for user.

        If user is None, the lock is released.
        """
        if user is None:
            del self.lock
            return

        self.set_lock(user=user)

    @lock.deleter
    def lock(self):
        """Remove lock, if any.

        `del document.lock` can be safely done even if no lock is set.
        """
        meta = self.meta.setdefault('abilian.sbe.documents', {})
        if 'lock' in meta:
            del meta['lock']
            # nested mutation: flag the change explicitly
            self.meta.changed()

    def set_lock(self, user=None):
        """Lock this document.

        :param user: user taking the lock; defaults to `current_user`.
        :raises RuntimeError: if a valid lock exists and `user` is not its
            owner.
        """
        if user is None:
            user = current_user

        lock = self.lock
        if lock and not lock.is_owner(user=user):
            raise RuntimeError(
                'This document is already locked by another user')

        meta = self.meta.setdefault('abilian.sbe.documents', {})
        # NOTE(review): `user` is not passed to `Lock.new()`; presumably the
        # new lock is bound to the current user -- confirm against `Lock`.
        lock = Lock.new()
        meta['lock'] = lock.as_dict()
        self.meta.changed()
Beispiel #3
0
class BaseContent(CmisObject):
    """Base class for CMIS objects that carry an attached file."""
    __tablename__ = None  # type: str

    _content_id = Column(Integer, db.ForeignKey(Blob.id))
    content_blob = relationship(
        Blob, cascade='all, delete', foreign_keys=[_content_id])

    #: md5 digest (BTW: not sure they should be part of the public API).
    content_digest = Column(Text)

    #: size (in bytes) of the content blob.
    content_length = Column(
        Integer,
        default=0,
        nullable=False,
        server_default=sa.text('0'),
        info={
            'searchable': True,
            'index_to': (('content_length', wf.NUMERIC(stored=True)), ),
        })

    #: MIME type of the content stream.
    # TODO: normalize mime type?
    content_type = Column(
        Text,
        default="application/octet-stream",
        info={
            'searchable': True,
            'index_to': (('content_type', wf.ID(stored=True)), ),
        })

    @property
    def content(self):
        """Raw value held by the content blob."""
        return self.content_blob.value

    @content.setter
    def content(self, value):
        """Store `value` (bytes) in a new blob and record its length."""
        assert isinstance(value, bytes)
        self.content_blob = Blob()
        self.content_blob.value = value
        self.content_length = len(value)

    def set_content(self, content, content_type=None):
        """Replace the content unless its md5 digest is unchanged.

        When the digest differs, the digest and content are updated, and the
        content type too if `find_content_type` yields a truthy value.
        """
        digest = hashlib.md5(content).hexdigest()
        if digest != self.content_digest:
            self.content_digest = digest
            self.content = content
            better_type = self.find_content_type(content_type)
            if better_type:
                self.content_type = better_type

    def find_content_type(self, content_type=None):
        """Return a possibly more specific content type for this instance.

        A missing or generic binary `content_type` is replaced by a guess
        based on the title, so that 'xxx.pdf' is not left flagged as
        binary/octet-stream for example. Any other value is returned as is.
        """
        generic_types = ('application/octet-stream',
                         'binary/octet-stream',
                         'application/binary',
                         'multipart/octet-stream')
        if content_type and content_type not in generic_types:
            # already specific enough
            return content_type

        # absent or generic content type: try to find something more useful
        # to be able to do preview/indexing/...
        guess = mimetypes.guess_type(self.title, strict=False)[0]

        # mimetypes got an update: "random.bin" would be guessed as
        # 'application/vnd.ms-office.activeX'... not so useful in a document
        # repository
        if not guess or guess == 'application/vnd.ms-office.activeX':
            return content_type

        return guess

    @property
    def icon(self):
        """Icon for this content, looked up by content type, then by the
        title's extension, falling back to the generic 'bin.png'.
        """
        by_type = icon_for(self.content_type)
        if not by_type.endswith("/bin.png"):
            return by_type

        # Try harder, just in case. XXX: Could be probably removed later when
        # we are sure that all our bases are covered.
        if "." in self.title:
            extension = self.title.rsplit(".", 1)[-1]
            by_extension = u'{}.png'.format(extension)
            if icon_exists(by_extension):
                return icon_url(by_extension)

        return icon_url('bin.png')