class Hit(db.Model):
    """One row of the ``hit`` table, linked to a ``View`` through ``view_id``."""

    __tablename__ = "hit"

    # Surrogate primary key.
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        nullable=False,
    )

    # Foreign key to the View this hit is attached to.
    view_id = db.Column(
        db.Integer,
        db.ForeignKey(View.id),
    )

    #: time of view
    viewed_at = Column(
        DateTime,
        default=datetime.utcnow,
        nullable=True,
    )
class Document(BaseContent, PathAndSecurityIndexable):
    """A document, in the CMIS sense.

    On top of the content handling inherited from :class:`BaseContent`, a
    document keeps derived blobs (extracted text, PDF rendition, preview),
    antivirus bookkeeping stored in the content blob ``meta`` and a simple
    lock stored in the entity ``meta``.
    """
    __tablename__ = None  # type: str
    __indexable__ = True

    # Build the indexation arguments from both parents, then add the 'text'
    # field. `index_to` is only a scratch name and is removed from the class
    # namespace once the merged tuple has been stored.
    __indexation_args__ = {}
    __indexation_args__.update(BaseContent.__indexation_args__)
    index_to = tuple()
    index_to += BaseContent.__indexation_args__.setdefault('index_to', ())
    index_to += PathAndSecurityIndexable.__indexation_args__.setdefault(
        'index_to', ())
    index_to += (('text', ('text', )), )
    __indexation_args__['index_to'] = index_to
    del index_to

    _indexable_roles_and_users = PathAndSecurityIndexable.\
        _indexable_roles_and_users

    parent = relationship(
        "Folder",
        primaryjoin=(foreign(CmisObject._parent_id) == remote(Folder.id)),
        backref=backref('documents',
                        lazy="joined",
                        order_by='Document.title',
                        cascade='all, delete-orphan'))

    PREVIEW_SIZE = 700

    @property
    def preview_size(self):
        """Default preview size (:attr:`PREVIEW_SIZE`)."""
        return self.PREVIEW_SIZE

    def has_preview(self, size=None, index=0):
        """Ask the converter whether an image exists for this content.

        :param size: requested size; defaults to :attr:`PREVIEW_SIZE`.
        :param index: passed through to ``converter.has_image``
            (presumably a page index -- confirm against the converter).
        """
        if size is None:
            size = self.PREVIEW_SIZE

        return converter.has_image(self.content_digest, self.content_type,
                                   index, size)

    @property
    def digest(self):
        """Alias for content_digest."""
        return self.content_digest

    _text_id = Column(Integer, db.ForeignKey(Blob.id), info=NOT_AUDITABLE)
    text_blob = relationship(Blob, cascade='all, delete',
                             foreign_keys=[_text_id])

    _pdf_id = Column(Integer, db.ForeignKey(Blob.id), info=NOT_AUDITABLE)
    pdf_blob = relationship(Blob, cascade='all, delete',
                            foreign_keys=[_pdf_id])

    _preview_id = Column(Integer, db.ForeignKey(Blob.id), info=NOT_AUDITABLE)
    preview_blob = relationship(Blob, cascade='all, delete',
                                foreign_keys=[_preview_id])

    language = Column(Text,
                      info=dict(searchable=True,
                                index_to=[('language', wf.ID(stored=True))]))
    size = Column(Integer)
    page_num = Column(Integer, default=1)

    #FIXME: use Entity.meta instead
    #: Stores extra metadata as a JSON column
    extra_metadata_json = Column(UnicodeText, info=dict(auditable=False))

    sbe_type = 'cmis:document'

    # antivirus status
    def ensure_antivirus_scheduled(self):
        """Schedule an antivirus/conversion task if one is still required.

        Returns ``True`` when nothing has to be scheduled: no scan is
        required, Celery runs eagerly (the conversion is triggered right
        away), or a previously recorded task has not failed. Otherwise a
        fresh task id is stored in the content blob ``meta``, the conversion
        is triggered, and the method falls through (implicit ``None``).
        """
        if not self.antivirus_required:
            return True

        if current_app.config.get('CELERY_ALWAYS_EAGER', False):
            async_conversion(self)
            return True

        task_id = self.content_blob.meta.get('antivirus_task_id')
        if task_id is not None:
            res = tasks.process_document.AsyncResult(task_id)
            if not res.failed():
                # success, or pending or running
                return True

        # schedule a new task
        self.content_blob.meta['antivirus_task_id'] = str(uuid.uuid4())
        async_conversion(self)

    @property
    def antivirus_scanned(self):
        """
        True if antivirus task was run, even if antivirus didn't return a result
        """
        return self.content_blob and 'antivirus' in self.content_blob.meta

    @property
    def antivirus_status(self):
        """
        True: antivirus has scanned file: no virus
        False: antivirus has scanned file: virus detected
        None: antivirus task was run, but antivirus didn't return a result
        """
        return self.content_blob and self.content_blob.meta.get('antivirus')

    @property
    def antivirus_required(self):
        """True if an antivirus scan still needs to be run.

        That is: the ``ANTIVIRUS_CHECK_REQUIRED`` setting is enabled and the
        content either was never scanned or the scan gave no result.
        """
        required = current_app.config['ANTIVIRUS_CHECK_REQUIRED']
        return required and (not self.antivirus_scanned or
                             self.antivirus_status is None)

    @property
    def antivirus_ok(self):
        """True if user can safely access document content.

        When the check is required only an explicit clean result is accepted;
        otherwise anything but an explicit "virus detected" is accepted.
        """
        required = current_app.config['ANTIVIRUS_CHECK_REQUIRED']
        if required:
            return self.antivirus_status is True

        return self.antivirus_status is not False

    # R/W properties
    @BaseContent.content.setter
    def content(self, value):
        """Set the content, then reset everything derived from it.

        A new antivirus task id is recorded on the fresh content blob and the
        PDF and text blobs are dropped since they belonged to the old content.
        """
        BaseContent.content.fset(self, value)
        self.content_blob.meta['antivirus_task_id'] = str(uuid.uuid4())
        self.pdf_blob = None
        self.text_blob = None

    def set_content(self, content, content_type=None):
        """Store `content` (see ``BaseContent.set_content``), then trigger
        ``async_conversion`` for this document.
        """
        super(Document, self).set_content(content, content_type)
        async_conversion(self)

    @property
    def pdf(self):
        """Value of the PDF blob; a falsy ``pdf_blob`` is returned as is."""
        if self.pdf_blob:
            assert isinstance(self.pdf_blob.value, bytes)
        return self.pdf_blob and self.pdf_blob.value

    @pdf.setter
    def pdf(self, value):
        assert isinstance(value, bytes)
        self.pdf_blob = Blob()
        self.pdf_blob.value = value

    # `text` is an Unicode value.
    @property
    def text(self):
        """Text blob decoded from UTF-8, or ``u''`` when there is no blob."""
        return (self.text_blob.value.decode("utf8")
                if self.text_blob is not None else u'')

    @text.setter
    def text(self, value):
        assert isinstance(value, text_type)
        self.text_blob = Blob()
        self.text_blob.value = value.encode("utf8")

    @property
    def extra_metadata(self):
        """Extra metadata decoded from :attr:`extra_metadata_json`.

        The decoded value is cached on the instance as ``_extra_metadata``.
        Returns ``None`` when the JSON column is empty.
        """
        if not hasattr(self, '_extra_metadata'):
            # The cache attribute does not exist yet in this branch, so the
            # decision must be based on the stored JSON column; reading
            # `self._extra_metadata` here would raise AttributeError.
            if self.extra_metadata_json is not None:
                self._extra_metadata = json.loads(self.extra_metadata_json)
            else:
                self._extra_metadata = None
        return self._extra_metadata

    @extra_metadata.setter
    def extra_metadata(self, extra_metadata):
        # Keep the cached value and its JSON serialization in sync.
        self._extra_metadata = extra_metadata
        self.extra_metadata_json = text_type(json.dumps(extra_metadata))

    # TODO: or use SQLAlchemy alias?
    @property
    def file_name(self):
        """Alias for ``title``."""
        return self.title

    def __repr__(self):
        # %r rather than %d for content_length: the column default is only
        # applied at insert time, so the value may still be None on a new
        # instance and %d would raise TypeError.
        return "<Document id=%r name=%r path=%r content_length=%r at 0x%x>" % (
            self.id,
            self.title,
            self.path,
            self.content_length,
            id(self),
        )

    # locking management; used for checkin/checkout - this could be generalized to
    # any entity
    @property
    def lock(self):
        """
        :returns: either `None` if no lock or current lock is expired; either the
        current valid :class:`Lock` instance.
        """
        lock = self.meta.setdefault('abilian.sbe.documents', {}).get('lock')
        if lock:
            lock = Lock(**lock)
            if lock.expired:
                lock = None

        return lock

    @lock.setter
    def lock(self, user):
        """Allow to do `document.lock = user` to set a lock for user.

        If user is None, the lock is released.
        """
        if user is None:
            del self.lock
            return

        self.set_lock(user=user)

    @lock.deleter
    def lock(self):
        """Remove lock, if any. `del document.lock` can be safely done even if no
        lock is set.
        """
        meta = self.meta.setdefault('abilian.sbe.documents', {})
        if 'lock' in meta:
            del meta['lock']
            self.meta.changed()

    def set_lock(self, user=None):
        """Set a lock on this document.

        :param user: user taking the lock; defaults to ``current_user``.
        :raises RuntimeError: if a valid lock exists and `user` does not own
            it.
        """
        if user is None:
            user = current_user

        lock = self.lock
        if lock and not lock.is_owner(user=user):
            raise RuntimeError(
                'This document is already locked by another user')

        meta = self.meta.setdefault('abilian.sbe.documents', {})
        # NOTE(review): `user` is not passed to Lock.new(); presumably the new
        # lock is created for the current user -- confirm against Lock.
        lock = Lock.new()
        meta['lock'] = lock.as_dict()
        self.meta.changed()
class BaseContent(CmisObject):
    """A base class for cmisobject with an attached file.

    The file itself is stored in a :class:`Blob`; its md5 digest, length and
    MIME type are kept in dedicated columns.
    """
    __tablename__ = None  # type: str

    _content_id = Column(Integer, db.ForeignKey(Blob.id))
    content_blob = relationship(Blob, cascade='all, delete',
                                foreign_keys=[_content_id])

    #: md5 digest (BTW: not sure they should be part of the public API).
    content_digest = Column(Text)

    #: size (in bytes) of the content blob.
    content_length = Column(
        Integer,
        default=0,
        nullable=False,
        server_default=sa.text('0'),
        info=dict(searchable=True,
                  index_to=(('content_length', wf.NUMERIC(stored=True)), )))

    #: MIME type of the content stream.
    # TODO: normalize mime type?
    content_type = Column(
        Text,
        default="application/octet-stream",
        info=dict(searchable=True,
                  index_to=(('content_type', wf.ID(stored=True)), )))

    @property
    def content(self):
        """Raw value of the content blob."""
        return self.content_blob.value

    @content.setter
    def content(self, value):
        # A new Blob is created on every assignment; the length column is
        # kept in sync with the stored bytes.
        assert isinstance(value, bytes)
        self.content_blob = Blob()
        self.content_blob.value = value
        self.content_length = len(value)

    def set_content(self, content, content_type=None):
        """Store `content` and update digest and content type.

        Nothing is changed (not even the content type) when the md5 digest of
        `content` equals the stored digest.

        :param content: new content, as bytes.
        :param content_type: MIME type announced for the content; it is
            refined through :meth:`find_content_type` and only stored when the
            result is non-empty.
        """
        new_digest = hashlib.md5(content).hexdigest()
        if new_digest == self.content_digest:
            return

        self.content_digest = new_digest
        self.content = content
        content_type = self.find_content_type(content_type)
        if content_type:
            self.content_type = content_type

    def find_content_type(self, content_type=None):
        """Find possibly more appropriate content_type for this instance.

        If `content_type` is a binary one, try to find a better one based on
        content name so that 'xxx.pdf' is not flagged as binary/octet-stream for
        example

        :returns: the guessed type when one is found, else `content_type`
            unchanged (also when the instance has no title to guess from).
        """
        if not content_type or content_type in ('application/octet-stream',
                                                'binary/octet-stream',
                                                'application/binary',
                                                'multipart/octet-stream'):
            # absent or generic content type: try to find something more useful to be
            # able to do preview/indexing/...
            title = self.title
            # No title (None or empty) means nothing to guess from;
            # mimetypes.guess_type() would raise TypeError on None.
            guessed_content_type = (
                mimetypes.guess_type(title, strict=False)[0]
                if title else None)
            if (guessed_content_type and
                    guessed_content_type != 'application/vnd.ms-office.activeX'):
                # mimetypes got an update: "random.bin" would be guessed as
                # 'application/vnd.ms-office.activeX'... not so useful in a document
                # repository
                content_type = guessed_content_type

        return content_type

    @property
    def icon(self):
        """Icon for this content.

        The icon for the content type is used unless it is the generic
        ``bin.png``; in that case an icon named after the title's extension
        is tried before falling back to ``bin.png``.
        """
        icon = icon_for(self.content_type)

        if not icon.endswith("/bin.png"):
            return icon

        # Try harder, just in case. XXX: Could be probably removed later when we are
        # sure that all our bases are covered.
        # A missing title is treated like a title without extension instead
        # of raising TypeError on the `in` test.
        title = self.title or u''
        if "." not in title:
            return icon_url('bin.png')

        suffix = title.split(".")[-1]
        icon = u'{}.png'.format(suffix)
        if icon_exists(icon):
            return icon_url(icon)
        else:
            return icon_url('bin.png')