class AbstractDocumentModel(AbstractDocumentManagementModel):
    """Abstract base class for document models.

    Declares the columns and relations shared by document models: the owning
    client, naming/description metadata, mime and document type, an optional
    category, and many-to-many links to tags, annotations and other documents.
    """

    name = fields.char_field(
        max_length=constants.NAME_FIELD_MAX_LENGTH,
        blank=False,
        null=False,
    )
    language = fields.char_field(
        default=settings.LANGUAGE_CODE,
        blank=False,
        null=False,
    )
    # CASCADE: removing a client also removes the documents that belong to it
    client = fields.foreign_key_field(Client, on_delete=CASCADE)
    mime_type = fields.char_field(
        choices=constants.MIME_TYPE_CHOICES,
        default=constants.MIME_TYPE_UNKNOWN,
        blank=False,
        null=False,
    )
    document_type = fields.char_field(
        choices=constants.DOCUMENT_TYPE_CHOICES,
        default=constants.DOCUMENT_TYPE_REFERENCE,
        blank=False,
        null=False,
    )
    title = fields.char_field(
        max_length=constants.TITLE_FIELD_MAX_LENGTH,
        blank=True,
        null=True,
    )
    description = fields.description_field(
        max_length=constants.DESCRIPTION_FIELD_MAX_LENGTH,
        blank=True,
        null=True,
    )
    # SET_NULL: removing a category leaves the document without a category
    category = fields.foreign_key_field(
        Category,
        on_delete=SET_NULL,
        blank=True,
        null=True,
    )
    tags = fields.many_to_many_field(
        Tag,
        through='DocumentTag',
        through_fields=('document', 'tag'),
        related_name='documents',
    )
    annotations = fields.many_to_many_field(
        Annotation,
        through='DocumentAnnotation',
        through_fields=('document', 'annotation'),
        related_name='documents',
    )
    # TODO: this is not a symmetrical relationship -> it is unidirectional,
    # navigated starting from 'from_document'
    documents = fields.many_to_many_field(
        'self',
        symmetrical=False,
        through='DocumentAssociation',
        through_fields=('from_document', 'to_document'),
        related_name='related_documents',
    )

    class Meta(AbstractDocumentManagementModel.Meta):
        """Model options: abstract, names are unique per client."""

        abstract = True
        unique_together = ('client', 'name')

    def __str__(self):
        """Return the document name as the display string."""
        return self.name
class Client(AbstractDocumentManagementModel):
    """Client model.

    A *system* client (``is_system=True``) is one designed to support
    'free standing' users that are not associated with an official client.
    """

    client_id = fields.char_field(
        max_length=constants.ID_FIELD_MAX_LEGNTH,
        blank=False,
        null=False,
    )
    name = fields.char_field(
        max_length=constants.NAME_FIELD_MAX_LENGTH,
        blank=False,
        null=False,
    )
    email = fields.email_field(blank=True, null=True)
    phone = fields.phone_number_field(blank=True, null=True)
    description = fields.description_field(
        max_length=constants.DESCRIPTION_FIELD_MAX_LENGTH,
        blank=True,
        null=True,
    )
    # flags whether this is a system client; defaults to a regular client
    is_system = fields.boolean_field(default=False, blank=False, null=False)

    class Meta(AbstractDocumentManagementModel.Meta):
        """Model options: table name and human readable names."""

        db_table = db_table(app_label, _client)
        verbose_name = _(_client_verbose)
        verbose_name_plural = _(pluralize(_client_verbose))

    def __str__(self):
        """Return the client id as the display string."""
        return self.client_id
class Annotation(AbstractDocumentManagementModel):
    """Annotation model.

    Allows another model instance to be annotated. Annotation names are
    unique per client.
    """

    name = fields.char_field(
        max_length=constants.NAME_FIELD_MAX_LENGTH,
        blank=False,
        null=False,
    )
    annotation = fields.annotation_field(
        max_length=constants.ANNOTATION_FIELD_MAX_LENGTH,
        blank=False,
        null=False,
    )
    # CASCADE: removing a client also removes its annotations
    client = fields.foreign_key_field(Client, on_delete=CASCADE)

    class Meta(AbstractDocumentManagementModel.Meta):
        """Model options: concrete table, names are unique per client."""

        abstract = False
        db_table = db_table(app_label, _annotation)
        unique_together = ('client', 'name')
        verbose_name = _(_annotation_verbose)
        verbose_name_plural = _(pluralize(_annotation_verbose))

    def __str__(self):
        """Return the annotation name as the display string."""
        return self.name
class DocumentAssociation(AbstractDocumentManagementModel):
    """Directed association between two documents.

    A document may be associated with zero or more other documents::

        Document(1) -------> Document(0..*)

    Each (from_document, to_document, purpose) combination is unique.
    """

    # CASCADE: removing either document removes the association
    from_document = fields.foreign_key_field(
        Document,
        on_delete=CASCADE,
        related_name='from_document',
    )
    to_document = fields.foreign_key_field(
        Document,
        on_delete=CASCADE,
        related_name='to_document',
    )
    client = fields.foreign_key_field(Client, on_delete=CASCADE)
    # why the two documents are related
    purpose = fields.char_field(
        choices=constants.DOCUMENT_ASSOCIATION_PURPOSE_CHOICES,
        default=constants.DOCUMENT_ASSOCIATION_PURPOSE_UNKNOWN,
        blank=False,
        null=False,
    )

    class Meta(AbstractDocumentManagementModel.Meta):
        """Model options: table name, readable names and uniqueness."""

        db_table = db_table(app_label, _document_association)
        unique_together = ('from_document', 'to_document', 'purpose')
        verbose_name = _(_document_association_verbose)
        verbose_name_plural = _(pluralize(_document_association_verbose))

    def __str__(self):
        """Return ``(from_document,to_document,purpose)`` as the display string."""
        source = self.from_document
        target = self.to_document
        return f'({source!s},{target!s},{self.purpose})'
class AbstractResultsModel(AbstractAnalyticsModel):
    """Abstract base class for results models.

    Stores a mandatory JSON ``input`` and ``output`` together with an
    optional name and description, owned by a client.
    """

    input = fields.json_field(blank=False, null=False)
    output = fields.json_field(blank=False, null=False)
    name = fields.char_field(
        max_length=constants.NAME_FIELD_MAX_LENGTH,
        blank=True,
        null=True,
    )
    # CASCADE: removing a client also removes the results that belong to it
    client = fields.foreign_key_field(Client, on_delete=CASCADE)
    description = fields.description_field(
        max_length=constants.DESCRIPTION_FIELD_MAX_LENGTH,
        blank=True,
        null=True,
    )

    class Meta(AbstractAnalyticsModel.Meta):
        """Model options: abstract."""

        abstract = True

    def __str__(self):
        """Return the name when set, otherwise the parent class string."""
        return self.name or super().__str__()
class AbstractClassification(AbstractDocumentManagementModel):
    """Abstract classification model.

    Allows a classification to be associated with an instance.
    Classifications form a hierarchy through the optional ``parent``
    reference and their names are unique per client.
    """

    name = fields.char_field(blank=False, null=False,
                             max_length=constants.NAME_FIELD_MAX_LENGTH)
    # deletion of client will result in deletion of associated classifications
    client = fields.foreign_key_field(Client, on_delete=CASCADE)
    # deletion of parent will result in parent being set to null
    parent = fields.foreign_key_field('self', blank=True, null=True,
                                      related_name='children',
                                      on_delete=SET_NULL)
    description = fields.description_field(
        blank=True, null=True,
        max_length=constants.DESCRIPTION_FIELD_MAX_LENGTH)
    # the classification population (i.e. AuxiliaryDocument, Reference Document)
    target = fields.char_field(
        blank=False, null=False,
        default=constants.CLASSIFICATION_TARGET_REFERENCE_DOCUMENT,
        choices=constants.CLASSIFICATION_TARGET_CHOICES)
    # the industry or other top level classification such as finance,
    # insurance, shipping, general
    domain = fields.char_field(
        blank=False, null=False,
        default=constants.CLASSIFICATION_DOMAIN_GENERAL,
        choices=constants.CLASSIFICATION_DOMAIN_CHOICES)

    class Meta(AbstractDocumentManagementModel.Meta):
        """Meta class definition"""
        abstract = True
        unique_together = ('client', 'name')

    def clean(self):
        """Model wide validation.

        When a parent is set, verifies that the parent has been saved and
        exists, then walks up the hierarchy checking for repeated names or
        ids and for excessive depth.

        Raises:
            ValidationError: if the parent has not been saved, a name or id
                repeats in the hierarchy, or the depth limit is reached.
            DoesNotExist: propagated from the manager's ``get`` when the
                parent (or an ancestor) can not be found.
        """
        # pylint: disable=no-member
        if self.parent:
            if not self.parent.id:
                raise ValidationError(_('Invalid parent - has not been saved.'))
            model_class = self.__class__
            # verify that parent exists - if not exception will be thrown
            model_class.objects.get(pk=self.parent.id)

            # check the entire hierarchy for name and id duplicates
            max_depth = 10
            depth = 0
            seen_names = set()
            seen_ids = set()
            current = self
            while current and depth < max_depth:
                current_name = current.name
                current_id = current.id
                # Messages use placeholders interpolated *after* the gettext
                # lookup so that the literal can match a translation entry.
                if current_name in seen_names:
                    raise ValidationError(
                        _('Duplicate name %(name)s in hierarchy.')
                        % {'name': current_name})
                if current_id in seen_ids:
                    raise ValidationError(
                        _('Duplicate id %(id)s in hierarchy.')
                        % {'id': current_id})
                seen_names.add(current_name)
                seen_ids.add(current_id)
                if current.parent:
                    # re-read the ancestor through the manager before
                    # inspecting it
                    current = model_class.objects.get(pk=current.parent.id)
                    depth += 1
                else:
                    # top of the hierarchy reached, ends the loop
                    current = None
            if depth >= max_depth:
                raise ValidationError(
                    _('Max depth of %(max_depth)s exceeded.')
                    % {'max_depth': max_depth})
        super(AbstractClassification, self).clean()

    def __str__(self):
        """Generate pretty string representation.

        Returns the names from the top most ancestor down to this instance,
        joined by ``' -> '``. Traversal stops if an already visited id is met
        so that a cyclic hierarchy can not cause an endless loop.
        """
        full_path = [self.name]
        # unsaved instances have no id yet and therefore can not be tracked
        visited_ids = set() if self.id is None else {self.id}
        current = self.parent
        while current is not None:
            current_id = current.id
            if current_id is not None:
                if current_id in visited_ids:
                    # cycle detected - stop instead of looping forever
                    break
                visited_ids.add(current_id)
            full_path.append(current.name)
            current = current.parent  # pylint: disable=no-member
        return ' -> '.join(reversed(full_path))
class AbstractDerivedDocumentModel(Model):
    """Derived document model base class.

    Holds the payload of a document - inline ``content``, an uploaded file
    or a directory path - and is linked one-to-one to the ``Document`` row
    that it shares its primary key with.
    """

    document = fields.one_to_one_field(to_class=Document, primary_key=True,
                                       on_delete=CASCADE)
    # table specific content for table space future optimization
    content = fields.text_field(blank=True, null=True,
                                max_length=constants.CONTENT_FIELD_MAX_LENGTH)
    # designates system managed files uploaded by end user or on his behalf
    upload = fields.constrained_file_field(
        blank=True, null=True,
        upload_to=client_directory_path,
        storage=overwrite_storage,
        content_types=constants.MIME_TYPES,
        max_upload_size=constants.UPLOAD_FIELD_MAX_FILE_SIZE)
    # server mounted file system path
    dir_path = fields.char_field(
        blank=True, null=True,
        max_length=constants.DIR_PATH_FIELD_MAX_LENGTH)

    class Meta:
        """Meta class definition"""
        app_label = app_label
        abstract = True

    def __str__(self):
        """pretty format instance as string"""
        return self.document.name

    @classmethod
    def my_document_type(cls):
        """return document type"""
        return constants.DOCUMENT_TYPE_UNKNOWN

    def clean(self):
        """Model wide validation.

        Requires at least one of ``content``, ``upload`` or ``dir_path`` and
        rejects ``content`` combined with ``upload``. For inline content the
        mime type is detected and checked against ``constants.MIME_TYPES``,
        the text is passed through ``ftfy.fix_text`` (best effort) and the
        detected mime type is stored on the related document.

        Raises:
            ValidationError: if no payload is set, both ``content`` and
                ``upload`` are set, or the content mime type can not be
                detected or is not allowed.
        """
        # pylint: disable=no-member,broad-except
        # validate that upload, content, dir_path are not all empty
        # NOTE(review): the message below does not mention "dir_path" although
        # it is accepted by the check - confirm the intended wording.
        if not (self.content or self.upload or self.dir_path):
            raise ValidationError(_('Must set "content" or "upload" fields.'))
        # validate that upload and content are not both set
        if self.content and self.upload:
            raise ValidationError(
                _('Can not set both "content" and "upload" fields.'))

        if self.upload:
            # NOTE(review): this sets ``mime_type`` on the derived instance,
            # not on ``self.document`` as the content branch does - confirm
            # whether the document mime type should be updated here as well.
            self.mime_type = self.upload.field.content_type  # pylint: disable=attribute-defined-outside-init,no-member

        if self.content:
            content = self.content
            try:
                mime_type = magic.from_buffer(content, mime=True)
            except Exception as ex:
                _logger.exception(
                    'failed to fetch mime_type for id: %s name: %s',
                    self.document.id, self.document.name)
                raise ValidationError(_('Undetected mime type.')) from ex
            if mime_type not in constants.MIME_TYPES:
                # placeholder is interpolated after the gettext lookup so the
                # literal can match a translation entry
                raise ValidationError(
                    _('Invalid mime type %(mime_type)s.')
                    % {'mime_type': mime_type})
            try:
                content = ftfy.fix_text(self.content)
            except Exception:
                # best effort: keep the original content when fixing fails
                _logger.exception('failed to fix text for id: %s name: %s',
                                  self.document.id, self.document.name)
            self.content = content
            self.document.mime_type = mime_type

        if not self.document.mime_type:
            # mime type was not discovered on content or file upload data
            self.document.mime_type = constants.MIME_TYPE_UNKNOWN
        super(AbstractDerivedDocumentModel, self).clean()

    def save(self, force_insert=False, force_update=False, using=None,
             update_fields=None):
        """Save the instance.

        Overriding to ensure instance validity across multiple fields.
        Requires implementation due to different model inheritance.

        Runs ``full_clean()``, saves the related document and then saves this
        instance. The arguments are forwarded to the parent ``save`` by
        keyword, as positional use is deprecated in recent Django versions.
        """
        self.full_clean()
        # @TODO: saving again the underlying model as mime_type has changed,
        # review the approach
        self.document.save()  # pylint: disable=no-member
        return super(AbstractDerivedDocumentModel, self).save(
            force_insert=force_insert,
            force_update=force_update,
            using=using,
            update_fields=update_fields)

    def get_file_contents(self):
        """Get file contents if a file has been uploaded.

        Returns:
            The text read from the uploaded file, or ``None`` when no file
            has been uploaded or the file can not be read or decoded.
        """
        # @TODO: not handling file contents if file has not been uploaded
        # (i.e. dir_path is set and upload is not set)
        data = None
        if self.upload:
            # pylint: disable=no-member
            try:
                with open(self.upload.path) as input_file:
                    data = input_file.read()
            except (IOError, UnicodeDecodeError) as ex:
                # UnicodeDecodeError: the file is opened in text mode, so an
                # upload that is not decodable text is treated like any other
                # unreadable file instead of propagating a crash
                _logger.error('invalid file %s exc %s', self.upload.path, ex)
        return data

    def get_text(self):
        """Get text data: inline content when set, else the uploaded file text."""
        if self.content:
            return self.content
        return self.get_file_contents()