class PageHit(DocType):
    """Elasticsearch document recording a single page-view hit."""

    doc_id = Long()
    fc_user_id = Long()
    product_id = Long()
    permissions = Long(multi=True)  # multi-valued: one entry per permission id
    access_time = Date()
    origin_ip = Ip()
    request_type = Text()
    uri = Text()
    page_number = Long()
    is_full = Boolean()
    is_known_article = Boolean()
    is_known_product = Boolean()

    class Meta:
        index = 'hits'

    def save(self, **kwargs):
        """Persist the document (kept as an explicit hook point for pre-save logic)."""
        return super(PageHit, self).save(**kwargs)

    @classmethod
    def properties(cls):
        """Return the names of the mapped document properties as a list."""
        props = PageHit._doc_type.mapping.properties.to_dict()['doc']['properties'].keys()
        # list(...) replaces the redundant identity comprehension [p for p in props].
        return list(props)
class Facebook(DataHead):
    """Elasticsearch document for a Facebook group post."""

    # Graph API attribute names collected for the group and for the post.
    group_detail_list = ['id', 'cover', 'description', 'name']
    post_extra_data = ['id', 'message', 'updated_time', 'permalink_url']

    keywords = Keyword(multi=True)
    group_name = Text()
    post_date = Date()
    extra_data = Nested()
    # One Text() sub-field per group detail attribute.
    # Dict comprehension replaces the previous dict(map(lambda ...)) construction.
    group_extra_data = Nested(
        properties={detail: Text() for detail in group_detail_list})
class Pipeline_Document(Base_Document): """Represents the stored pipelines.""" # id already defined by Elasticsearch # updated_at = Date() #FIXME long format not compatible with date # created_at = Date(required=True) #FIXME long format not compatible with date name = Text() status = Text(required=True) user = Text() class Index: """Elasticsearch configuration.""" name = 'pipeline'
class Movie(Document):
    """Elasticsearch document for a scraped IMDB movie, with autocomplete support."""

    # 'raw' keyword sub-field allows exact match / aggregation on the title.
    title = Text(fields={'raw': {'type': 'keyword'}})
    film_rating = Text()
    duration = Text()
    genre = Keyword(multi=True)
    # NOTE(review): release_date is Text while release_date_unix_time holds the
    # numeric timestamp — presumably the human-readable form; confirm with the indexer.
    release_date = Text()
    release_date_unix_time = Float()
    imdb_ratingValue = Float()
    imdb_bestRating = Float()
    imdb_ratingCount = Float()
    description = Text()
    storyline = Text()
    poster = Text()
    trailer_img = Text()
    director = Keyword(multi=True)
    creator = Keyword(multi=True)
    writer = Keyword(multi=True)
    stars = Keyword(multi=True)
    taglines = Keyword(multi=True)
    url = Keyword()
    # Raw HTTP headers are stored but not indexed or searchable (enabled=False).
    req_headers = Object(enabled=False)
    res_headers = Object(enabled=False)
    # ngram analyzer at index time, standard analyzer at query time, for suggestions.
    suggest = Completion(analyzer=ngram_analyzer, search_analyzer=analyzer('standard'))

    class Index:
        name = 'imdb'
class Movie(DocType):
    """Elasticsearch document for an IMDB movie (legacy DocType API), with autocomplete."""

    # 'raw' keyword sub-field allows exact match / aggregation on the title.
    title = Text(fields={'raw': {'type': 'keyword'}})
    summary = Text()
    datePublished = Date()
    creators = Keyword(multi=True)
    genres = Keyword(multi=True)
    casts = Keyword(multi=True)
    time = Integer()  # NOTE(review): presumably runtime in minutes — confirm with scraper
    countries = Keyword(multi=True)
    plot_keywords = Keyword(multi=True)
    languages = Keyword(multi=True)
    rating = Float()
    poster = Keyword()
    # ngram analyzer at index time, standard analyzer at query time, for suggestions.
    suggest = Completion(analyzer=ngram_analyzer, search_analyzer=analyzer('standard'))

    class Meta:
        index = 'imdb'
def get_es_mapping(cls):
    """Extend the parent mapping with Topic-specific fields."""
    es_mapping = super(Topic, cls).get_es_mapping()

    # Searchable fields; title and tags are boosted above body text.
    indexed_fields = [
        ('title', Text(boost=1.5)),
        ('tags', Text(boost=2.0)),
        ('subtitle', Text()),
        ('is_solved', Boolean()),
        ('is_locked', Boolean()),
        ('is_sticky', Boolean()),
        ('pubdate', Date()),
        ('forum_pk', Integer()),
    ]
    # Stored for display only, not indexed:
    display_only_fields = [
        ('get_absolute_url', Keyword(index=False)),
        ('forum_title', Text(index=False)),
        ('forum_get_absolute_url', Keyword(index=False)),
    ]
    for field_name, field_type in indexed_fields + display_only_fields:
        es_mapping.field(field_name, field_type)

    return es_mapping
def get_es_mapping(cls):
    """Build the Topic mapping by adding local fields onto the parent's."""
    es_mapping = super(Topic, cls).get_es_mapping()

    # Insertion order is preserved, so fields register in the same order
    # as the previous explicit call sequence.
    field_specs = {
        "title": Text(boost=1.5),
        "tags": Text(boost=2.0),
        "subtitle": Text(),
        "is_solved": Boolean(),
        "is_locked": Boolean(),
        "is_sticky": Boolean(),
        "pubdate": Date(),
        "forum_pk": Integer(),
        # not indexed, display-only:
        "get_absolute_url": Keyword(index=False),
        "forum_title": Text(index=False),
        "forum_get_absolute_url": Keyword(index=False),
    }
    for spec_name, mapped_type in field_specs.items():
        es_mapping.field(spec_name, mapped_type)

    return es_mapping
def get_es_mapping(cls):
    """Extend the parent mapping with Post-specific fields."""
    es_mapping = super(Post, cls).get_es_mapping()

    # Searchable post fields.
    indexed_fields = [
        ('text_html', Text()),
        ('is_useful', Boolean()),
        ('is_visible', Boolean()),
        ('position', Integer()),
        ('like_dislike_ratio', Float()),
        ('pubdate', Date()),
        ('forum_pk', Integer()),
        ('topic_pk', Integer()),
    ]
    # Stored for display only, not indexed:
    display_only_fields = [
        ('get_absolute_url', Keyword(index=False)),
        ('topic_title', Text(index=False)),
        ('forum_title', Text(index=False)),
        ('forum_get_absolute_url', Keyword(index=False)),
    ]
    for field_name, field_type in indexed_fields + display_only_fields:
        es_mapping.field(field_name, field_type)

    return es_mapping
def get_es_mapping(cls):
    """Build the Post mapping by adding local fields onto the parent's."""
    es_mapping = super(Post, cls).get_es_mapping()

    # Insertion order is preserved, so fields register in the same order
    # as the previous explicit call sequence.
    field_specs = {
        "text_html": Text(),
        "is_useful": Boolean(),
        "is_visible": Boolean(),
        "position": Integer(),
        "like_dislike_ratio": Float(),
        "pubdate": Date(),
        "forum_pk": Integer(),
        "topic_pk": Integer(),
        # not indexed, display-only:
        "get_absolute_url": Keyword(index=False),
        "topic_title": Text(index=False),
        "forum_title": Text(index=False),
        "forum_get_absolute_url": Keyword(index=False),
    }
    for spec_name, mapped_type in field_specs.items():
        es_mapping.field(spec_name, mapped_type)

    return es_mapping
class ApplicationDocumentType(Document):
    """django-elasticsearch-dsl document indexing BaseApplication records.

    Several text fields are copied into `wildcard` so a single ngram query
    can match across them.
    """

    # purposefully not DED field - this is just for collecting other field values for wildcard search
    wildcard = Text(analyzer=ngram_analyzer, search_analyzer=whitespace_analyzer, store=True)
    id = fields.KeywordField()
    queues = fields.NestedField(doc_class=Queue)
    name = fields.TextField(copy_to="wildcard", analyzer=descriptive_text_analyzer)
    # 'raw' enables case-insensitive exact matching; 'suggest' powers autocomplete.
    reference_code = fields.TextField(
        copy_to="wildcard",
        analyzer=reference_code_analyzer,
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    organisation = fields.TextField(
        copy_to="wildcard",
        attr="organisation.name",
        analyzer=descriptive_text_analyzer,
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    status = fields.KeywordField(
        attr="status.status",
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    submitted_by = fields.ObjectField(doc_class=User)
    case_officer = fields.NestedField(doc_class=User)
    goods = fields.NestedField(doc_class=Product)
    parties = fields.NestedField(doc_class=Party)
    created = fields.DateField(attr="created_at")
    updated = fields.DateField(attr="updated_at")
    case_type = fields.KeywordField(
        attr="case_type.type",
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    case_subtype = fields.KeywordField(
        attr="case_type.sub_type",
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )

    class Index:
        name = settings.ELASTICSEARCH_APPLICATION_INDEX_ALIAS
        settings = {"number_of_shards": 1, "number_of_replicas": 0, "max_ngram_diff": 18}

    # Meta and Django both declare the model — presumably for compatibility
    # across django-elasticsearch-dsl versions; confirm before removing either.
    class Meta:
        model = models.BaseApplication

    class Django:
        model = models.BaseApplication

    def get_queryset(self):
        # Draft applications are never indexed.
        return super().get_queryset().exclude(status__status="draft")

    def get_indexing_queryset(self):
        # Hack to make `parties` use the prefetch cache. The party manager's
        # .all() calls .exclude, which clears the cache, so work around that
        # here: read from the instance's prefetched_parties attr, which was
        # set in the Prefetch below. Looks small, but is a huge performance
        # improvement — helps take db reads down to 7 in total.
        self._fields["parties"]._path = ["prefetched_parties"]
        return (
            self.get_queryset()
            .select_related("organisation")
            .select_related("submitted_by__baseuser_ptr")
            .select_related("case_officer__baseuser_ptr")
            .select_related("status")
            .select_related("case_type")
            .prefetch_related("queues")
            .prefetch_related("queues__team")
            .prefetch_related(
                Prefetch(
                    "goods",
                    queryset=(
                        models.GoodOnApplication.objects.all()
                        .select_related("good")
                        .select_related("good__organisation")
                        .prefetch_related("good__control_list_entries")
                        .prefetch_related("good__control_list_entries__parent")
                    ),
                )
            )
            .prefetch_related(
                Prefetch(
                    "parties",
                    to_attr="prefetched_parties",
                    queryset=(
                        models.PartyOnApplication.objects.all()
                        .select_related("party")
                        .select_related("party__country")
                        .select_related("party__organisation")
                    ),
                )
            )
        )
class ProductDocumentType(Document):
    """django-elasticsearch-dsl document indexing GoodOnApplication (products).

    Text fields copy into `wildcard` for single-query ngram search; `context`
    and `canonical_name` are derived in prepare() for grouping.
    """

    # purposefully not DED field - this is just for collecting other field values for wildcard search
    wildcard = Text(
        analyzer=ngram_analyzer,
        search_analyzer=whitespace_analyzer,
        store=True,
    )

    # purposefully not DED field - this is just for collecting other field values for grouping purposes in ES
    context = fields.Keyword()  # used for grouping

    canonical_name = fields.KeywordField(attr="good.description")  # is overwritten in prepare

    # base details. iteration 1
    id = fields.KeywordField()
    description = fields.TextField(
        attr="good.description",
        copy_to="wildcard",
        analyzer=descriptive_text_analyzer,
    )
    control_list_entries = fields.NestedField(attr="good.control_list_entries", doc_class=Rating)
    queues = fields.NestedField(doc_class=Queue, attr="application.queues")
    # NOTE: this field was previously declared twice with identical arguments;
    # the later duplicate (which silently shadowed this one) has been removed.
    organisation = fields.TextField(
        copy_to="wildcard",
        attr="good.organisation.name",
        analyzer=descriptive_text_analyzer,
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
    )
    # (does not exist yet) - needs to be here for data shape parity with SPIRE
    name = fields.TextField(attr="good.description")  # not mapped yet
    destination = fields.KeywordField(
        attr="application.end_user.party.country.name",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        normalizer=lowercase_normalizer,
    )
    end_use = fields.TextField(attr="application.intended_end_use")
    end_user_type = fields.KeywordField(
        attr="application.end_user.party.sub_type",
        normalizer=lowercase_normalizer,
    )
    date = fields.DateField(attr="application.submitted_at")
    application = fields.NestedField(doc_class=ApplicationOnProduct)
    rating_comment = fields.TextField(attr="good.comment", copy_to="wildcard", analyzer=descriptive_text_analyzer)
    report_summary = fields.TextField(
        attr="good.report_summary",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        analyzer=descriptive_text_analyzer,
        copy_to="wildcard",
    )
    part_number = fields.TextField(
        attr="good.part_number",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        analyzer=part_number_analyzer,
        copy_to="wildcard",
    )
    regime = fields.Keyword()

    class Index:
        name = settings.ELASTICSEARCH_PRODUCT_INDEX_ALIAS
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
            "max_ngram_diff": 18,
        }

    # Meta and Django both declare the model — presumably for compatibility
    # across django-elasticsearch-dsl versions; confirm before removing either.
    class Meta:
        model = models.GoodOnApplication

    class Django:
        model = models.GoodOnApplication

    def get_queryset(self):
        # Products attached to draft applications are never indexed.
        return super().get_queryset().exclude(application__status__status="draft")

    def prepare(self, instance):
        """Derive the grouping context and canonical name after base preparation."""
        data = super().prepare(instance)
        # 🔥 acts as a separator unlikely to occur in any field value.
        data["context"] = f"{data['destination']}🔥{data['end_use']}🔥{data['end_user_type']}"
        data["canonical_name"] = data["description"]
        return data

    def get_indexing_queryset(self):
        """Queryset with the joins/prefetches needed for efficient bulk indexing."""
        return (
            self.get_queryset()
            .select_related("good")
            .select_related("application")
            .select_related("good__organisation")
            .prefetch_related("application__parties__party__flags")
        )
class MXXZjacDoc(DocType):
    """Elasticsearch document for a private-loan arbitration case."""

    caseId = Keyword()
    ctime = Keyword()
    timeStamp = Keyword()
    applicant = Keyword()
    respondent = Nested(doc_class=InnerObjectWrapper, properties={
        "name": Keyword(),
        "certAddress": Keyword(),
        "phone": Keyword(),
        "email": Keyword(),
        "otherAddress": Keyword(),
        "idcard": Keyword(),
        "card_front": Text(),
        "card_nfront": Text(),
    })

    # Glossary mapping the Chinese case-info labels to the English field
    # meanings below (kept verbatim as a runtime class attribute).
    text = """
    借款年利率:Annual interest rate of borrowing
    合同金额:Contract amount
    放款金额:Loan amount
    合同签订时间:
    借款开始时间:Borrowing start time
    借款结束时间:End of loan time
    借款时常:Borrowing often
    借款时长单位:Borrowing time unit
    违约时间:Default time
    尚欠本金:Still owed principal
    尚欠利息:Interest owed
    仲裁协议签订时间:Arbitration agreement time
    是否分期(分批):Whether to stage (batch)
    居间方:Intermediary party
    借款用途:Use of the loan
    还款方式:Repayment
    是否涉外:Whether it is foreign-related
    """

    caseInfo = Object(doc_class=InnerObjectWrapper, properties={
        "annualInterestOfBorrowing": Float(),
        "contractAmount": Float(),
        "loanAmount": Float(),
        # NOTE(review): trailing space in this key is kept because the live
        # index mapping may already contain it — confirm before correcting.
        "contractTime ": Keyword(),
        "borrowingStartTime": Keyword(),
        "borrowingEndTime": Keyword(),
        "borrowingOften": Integer(),
        "borrowingTimeUnit": Keyword(),
        "defaultTime": Keyword(),
        "stillOwedPrincipal": Float(),
        "interestOwed": Float(),
        "arbitrationAgreementTime": Keyword(),
        "whetherStaging": Keyword(),
        "intermediaryParty": Keyword(),
        "usageLoan": Keyword(),
        "repaymentWay": Keyword(),
        "whetherForeign": Keyword(),
    })
    contentiousAmount = Float()

    class Meta:
        index = config.ES_INDEX
        doc_type = config.ES_DOC_TYPE

    @classmethod
    def make_doc(cls, caseId, ctime, timeStamp, applicant, respondent,
                 caseInfo, contentiousAmount):
        """Create, save and return (as a plain dict) a case document.

        Bug fix: caseInfo and contentiousAmount were previously both assigned
        to doc_obj.respondent, clobbering the respondent value and leaving
        the caseInfo and contentiousAmount fields unset.
        """
        doc_obj = cls()
        doc_obj.meta.id = caseId  # reuse the case id as the ES document id
        doc_obj.caseId = caseId
        doc_obj.ctime = ctime
        doc_obj.timeStamp = timeStamp
        doc_obj.applicant = applicant
        doc_obj.respondent = respondent
        doc_obj.caseInfo = caseInfo
        doc_obj.contentiousAmount = contentiousAmount
        doc_obj.save()
        return doc_obj.to_dict(include_meta=False)