예제 #1
0
class PageHit(DocType):
    """Document describing a single page hit, stored in the ``hits`` index."""

    # Identifiers attached to the hit.
    doc_id = Long()
    fc_user_id = Long()
    product_id = Long()

    # Request details.
    permissions = Long(multi=True)
    access_time = Date()
    origin_ip = Ip()
    request_type = Text()
    uri = Text()
    page_number = Long()
    is_full = Boolean()
    is_known_article = Boolean()
    is_known_product = Boolean()

    class Meta:
        index = 'hits'

    def save(self, **kwargs):
        """Save the document by delegating straight to the parent ``save``."""
        parent = super(PageHit, self)
        return parent.save(**kwargs)

    @classmethod
    def properties(cls):
        """Return the names of the mapped properties as a list.

        The names are read from the ``['doc']['properties']`` section of the
        dict form of ``PageHit``'s mapping properties.
        """
        mapping_dict = PageHit._doc_type.mapping.properties.to_dict()
        doc_properties = mapping_dict['doc']['properties']
        return list(doc_properties.keys())
예제 #2
0
class Facebook(DataHead):
    """Document for a Facebook group post, extending ``DataHead``."""

    # Key lists kept as plain class attributes; ``group_detail_list`` also
    # drives the sub-fields of ``group_extra_data`` below.
    group_detail_list = ['id', 'cover', 'description', 'name']
    post_extra_data = ['id', 'message', 'updated_time', 'permalink_url']

    keywords = Keyword(multi=True)
    group_name = Text()
    post_date = Date()
    extra_data = Nested()
    # One fresh Text() sub-field per entry of group_detail_list.
    group_extra_data = Nested(
        properties={detail: Text() for detail in group_detail_list})
예제 #3
0
class Pipeline_Document(Base_Document):
    """Represents the stored pipelines.

    Declares three text fields (``name``, ``status`` and ``user``); ``status``
    is the only one marked as required. The index name is ``pipeline``.
    """

    # id already defined by Elasticsearch
    # The two timestamp fields below are disabled until the format issue
    # described in the FIXMEs is resolved.
    # updated_at = Date()  #FIXME long format not compatible with date
    # created_at = Date(required=True)  #FIXME long format not compatible with date
    name = Text()
    status = Text(required=True)
    user = Text()

    class Index:
        """Elasticsearch configuration."""

        # Name of the index these documents are stored in.
        name = 'pipeline'
예제 #4
0
class Movie(Document):
    """Mapping for a movie document stored in the ``imdb`` index."""

    # Full-text title, with an additional ``raw`` keyword sub-field.
    title = Text(fields={'raw': {'type': 'keyword'}})
    film_rating = Text()
    duration = Text()
    genre = Keyword(multi=True)
    # The release date is mapped as text; the companion float field
    # presumably carries the same date as a Unix timestamp - TODO confirm.
    release_date = Text()
    release_date_unix_time = Float()
    imdb_ratingValue = Float()
    imdb_bestRating = Float()
    imdb_ratingCount = Float()
    description = Text()
    storyline = Text()
    poster = Text()
    trailer_img = Text()
    # Multi-valued keyword fields.
    director = Keyword(multi=True)
    creator = Keyword(multi=True)
    writer = Keyword(multi=True)
    stars = Keyword(multi=True)
    taglines = Keyword(multi=True)
    url = Keyword()
    # Header objects are mapped with enabled=False.
    req_headers = Object(enabled=False)
    res_headers = Object(enabled=False)

    # Completion field: ``ngram_analyzer`` (defined outside this class) is
    # used as the analyzer, the built-in 'standard' analyzer at search time.
    suggest = Completion(analyzer=ngram_analyzer,
                         search_analyzer=analyzer('standard'))

    class Index:
        # Name of the index these documents are stored in.
        name = 'imdb'
예제 #5
0
class Movie(DocType):
    """Mapping for a movie document stored in the ``imdb`` index."""

    # Full-text title, with an additional ``raw`` keyword sub-field.
    title = Text(fields={'raw': {'type': 'keyword'}})
    summary = Text()
    datePublished = Date()
    # Multi-valued keyword fields.
    creators = Keyword(multi=True)
    genres = Keyword(multi=True)
    casts = Keyword(multi=True)
    # NOTE(review): presumably the running time; unit not visible here.
    time = Integer()
    countries = Keyword(multi=True)
    plot_keywords = Keyword(multi=True)
    languages = Keyword(multi=True)
    rating = Float()
    poster = Keyword()
    # Completion field: ``ngram_analyzer`` (defined outside this class) is
    # used as the analyzer, the built-in 'standard' analyzer at search time.
    suggest = Completion(analyzer=ngram_analyzer,
                         search_analyzer=analyzer('standard'))

    class Meta:
        # Name of the index these documents are stored in.
        index = 'imdb'
예제 #6
0
    def get_es_mapping(cls):
        """Return the parent mapping extended with the Topic fields.

        Fields are registered in declaration order: the regular fields first,
        then the ones declared with ``index=False``.
        """
        mapping = super(Topic, cls).get_es_mapping()

        regular_fields = [
            ('title', Text(boost=1.5)),
            ('tags', Text(boost=2.0)),
            ('subtitle', Text()),
            ('is_solved', Boolean()),
            ('is_locked', Boolean()),
            ('is_sticky', Boolean()),
            ('pubdate', Date()),
            ('forum_pk', Integer()),
        ]

        # not indexed:
        unindexed_fields = [
            ('get_absolute_url', Keyword(index=False)),
            ('forum_title', Text(index=False)),
            ('forum_get_absolute_url', Keyword(index=False)),
        ]

        for field_name, field_type in regular_fields + unindexed_fields:
            mapping.field(field_name, field_type)

        return mapping
예제 #7
0
    def get_es_mapping(cls):
        """Build the Topic mapping on top of the one provided by the parent.

        Every ``(name, field)`` pair below is added to the parent mapping in
        the order listed; the mapping is then returned.
        """
        topic_mapping = super(Topic, cls).get_es_mapping()

        field_table = (
            ("title", Text(boost=1.5)),
            ("tags", Text(boost=2.0)),
            ("subtitle", Text()),
            ("is_solved", Boolean()),
            ("is_locked", Boolean()),
            ("is_sticky", Boolean()),
            ("pubdate", Date()),
            ("forum_pk", Integer()),
            # not indexed:
            ("get_absolute_url", Keyword(index=False)),
            ("forum_title", Text(index=False)),
            ("forum_get_absolute_url", Keyword(index=False)),
        )
        for name, es_field in field_table:
            topic_mapping.field(name, es_field)

        return topic_mapping
예제 #8
0
    def get_es_mapping(cls):
        """Return the parent mapping extended with the Post fields.

        Fields are registered in declaration order: the regular fields first,
        then the ones declared with ``index=False``.
        """
        mapping = super(Post, cls).get_es_mapping()

        regular_fields = [
            ('text_html', Text()),
            ('is_useful', Boolean()),
            ('is_visible', Boolean()),
            ('position', Integer()),
            ('like_dislike_ratio', Float()),
            ('pubdate', Date()),
            ('forum_pk', Integer()),
            ('topic_pk', Integer()),
        ]

        # not indexed:
        unindexed_fields = [
            ('get_absolute_url', Keyword(index=False)),
            ('topic_title', Text(index=False)),
            ('forum_title', Text(index=False)),
            ('forum_get_absolute_url', Keyword(index=False)),
        ]

        for field_name, field_type in regular_fields + unindexed_fields:
            mapping.field(field_name, field_type)

        return mapping
예제 #9
0
    def get_es_mapping(cls):
        """Build the Post mapping on top of the one provided by the parent.

        Every ``(name, field)`` pair below is added to the parent mapping in
        the order listed; the mapping is then returned.
        """
        post_mapping = super(Post, cls).get_es_mapping()

        field_table = (
            ("text_html", Text()),
            ("is_useful", Boolean()),
            ("is_visible", Boolean()),
            ("position", Integer()),
            ("like_dislike_ratio", Float()),
            ("pubdate", Date()),
            ("forum_pk", Integer()),
            ("topic_pk", Integer()),
            # not indexed:
            ("get_absolute_url", Keyword(index=False)),
            ("topic_title", Text(index=False)),
            ("forum_title", Text(index=False)),
            ("forum_get_absolute_url", Keyword(index=False)),
        )
        for name, es_field in field_table:
            post_mapping.field(name, es_field)

        return post_mapping
예제 #10
0
class ApplicationDocumentType(Document):
    """Search document built from ``models.BaseApplication`` rows.

    Draft applications are excluded (see ``get_queryset``). Several text
    fields copy their value into ``wildcard`` so that one field can serve
    wildcard searches.
    """

    # Purposefully not a DED field (DED presumably stands for
    # django-elasticsearch-dsl) - this is just for collecting other field
    # values for wildcard search.
    wildcard = Text(analyzer=ngram_analyzer, search_analyzer=whitespace_analyzer, store=True)

    id = fields.KeywordField()
    queues = fields.NestedField(doc_class=Queue)
    name = fields.TextField(copy_to="wildcard", analyzer=descriptive_text_analyzer)
    # The fields below share the same pair of sub-fields: a lowercase-normalised
    # ``raw`` keyword and a ``suggest`` completion field.
    reference_code = fields.TextField(
        copy_to="wildcard",
        analyzer=reference_code_analyzer,
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    organisation = fields.TextField(
        copy_to="wildcard",
        attr="organisation.name",
        analyzer=descriptive_text_analyzer,
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    status = fields.KeywordField(
        attr="status.status",
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    submitted_by = fields.ObjectField(doc_class=User)
    case_officer = fields.NestedField(doc_class=User)
    goods = fields.NestedField(doc_class=Product)
    parties = fields.NestedField(doc_class=Party)

    # Timestamps are read from the model's ``created_at`` / ``updated_at``.
    created = fields.DateField(attr="created_at")
    updated = fields.DateField(attr="updated_at")
    case_type = fields.KeywordField(
        attr="case_type.type",
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )
    case_subtype = fields.KeywordField(
        attr="case_type.sub_type",
        fields={"raw": fields.KeywordField(normalizer=lowercase_normalizer), "suggest": fields.CompletionField(),},
    )

    class Index:
        # ``settings`` on the next line is still the module-level settings
        # object; the class-level ``settings`` dict is only bound afterwards.
        name = settings.ELASTICSEARCH_APPLICATION_INDEX_ALIAS
        settings = {"number_of_shards": 1, "number_of_replicas": 0, "max_ngram_diff": 18}

    # NOTE(review): ``Meta`` and ``Django`` declare the same model - presumably
    # kept for compatibility between library versions; confirm before removing.
    class Meta:
        model = models.BaseApplication

    class Django:
        model = models.BaseApplication

    def get_queryset(self):
        """Return the parent queryset without applications in "draft" status."""
        return super().get_queryset().exclude(status__status="draft")

    def get_indexing_queryset(self):
        """Return the queryset used for indexing, with related rows preloaded.

        Also redirects the ``parties`` field to read from the
        ``prefetched_parties`` attribute filled by the prefetch below.
        """
        # Hack to make `parties` use the prefetch cache. The party manager's .all() calls .exclude, which clears
        # the cache, so work around that here: read from the instance's prefetched_parties attr, which is set by
        # the prefetch. Looks small, but is a huge performance improvement. Helps take db reads down to 7 in total.
        self._fields["parties"]._path = ["prefetched_parties"]

        return (
            self.get_queryset()
            .select_related("organisation")
            .select_related("submitted_by__baseuser_ptr")
            .select_related("case_officer__baseuser_ptr")
            .select_related("status")
            .select_related("case_type")
            .prefetch_related("queues")
            .prefetch_related("queues__team")
            .prefetch_related(
                Prefetch(
                    "goods",
                    queryset=(
                        models.GoodOnApplication.objects.all()
                        .select_related("good")
                        .select_related("good__organisation")
                        .prefetch_related("good__control_list_entries")
                        .prefetch_related("good__control_list_entries__parent")
                    ),
                )
            )
            .prefetch_related(
                Prefetch(
                    "parties",
                    # to_attr must match the path set on the `parties` field above.
                    to_attr="prefetched_parties",
                    queryset=(
                        models.PartyOnApplication.objects.all()
                        .select_related("party")
                        .select_related("party__country")
                        .select_related("party__organisation")
                    ),
                )
            )
        )
예제 #11
0
class ProductDocumentType(Document):
    """Search document built from ``models.GoodOnApplication`` rows.

    Rows whose application is in "draft" status are excluded (see
    ``get_queryset``). ``context`` and ``canonical_name`` are filled in by
    ``prepare`` rather than read directly from the model.
    """

    # Purposefully not a DED field - this is just for collecting other field
    # values for wildcard search.
    wildcard = Text(
        analyzer=ngram_analyzer,
        search_analyzer=whitespace_analyzer,
        store=True,
    )
    # Purposefully not a DED field - this is just for collecting other field
    # values for grouping purposes in ES.
    context = fields.Keyword()

    # used for grouping
    canonical_name = fields.KeywordField(
        attr="good.description")  # is overwritten in prepare

    # base details. iteration 1
    id = fields.KeywordField()
    description = fields.TextField(
        attr="good.description",
        copy_to="wildcard",
        analyzer=descriptive_text_analyzer,
    )
    control_list_entries = fields.NestedField(attr="good.control_list_entries",
                                              doc_class=Rating)
    queues = fields.NestedField(doc_class=Queue, attr="application.queues")

    # Declared exactly once: the original class repeated this identical
    # definition further down, where the second assignment simply replaced
    # the first.
    organisation = fields.TextField(
        copy_to="wildcard",
        attr="good.organisation.name",
        analyzer=descriptive_text_analyzer,
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
    )

    # (does not exist yet) needs to be here for data shape parity with SPIRE
    name = fields.TextField(attr="good.description")
    # not mapped yet
    destination = fields.KeywordField(
        attr="application.end_user.party.country.name",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        normalizer=lowercase_normalizer,
    )
    end_use = fields.TextField(attr="application.intended_end_use")
    end_user_type = fields.KeywordField(
        attr="application.end_user.party.sub_type",
        normalizer=lowercase_normalizer,
    )

    date = fields.DateField(attr="application.submitted_at")

    application = fields.NestedField(doc_class=ApplicationOnProduct)

    rating_comment = fields.TextField(attr="good.comment",
                                      copy_to="wildcard",
                                      analyzer=descriptive_text_analyzer)

    report_summary = fields.TextField(
        attr="good.report_summary",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        analyzer=descriptive_text_analyzer,
        copy_to="wildcard",
    )

    part_number = fields.TextField(
        attr="good.part_number",
        fields={
            "raw": fields.KeywordField(normalizer=lowercase_normalizer),
            "suggest": fields.CompletionField(),
        },
        analyzer=part_number_analyzer,
        copy_to="wildcard",
    )

    regime = fields.Keyword()

    class Index:
        # ``settings`` on the next line is still the module-level settings
        # object; the class-level ``settings`` dict is only bound afterwards.
        name = settings.ELASTICSEARCH_PRODUCT_INDEX_ALIAS
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
            "max_ngram_diff": 18,
        }

    # NOTE(review): ``Meta`` and ``Django`` declare the same model - presumably
    # kept for compatibility between library versions; confirm before removing.
    class Meta:
        model = models.GoodOnApplication

    class Django:
        model = models.GoodOnApplication

    def get_queryset(self):
        """Return the parent queryset without rows of "draft" applications."""
        return super().get_queryset().exclude(
            application__status__status="draft")

    def prepare(self, instance):
        """Return the prepared data for ``instance`` with derived fields set.

        ``context`` joins destination, end use and end user type with a
        separator character; ``canonical_name`` mirrors ``description``.
        """
        data = super().prepare(instance)
        data[
            "context"] = f"{data['destination']}🔥{data['end_use']}🔥{data['end_user_type']}"
        data["canonical_name"] = data["description"]
        return data

    def get_indexing_queryset(self):
        """Return the indexing queryset with related rows preloaded."""
        return (
            self.get_queryset()
            .select_related("good")
            .select_related("application")
            .select_related("good__organisation")
            .prefetch_related("application__parties__party__flags")
        )
예제 #12
0
class MXXZjacDoc(DocType):
    """Document describing a case: applicant, respondent and loan details.

    The index and doc type come from ``config.ES_INDEX`` and
    ``config.ES_DOC_TYPE``. Use ``make_doc`` to build and save a document.
    """

    caseId = Keyword()
    ctime = Keyword()
    timeStamp = Keyword()
    applicant = Keyword()
    respondent = Nested(doc_class=InnerObjectWrapper,
                        properties={
                            "name": Keyword(),
                            "certAddress": Keyword(),
                            "phone": Keyword(),
                            "email": Keyword(),
                            "otherAddress": Keyword(),
                            "idcard": Keyword(),
                            "card_front": Text(),
                            "card_nfront": Text(),
                        })

    # Glossary (Chinese term -> English meaning) for the ``caseInfo`` keys
    # below. It is a plain class attribute, not a mapped field.
    text = """
    借款年利率:Annual interest rate of borrowing
    合同金额:Contract amount
    放款金额:Loan amount
    合同签订时间:  
    借款开始时间:Borrowing start time
    借款结束时间:End of loan time
    借款时常:Borrowing often
    借款时长单位:Borrowing time unit
    违约时间:Default time
    尚欠本金:Still owed principal
    尚欠利息:Interest owed
    仲裁协议签订时间:Arbitration agreement time
    是否分期(分批):Whether to stage (batch)
    居间方:Intermediary party
    借款用途:Use of the loan
    还款方式:Repayment
    是否涉外:Whether it is foreign-related
    """
    caseInfo = Object(doc_class=InnerObjectWrapper,
                      properties={
                          "annualInterestOfBorrowing": Float(),
                          "contractAmount": Float(),
                          "loanAmount": Float(),
                          # NOTE(review): this key has a trailing space; it
                          # looks like a typo but is left unchanged because
                          # renaming it would alter the index mapping.
                          "contractTime ": Keyword(),
                          "borrowingStartTime": Keyword(),
                          "borrowingEndTime": Keyword(),
                          "borrowingOften": Integer(),
                          "borrowingTimeUnit": Keyword(),
                          "defaultTime": Keyword(),
                          "stillOwedPrincipal": Float(),
                          "interestOwed": Float(),
                          "arbitrationAgreementTime": Keyword(),
                          "whetherStaging": Keyword(),
                          "intermediaryParty": Keyword(),
                          "usageLoan": Keyword(),
                          "repaymentWay": Keyword(),
                          "whetherForeign": Keyword(),
                      })

    contentiousAmount = Float()

    class Meta:
        index = config.ES_INDEX
        doc_type = config.ES_DOC_TYPE

    @classmethod
    def make_doc(cls, caseId, ctime, timeStamp, applicant, respondent,
                 caseInfo, contentiousAmount):
        """Create a document, save it, and return it as a dict.

        ``caseId`` is also used as the document's meta id. Each remaining
        argument is stored on the attribute of the same name.

        Returns:
            The saved document as produced by ``to_dict(include_meta=False)``.
        """
        doc_obj = cls()
        doc_obj.meta.id = caseId
        doc_obj.caseId = caseId
        doc_obj.ctime = ctime
        doc_obj.timeStamp = timeStamp
        doc_obj.applicant = applicant
        doc_obj.respondent = respondent
        # The original assigned both of these to ``respondent``, which
        # overwrote the respondent and left these two fields unset.
        doc_obj.caseInfo = caseInfo
        doc_obj.contentiousAmount = contentiousAmount
        doc_obj.save()
        return doc_obj.to_dict(include_meta=False)