Ejemplo n.º 1
0
class Teheran_news(Document):
    """ES document for Teheran Times news articles.

    Text fields use the built-in ``snowball`` stemming analyzer; ``title``
    also keeps an unanalyzed ``raw`` keyword sub-field for exact matching
    and aggregations.
    """

    topic = Text(analyzer='snowball')
    author = Text(analyzer='snowball')
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    text = Text(analyzer='snowball')
    related = Keyword()
    tags = Keyword()
    create_at = field.Date()
    upload_at = field.Date()
    url = Keyword()

    class Index:
        name = 'teheran_news'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    def save(self, *args, **kw):
        # Bug fix: propagate the backend's result (e.g. the created flag)
        # instead of silently discarding it as the previous override did.
        return super().save(*args, **kw)
Ejemplo n.º 2
0
class ForumDocument(SumoDocument):
    """
    ES document for forum posts. Thread information is duplicated across all posts in that thread.
    """

    # Fields copied from the post's parent thread; see get_field_value for
    # how the "thread_" prefix is resolved against the Thread instance.
    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    # Fields belonging to the post itself.
    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        name = config.FORUM_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def get_field_value(self, field, instance, *args):
        # "thread_"-prefixed document fields are read off the post's thread
        # instead of the post itself (prefix stripped before delegation).
        # NOTE: the `field` parameter shadows the module-level `field` import
        # inside this method.
        if field.startswith("thread_"):
            instance = instance.thread
            field = field[len("thread_"):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        # Join the thread up front so thread_* fields don't issue per-post queries.
        return Post.objects.select_related("thread")
Ejemplo n.º 3
0
class Manga(Document):
    """ES document for scraped nhentai manga entries."""

    # Bug fix: a plain `title = field.Text()` definition that was immediately
    # overwritten by this one has been removed; only the multi-field mapping
    # (with `space` and `keyword` sub-fields) is kept.
    title = field.Text(analyzer=titles,
                       multi=True,
                       fields={
                           'space': field.Text(analyzer=titles_space,
                                               multi=True),
                           'keyword': field.Keyword(multi=True),
                       })
    tags = field.Object(Tag)
    upload_at = field.Date()
    scan_at = field.Date()

    url = field.Keyword()
    cover_url = field.Keyword()
    images_urls = field.Keyword(multi=True)
    images_len = field.Integer()

    class Index:
        name = 'nhentai__mangas'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """Return True when a manga with exactly this URL is already indexed."""
        logger.info(f"buscando manga {url}")
        # `term` on the Keyword field gives an exact (non-analyzed) match.
        return cls.search().filter("term", url=url).count() > 0
 def _schema2doc_map(self):
     """Map table-schema type names to Elasticsearch DSL field objects.

     Every value is finally wrapped in a ``CustomObject`` exposing two
     sub-fields: ``val`` (the typed value) and ``repr`` (a keyword form).
     """
     _map = {
         'integer':
         dsl_field.Long(),
         'number':
         # scaling_factor=100 stores two decimal places as a scaled integer.
         dsl_field.ScaledFloat(scaling_factor=100),
         'string':
         dsl_field.Text(analyzer=polish_analyzer,
                        fields={
                            'raw': dsl_field.Text(),
                            'keyword': dsl_field.Keyword(),
                        }),
         # 'any' is mapped identically to 'string'.
         'any':
         dsl_field.Text(analyzer=polish_analyzer,
                        fields={
                            'raw': dsl_field.Text(),
                            'keyword': dsl_field.Keyword(),
                        }),
         'boolean':
         dsl_field.Boolean(),
         # Time-like values keep both a raw text copy and a parsed date form.
         'time':
         dsl_field.Text(
             fields={
                 'text': dsl_field.Text(),
                 'time': dsl_field.Date(
                     format=constance_config.TIME_FORMATS),
             }),
         'duration':
         dsl_field.DateRange(),
         'default':
         dsl_field.Text(),
         'date':
         dsl_field.Text(
             fields={
                 'text': dsl_field.Text(),
                 'date': dsl_field.Date(
                     format=constance_config.DATE_FORMATS),
             }),
         'datetime':
         dsl_field.Text(
             fields={
                 'text':
                 dsl_field.Text(),
                 'datetime':
                 # NOTE(review): uses DATE_FORMATS rather than a
                 # datetime-specific format list — confirm intentional.
                 dsl_field.Date(format=constance_config.DATE_FORMATS),
             })
     }
     # Wrap each mapping so documents store the typed value plus a keyword
     # representation of it.
     for key, val in _map.items():
         _map[key] = CustomObject(properties={
             'val': val,
             'repr': dsl_field.Keyword(),
         })
     return _map
Ejemplo n.º 5
0
class CompanyDocument(Document):
    """ES document for Companies House company records.

    Most fields are stored but not indexed (``index=False, store=True``):
    they are returned in results without being searchable. Only
    ``company_name`` and ``company_number`` are full-text searchable.
    """

    address = field.Nested(
        properties={
            'care_of': field.Keyword(index=False, store=True),
            'po_box': field.Keyword(index=False, store=True),
            'address_line_1': field.Keyword(index=False, store=True),
            'address_line_2': field.Keyword(index=False, store=True),
            'locality': field.Keyword(index=False, store=True),
            'region': field.Keyword(index=False, store=True),
            'country': field.Keyword(index=False, store=True),
            'postal_code': field.Keyword(index=False, store=True)
        })
    country_of_origin = field.Keyword(index=False, store=True)
    address_snippet = field.Keyword(index=False, store=True)
    company_name = field.Text()
    company_number = field.Text()
    company_status = field.Keyword(index=False, store=True)
    type = field.Keyword(index=False, store=True)
    date_of_cessation = field.Date(index=False, format='yyyy-MM-dd')
    date_of_creation = field.Date(index=False, format='yyyy-MM-dd')
    sic_codes = field.Keyword(index=False, store=True)

    class Meta:
        index = settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS

    def to_dict(self, include_meta=False):
        """Serialize, duplicating a few fields under alternate keys in _source."""
        meta = super().to_dict(include_meta)
        if '_source' in meta:
            company = meta['_source']
            # Aliases expected by downstream consumers of the serialized form.
            company['title'] = company['company_name']
            company['address']['country'] = company['country_of_origin']
            company['company_type'] = company['type']
            meta['_source'] = self.reformat_date(company)
        return meta

    def to_profile_dict(self):
        """Serialize with the address exposed as 'registered_office_address'."""
        company = self.to_dict()
        company['registered_office_address'] = company['address']
        return self.reformat_date(company)

    @staticmethod
    def reformat_date(company):
        """Render date fields as 'YYYY-MM-DD' strings, in place."""
        # NOTE(review): assumes present date values are date/datetime objects;
        # a None value would raise AttributeError — confirm upstream guarantees.
        if 'date_of_creation' in company:
            company['date_of_creation'] = (
                company['date_of_creation'].strftime('%Y-%m-%d'))
        if 'date_of_cessation' in company:
            company['date_of_cessation'] = (
                company['date_of_cessation'].strftime('%Y-%m-%d'))
        return company
    def prepare_doc(self):
        """Dynamically build a Document class for this resource's schema.

        Schema fields are mapped to generic column names (``col1`` … ``colN``);
        the original names are recorded in the mapping ``_meta`` headers so
        they can be recovered later.
        """
        _fields, _map = {}, {}
        for idx, _f in enumerate(self.schema['fields'], 1):
            alias_name = _f['name']
            field_name = 'col{}'.format(idx)
            _field = self._schema2doc_map[_f['type']]
            _map[field_name] = alias_name
            _fields[field_name] = _field

        # Geo resources additionally get shape/point fields for spatial queries.
        if self.has_geo_data:
            _fields['shape'] = dsl_field.GeoShape()
            _fields['point'] = dsl_field.GeoPoint()
            _fields['label'] = dsl_field.Text()
            _fields['shape_type'] = dsl_field.Integer()

        # Back-reference to the owning resource.
        _fields['resource'] = dsl_field.Nested(
            properties={
                'id':
                dsl_field.Integer(),
                'title':
                dsl_field.Text(analyzer=polish_analyzer,
                               fields={'raw': dsl_field.Keyword()})
            })

        _fields['updated_at'] = dsl_field.Date()
        _fields['row_no'] = dsl_field.Long()
        # NOTE(review): Index is created with `type` as a base class (a
        # metaclass as base is unusual); presumably elasticsearch-dsl only
        # reads attributes off it — confirm.
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc
Ejemplo n.º 7
0
class Metadata(InnerDoc):
    """Embedded (inner) document holding dataset publication metadata."""

    language = field.Keyword()
    # "fuente" is Spanish for "source".
    fuente = field.Keyword()
    frequency = field.Keyword()
    name_publisher = field.Keyword()
    email_publisher = field.Keyword()
    published = field.Date()
Ejemplo n.º 8
0
class ProfileDocument(SumoDocument):
    """ES document for user profiles."""

    # Lowercased at index time so lookups are case-insensitive.
    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # store avatar url so we don't need to hit the db when searching users
    # but set enabled=False to ensure ES does no parsing of it
    avatar = field.Object(enabled=False)

    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()

    involved_from = field.Date()

    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    def prepare_username(self, instance):
        # `instance` is a Profile; the username lives on the related User.
        return instance.user.username

    def prepare_email(self, instance):
        # Only index the email when the user opted in; otherwise returns None.
        if instance.public_email:
            return instance.user.email

    def prepare_avatar(self, instance):
        # Returns None when no FxA avatar is set.
        if avatar := instance.fxa_avatar:
            return InnerDoc(url=avatar)
Ejemplo n.º 9
0
    def doc(self):
        """Build (and cache) a DocType class for this resource's schema.

        Schema fields are mapped to generic column names (``col1`` … ``colN``)
        and the original names are stored in the mapping ``_meta`` headers so
        they can be recovered later. The built class is cached on the
        instance so repeated calls are cheap.
        """
        if not self._doc_cache:
            _fields, _map = {}, {}
            for idx, _f in enumerate(self.schema['fields'], 1):
                alias_name = _f['name']
                field_name = 'col{}'.format(idx)
                _map[field_name] = alias_name
                _fields[field_name] = _schema2doc_map[_f['type']]

            # Back-reference to the owning resource.
            _fields['resource'] = dsl_field.Nested(
                properties={
                    'id': dsl_field.Integer(),
                    'title': dsl_field.Text(
                        analyzer=polish_analyzer,
                        fields={'raw': dsl_field.Keyword()})
                }
            )

            _fields['updated_at'] = dsl_field.Date()
            _fields['row_no'] = dsl_field.Long()

            doc = type(self.idx_name, (DocType,), _fields)
            doc._doc_type.index = self.idx_name
            # Bug fix: a duplicated no-op expression statement
            # (`doc._doc_type.mapping._meta['_meta']`) has been removed.
            doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
            self._doc_cache = doc
        return self._doc_cache
Ejemplo n.º 10
0
class Document(BaseDocument):
    """ES document for a crawled web page."""

    url = field.Keyword()
    # presumably an analyzed (searchable) copy of the URL — confirm at
    # indexing time.
    url_text = field.Text()
    referer = field.Keyword()
    title = field.Text()
    html = field.Text()
    text = field.Text()
    # Naive timestamps are interpreted in the project's configured timezone.
    timestamp = field.Date(default_timezone=settings.TIME_ZONE)
Ejemplo n.º 11
0
class Activity(InnerDoc):
    """Embedded document recording a user action and when it happened."""

    action = field.Text(analyzer=titles,
                        fields={
                            'space': field.Text(analyzer=titles_space),
                            'keyword': field.Keyword(),
                        })
    date = field.Date()
    user = field.Object(User)
Ejemplo n.º 12
0
class Entry(document.Document):
    """Dictionary entry document with its nested forms."""

    forms = field.Nested(Form)
    created = field.Date()
    superentry = field.Text()

    def save(self, **kwargs):
        """Persist the entry, returning the backend's result."""
        return super().save(**kwargs)

    def is_published(self):
        """An entry counts as published once its creation time has passed."""
        # NOTE(review): compares `created` against a naive local `now`;
        # assumes both are naive in the same timezone — confirm.
        return self.created < datetime.now()
Ejemplo n.º 13
0
class MessageIndex(DocType):
    """ES index mapping for chat-room messages."""

    room = field.Keyword()
    user = field.Text()
    created = field.Date()
    message = field.Text()
    status = field.Text()
    # Tags live in a nested object so each tag is matched independently.
    tags = Nested(properties={'tags': field.Text()})

    class Meta:
        # Bug fix: Elasticsearch index names must be lowercase — the previous
        # value 'Message' would be rejected with invalid_index_name_exception
        # at index-creation time.
        index = 'message'
Ejemplo n.º 14
0
class Somos_kudasai(Document):
    """ES document for Somos Kudasai news posts.

    Text fields use the built-in ``snowball`` stemming analyzer; ``title``
    also keeps an unanalyzed ``raw`` keyword sub-field.
    """

    author = Text(analyzer='snowball')
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    text = Text(analyzer='snowball')
    create_at = field.Date()
    upload_at = field.Date()
    url = Keyword()

    class Index:
        name = 'somos_kudasai'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    def save(self, *args, **kw):
        # Bug fix: propagate the backend's result instead of discarding it.
        return super().save(*args, **kw)
Ejemplo n.º 15
0
class Document(DocType):
    """ES mapping for an indexed file.

    Bug fix: the original definition had trailing commas after most field
    assignments, turning each of those class attributes into a 1-element
    tuple instead of a field object — a broken mapping. The commas are
    removed here; the field definitions themselves are unchanged.
    """

    id = field.Integer()
    title = field.String(analyzer='snowball')
    author = field.String(analyzer='snowball')
    creation_date = field.Date()
    pages = field.Integer()
    content = field.String(analyzer='snowball')
    lang = field.String()
    size = field.Integer()
    # pre-5.x DSL style: a 'not_analyzed' String is the modern Keyword.
    tags = field.String(index='not_analyzed')
    autocomplete = field.Text(analyzer=ngram_analyzer)
Ejemplo n.º 16
0
class ForumDocument(SumoDocument):
    """
    ES document for forum posts. Thread information is duplicated across all posts in that thread.
    """

    # Fields copied from the post's parent thread; see get_field_value for
    # how the "thread_" prefix is resolved against the Thread instance.
    thread_title = field.Text()
    thread_forum_id = field.Keyword()
    forum_slug = field.Keyword()
    thread_id = field.Keyword()
    thread_created = field.Date()
    thread_creator_id = field.Keyword()
    thread_is_locked = field.Boolean()
    thread_is_sticky = field.Boolean()

    # Fields belonging to the post itself.
    content = field.Text()
    author_id = field.Keyword()
    created = field.Date()
    updated = field.Date()
    updated_by_id = field.Keyword()

    class Index:
        # NOTE(review): empty Index — name/connection presumably supplied by
        # SumoDocument or configured elsewhere; confirm.
        pass

    def prepare_forum_slug(self, instance):
        # Slug comes from the forum that owns the post's thread.
        return instance.thread.forum.slug

    def get_field_value(self, field, instance, *args):
        # "thread_"-prefixed document fields are read off the post's thread
        # (prefix stripped before delegation). NOTE: the `field` parameter
        # shadows the module-level `field` import inside this method.
        if field.startswith("thread_"):
            instance = instance.thread
            field = field[len("thread_"):]
        return super().get_field_value(field, instance, *args)

    @classmethod
    def get_model(cls):
        return Post

    @classmethod
    def get_queryset(cls):
        # Prefetch the thread and its forum so thread_*/forum_slug fields
        # don't issue per-post queries during bulk indexing.
        return Post.objects.prefetch_related("thread", "thread__forum")
Ejemplo n.º 17
0
def test_date_field_can_have_default_tz():
    """A Date field's default_timezone is applied to naive inputs."""
    date_field = field.Date(default_timezone='UTC')
    naive = datetime.now()

    # Both a naive datetime object and its ISO string must deserialize to
    # the same UTC-aware value.
    for raw in (naive, naive.isoformat()):
        aware = date_field._deserialize(raw)
        assert aware.tzinfo == tz.gettz('UTC')
        assert naive.isoformat() + '+00:00' == aware.isoformat()
class Article(Document):
    """ES document for Somos Kudasai articles."""

    title = field.Text(
        analyzer=titles, multi=True,
        fields={
            'space': field.Text(analyzer=titles_space, multi=True),
            'keyword': field.Keyword(multi=True),
        })
    text = field.Text(
        analyzer=titles, multi=True,
        fields={
            'space': field.Text(analyzer=titles_space, multi=True),
            'keyword': field.Keyword(multi=True),
        })
    category = field.Text(
        analyzer=category, multi=True,
        fields={
            'keyword': field.Keyword(multi=True),
        })
    create_at = field.Date()
    upload_at = field.Date()
    scan_at = field.Date()
    url = field.Keyword()

    class Index:
        name = 'somos_kudasai__articles'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """Return True when an article with exactly this URL is indexed."""
        logger.info(f"buscando articulo {url}")
        # `term` on the Keyword field gives an exact (non-analyzed) match.
        return cls.search().filter("term", url=url).count() > 0

    def save(self, *args, **kw):
        # Bug fix: propagate the backend's result instead of discarding it.
        return super().save(*args, **kw)
Ejemplo n.º 19
0
class EntryDoc(SerializedDoc):
    """ES document for blog/journal entries."""

    title = field.String()
    author = field.String()
    # html_strip analyzer removes markup; the 'raw' sub-field keeps the
    # unanalyzed original for exact matching (pre-5.x DSL String style).
    content = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    description = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    publication_date = field.Date()
    categories = field.Nested(
        doc_class=CategoryDoc,
        properties={'name': field.String(fields={'raw': field.String(index='not_analyzed')})}
    )

    def get_display_name(self):
        """Human-readable label for this document: its title."""
        return self.title
Ejemplo n.º 20
0
class ProfileDocument(SumoDocument):
    """ES document for user profiles; inactive users are excluded."""

    # Lowercased at index time so lookups are case-insensitive.
    username = field.Keyword(normalizer="lowercase")
    name = field.Text(fields={"keyword": field.Keyword()})
    email = field.Keyword()
    # store avatar url so we don't need to hit the db when searching users
    # but set enabled=False to ensure ES does no parsing of it
    avatar = field.Object(enabled=False)

    timezone = field.Keyword()
    country = field.Keyword()
    locale = field.Keyword()

    involved_from = field.Date()

    product_ids = field.Keyword(multi=True)
    group_ids = field.Keyword(multi=True)

    class Index:
        name = config.USER_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    @classmethod
    def prepare(cls, instance):
        """Override super method to exclude docs from indexing."""
        # Add a discard field in the document if the following conditions are met
        # User is not active
        if not instance.user.is_active:
            instance.es_discard_doc = "unindex_me"

        return super(ProfileDocument, cls).prepare(instance)

    def prepare_username(self, instance):
        # `instance` is a Profile; the username lives on the related User.
        return instance.user.username

    def prepare_email(self, instance):
        # Only index the email when the user opted in; otherwise returns None.
        if instance.public_email:
            return instance.user.email

    def prepare_avatar(self, instance):
        # Returns None when no FxA avatar is set.
        if avatar := instance.fxa_avatar:
            return InnerDoc(url=avatar)
    def prepare_doc(self):
        """Dynamically build a Document class for this geo-enabled resource.

        Schema fields with a known ES mapping become generic columns
        (``col1`` … ``colN``); the original names are recorded in the mapping
        ``_meta`` headers so they can be recovered later.
        """
        _fields = {
            'shape': dsl_field.GeoShape(),
            'point': dsl_field.GeoPoint(),
            'shape_type': dsl_field.Integer(),
            'label': dsl_field.Text(),
            # Back-reference to the owning resource.
            'resource': dsl_field.Nested(
                properties={
                    'id': dsl_field.Integer(),
                    'title': dsl_field.Text(
                        analyzer=polish_analyzer,
                        fields={'raw': dsl_field.Keyword()}),
                }),
            'updated_at': dsl_field.Date(),
            'row_no': dsl_field.Long(),
        }
        _map = {}

        for idx, _f in enumerate(self.schema, 1):
            # Skip schema types we have no ES mapping for.
            if _f.type not in self._schema2doc_map:
                continue
            alias_name = _f.name
            field_name = f'col{idx}'
            _map[field_name] = alias_name
            _fields[field_name] = self._schema2doc_map[_f.type]

        # Bug fix: the Index class was previously (re)assigned inside the
        # loop body, so it was redundantly rebuilt per field and — worse —
        # missing entirely when no schema field had a known mapping.
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc
Ejemplo n.º 22
0
class Resource(Document):
    """ES document for a resource belonging to an open-data dataset."""

    title = field.Text(analyzer=titles,
                       fields={
                           'space': field.Text(analyzer=titles_space),
                           'keyword': field.Keyword(),
                       })
    description = field.Text(analyzer=titles,
                             fields={
                                 'space': field.Text(analyzer=titles_space),
                                 'keyword': field.Keyword(),
                             })
    kind = field.Keyword()
    url = field.Keyword()
    created_at = field.Date()

    tags = field.Text(analyzer=titles,
                      multi=True,
                      fields={
                          'space': field.Text(analyzer=titles_space,
                                              multi=True),
                          'keyword': field.Keyword(multi=True),
                      })

    metadata = field.Object(Metadata)

    class Index:
        name = 'chibi_gob__open_data__dataset__resource'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """Return True when a resource with exactly this URL is indexed."""
        logger.info(f"buscando dataset {url}")
        # `term` on the Keyword field gives an exact (non-analyzed) match.
        return cls.search().filter("term", url=url).count() > 0

    def save(self, *args, **kw):
        # Bug fix: propagate the backend's result instead of discarding it.
        return super().save(*args, **kw)
Ejemplo n.º 23
0
class Dataset(Document):
    """ES document for an open-data dataset and its resources."""

    resources = field.Object(Data_set_resource, multi=True)
    tags = field.Text(analyzer=titles,
                      multi=True,
                      fields={
                          'space': field.Text(analyzer=titles_space,
                                              multi=True),
                          'keyword': field.Keyword(multi=True),
                      })

    metadata = field.Object(Metadata)
    activity = field.Object(Activity, multi=True)
    url = field.Keyword()
    status = field.Keyword()
    created_at = field.Date()

    class Index:
        name = 'chibi_gob__open_data__dataset'
        settings = {'number_of_shards': 2, 'number_of_replicas': 1}

    @classmethod
    def url_is_scaned(cls, url):
        """Return True when a dataset with exactly this URL is indexed."""
        logger.info(f"buscando dataset {url}")
        # `term` on the Keyword field gives an exact (non-analyzed) match.
        return cls.search().filter("term", url=url).count() > 0

    @classmethod
    def get_by_url(cls, url):
        """Return the first dataset matching this URL, or None."""
        logger.info(f"get dataset {url}")
        result = cls.search().filter("term", url=url)[:1].execute()
        if result:
            return result[0]
        return None

    def save(self, *args, **kw):
        # Bug fix: propagate the backend's result instead of discarding it.
        return super().save(*args, **kw)
Ejemplo n.º 24
0
class Profile(Document):
    """Top-level profile document with attached sub-profile references."""

    created_at = field.Date()
    ssn_trace = field.Nested(Ssn_trace)

    meta_sub_profiles = field.Nested(Inner_sub_profile)

    class Index:
        name = "profile"

    @property
    def pk(self):
        """Shortcut for the ES document id."""
        return self.meta.id

    @property
    def sub_profiles(self):
        """The sub-profile payload of every attached entry."""
        return [entry.sub_profile for entry in self.meta_sub_profiles]

    def validate_sub_profiles(self, *sub_profiles_ids):
        """Raise ValueError if any given id is not attached to this profile."""
        known_ids = {entry.sub_profile_id for entry in self.meta_sub_profiles}
        for requested in sub_profiles_ids:
            if requested in known_ids:
                continue
            raise ValueError(
                "the profile: '{}' dont have the sub profile: '{}'".format(
                    self.meta.id, requested))

    def attach_sub_profiles(self, sub_profiles):
        """Append each sub-profile as a reference with 'unknown' status."""
        if not isinstance(self.meta_sub_profiles, list):
            self.meta_sub_profiles = []
        self.meta_sub_profiles.extend(
            {'sub_profile_id': sp.meta.id, 'status': 'unknown'}
            for sp in sub_profiles)
Ejemplo n.º 25
0
class MyDoc(document.DocType):
    """Example document using pre-5.x DSL field types."""

    # pre-5.x style: a 'not_analyzed' String is the modern Keyword.
    title = field.String(index='not_analyzed')
    name = field.String()
    created_at = field.Date()
    inner = field.Object(properties={'old_field': field.String()},
                         doc_class=MyInner)
Ejemplo n.º 26
0
}, {
    POS: 'NOUN',
    'OP': "+"
}]), ('pnp', [{
    POS: "ADJ",
    "OP": "*"
}, {
    POS: "PROPN",
    "OP": "+"
}])]

# Map pandas/NumPy column dtypes to Elasticsearch DSL field types.
PD2ES_TYPES = {
    np.dtype('O'): field.Text(),            # object dtype: free-form strings
    np.dtype('int64'): field.Integer(),
    np.dtype('float64'): field.Double(),
    np.dtype('<M8[ns]'): field.Date(),      # datetime64[ns]
    np.dtype('bool'): field.Boolean()
}


def load_spacy_model():
    """Load the large English spaCy model with a custom infix tokenizer."""
    nlp = spacy.load('en_core_web_lg')
    # Split tokens only on ~, - and _ (overrides spaCy's default infix rules).
    nlp.tokenizer.infix_finditer = re.compile(r'[~\-_]').finditer
    return nlp


def make_matcher(nlp, patterns=PATTERNS):
    """Build a spaCy Matcher from ``(id, pattern)`` pairs.

    Bug fix: the loop previously iterated the module-level ``PATTERNS``
    constant, silently ignoring the ``patterns`` argument.
    """
    matcher = spacy.matcher.Matcher(nlp.vocab)
    for _id, pattern in patterns:
        # NOTE(review): the 3-argument add() is the spaCy v2 API; v3 uses
        # matcher.add(_id, [pattern]) — confirm the project's spaCy version.
        matcher.add(_id, None, pattern)
    return matcher
class MyDoc(document.Document):
    """Example document with an embedded MyInner object."""

    title = field.Keyword()
    name = field.Text()
    created_at = field.Date()
    inner = field.Object(MyInner)
Ejemplo n.º 28
0
class QuestionDocument(SumoDocument):
    """
    ES document for Questions. Every Question in DB gets a QuestionDocument in ES.

    Parent class to AnswerDocument, with most fields here prefixed with "question_".

    This document defines the question-specific fields (most of) which are de-normalized
    in the AnswerDocument. Since QuestionDocument and AnswerDocument are stored in the
    same index, ES sees QuestionDocuments and AnswerDocuments the same, just with some
    documents missing certain fields.

    Enables searching for AAQ threads as a unit.
    """

    question_id = field.Keyword()

    question_title = SumoLocaleAwareTextField()
    question_creator_id = field.Keyword()
    question_content = SumoLocaleAwareTextField(
        term_vector="with_positions_offsets")

    question_created = field.Date()
    question_updated = field.Date()
    question_updated_by_id = field.Keyword()
    question_has_solution = field.Boolean()
    question_is_locked = field.Boolean()
    question_is_archived = field.Boolean()

    question_product_id = field.Keyword()
    question_topic_id = field.Keyword()

    question_taken_by_id = field.Keyword()
    question_taken_until = field.Date()

    question_tag_ids = field.Keyword(multi=True)
    question_num_votes = field.Integer()

    # store answer content to optimise searching for AAQ threads as a unit
    answer_content = SumoLocaleAwareTextField(
        multi=True, term_vector="with_positions_offsets")

    locale = field.Keyword()

    class Index:
        name = config.QUESTION_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    @classmethod
    def prepare(cls, instance):
        """Override super method to exclude certain docs."""
        # Add a discard field in the document if the following conditions are met
        # Question document is spam
        if instance.is_spam:
            instance.es_discard_doc = "unindex_me"

        return super(QuestionDocument, cls).prepare(instance)

    def prepare_question_tag_ids(self, instance):
        return [tag.id for tag in instance.tags.all()]

    def prepare_question_has_solution(self, instance):
        return instance.solution_id is not None

    def prepare_question_num_votes(self, instance):
        # Prefer the db-annotated count from get_queryset (bulk indexing path).
        if hasattr(instance, "es_question_num_votes"):
            return instance.es_question_num_votes
        return instance.num_votes

    def prepare_answer_content(self, instance):
        return [
            answer.content for answer in (
                # when bulk indexing use answer queryset prefetched in `get_queryset` method
                # this is to avoid running an extra query for each question in the chunk
                instance.es_question_answers_not_spam if hasattr(
                    instance, "es_question_answers_not_spam")
                # fallback if non-spam answers haven't been prefetched
                else instance.answers.filter(is_spam=False))
        ]

    def get_field_value(self, field, *args):
        # Strip the de-normalization prefix so base-class lookup uses the
        # model attribute name.
        if field.startswith("question_"):
            field = field[len("question_"):]
        return super().get_field_value(field, *args)

    @classmethod
    def get_model(cls):
        return Question

    @classmethod
    def get_queryset(cls):
        return (
            Question.objects
            # prefetch answers which aren't spam to avoid extra queries when iterating over them
            .prefetch_related(
                Prefetch(
                    "answers",
                    queryset=Answer.objects.filter(is_spam=False),
                    to_attr="es_question_answers_not_spam",
                ))
            # prefetch tags to avoid extra queries when iterating over them
            .prefetch_related("tags")
            # count votes in db to improve performance
            .annotate(es_question_num_votes=Count("votes")))
Ejemplo n.º 29
0
class AnswerDocument(QuestionDocument):
    """
    ES document for Answers. Every Answer in DB gets an AnswerDocument in ES.

    Child class to QuestionDocument, with fields here un-prefixed.

    This document defines the answer-specific fields which are included in an AnswerDocument
    in addition to the de-normalized fields of an Answer's Question which are defined in
    QuestionDocument. Since QuestionDocument and AnswerDocument are stored in the same index,
    ES sees QuestionDocuments and AnswerDocuments the same, just with some documents missing
    certain fields.

    Enables aggregations on answers, such as when creating contribution metrics, and enables
    searching within an AAQ thread, or on Answer-specific properties like being a solution.
    """

    creator_id = field.Keyword()
    created = field.Date()
    content = SumoLocaleAwareTextField(term_vector="with_positions_offsets")
    updated = field.Date()
    updated_by_id = field.Keyword()

    num_helpful_votes = field.Integer()
    num_unhelpful_votes = field.Integer()

    is_solution = field.Boolean()

    @classmethod
    def prepare(cls, instance, **kwargs):
        """Override super method to exclude certain docs."""
        # Add a discard field in the document if the following conditions are met
        # Answer document is spam
        if instance.is_spam or instance.question.is_spam:
            instance.es_discard_doc = "unindex_me"

        obj = super().prepare(instance, **kwargs)
        # add a prefix to the id so we don't clash with QuestionDocuments
        obj.meta.id = "a_{}".format(obj.meta.id)
        return obj

    def prepare_is_solution(self, instance):
        solution_id = instance.question.solution_id
        return solution_id is not None and solution_id == instance.id

    def prepare_locale(self, instance):
        # Answers inherit their question's locale.
        return instance.question.locale

    def prepare_num_helpful_votes(self, instance):
        # Prefer the db-annotated count from get_queryset (bulk indexing path).
        if hasattr(instance, "es_num_helpful_votes"):
            return instance.es_num_helpful_votes
        return instance.num_helpful_votes

    def prepare_num_unhelpful_votes(self, instance):
        if hasattr(instance, "es_num_unhelpful_votes"):
            return instance.es_num_unhelpful_votes
        return instance.num_unhelpful_votes

    def prepare_answer_content(self, instance):
        # clear answer_content field from QuestionDocument,
        # as we don't need the content of sibling answers in an AnswerDocument
        return None

    def get_field_value(self, field, instance, *args):
        # De-normalized "question_" fields are read off the answer's question.
        if field.startswith("question_"):
            instance = instance.question
        return super().get_field_value(field, instance, *args)

    def to_action(self, *args, **kwargs):
        # if the id is un-prefixed, add it
        if not str(self.meta.id).startswith("a_"):
            self.meta.id = f"a_{self.meta.id}"
        return super().to_action(*args, **kwargs)

    @classmethod
    def get(cls, id, **kwargs):
        # if the id is un-prefixed, add it
        if not str(id).startswith("a_"):
            id = f"a_{id}"
        return super().get(id, **kwargs)

    @classmethod
    def get_model(cls):
        return Answer

    @classmethod
    def get_queryset(cls):
        return (
            Answer.objects
            # prefetch each answer's question,
            # applying the same optimizations as in the QuestionDocument
            .prefetch_related(
                Prefetch("question", queryset=QuestionDocument.get_queryset()))
            # count votes in db to improve performance
            .annotate(
                es_num_helpful_votes=Count("votes",
                                           filter=Q(votes__helpful=True)),
                es_num_unhelpful_votes=Count("votes",
                                             filter=Q(votes__helpful=False)),
            ))
Ejemplo n.º 30
0
class WikiDocument(SumoDocument):
    """ES document for approved KB (wiki) articles.

    Redirects, archived documents and templates are discarded at prepare
    time; documents without an approved revision are excluded by the
    queryset.
    """

    updated = field.Date()

    product_ids = field.Keyword(multi=True)
    topic_ids = field.Keyword(multi=True)
    category = field.Keyword()

    # Document specific fields (locale aware)
    title = SumoLocaleAwareTextField()
    content = SumoLocaleAwareTextField(store=True,
                                       term_vector="with_positions_offsets")
    summary = SumoLocaleAwareTextField(store=True,
                                       term_vector="with_positions_offsets")
    # store keywords in a text field so they're stemmed:
    keywords = SumoLocaleAwareTextField()
    slug = SumoLocaleAwareKeywordField(store=True)
    doc_id = SumoLocaleAwareKeywordField(store=True)

    class Index:
        name = config.WIKI_DOCUMENT_INDEX_NAME
        using = config.DEFAULT_ES7_CONNECTION

    # NOTE(review): stacking @classmethod over @property works on CPython
    # 3.9-3.10 only; it is deprecated in 3.11 and removed in 3.13 — confirm
    # the project's supported Python versions.
    @classmethod
    @property
    def update_document(cls):
        """Wiki Documents should be merged/updated."""
        return True

    @classmethod
    def prepare(cls, instance):
        """Override super method to merge docs for KB."""
        # Add a discard field in the document if the following conditions are met
        # Wiki document is a redirect
        # Wiki document is archived
        # Wiki document is a template
        if any([
                instance.html.startswith(REDIRECT_HTML),
                instance.is_archived,
                instance.category
                in [TEMPLATES_CATEGORY, CANNED_RESPONSES_CATEGORY],
        ]):
            instance.es_discard_doc = "unindex_me"

        return super(WikiDocument, cls).prepare(instance,
                                                parent_id=instance.parent_id)

    def prepare_updated(self, instance):
        # None when the document has no current revision.
        return getattr(instance.current_revision, "created", None)

    def prepare_keywords(self, instance):
        """Return the current revision's keywords as a string."""
        return getattr(instance.current_revision, "keywords", "")

    def prepare_content(self, instance):
        return instance.html

    def prepare_summary(self, instance):
        if instance.current_revision:
            return instance.summary
        return ""

    def prepare_doc_id(self, instance):
        return instance.pk

    def prepare_topic_ids(self, instance):
        return [topic.id for topic in instance.topics.all()]

    def prepare_product_ids(self, instance):
        return [product.id for product in instance.products.all()]

    def prepare_display_order(self, instance):
        # presumably `original` is the parent/default-locale document — verify.
        return instance.original.display_order

    @classmethod
    def get_model(cls):
        return wiki_models.Document

    @classmethod
    def get_queryset(cls):
        return (
            # do not include any documents without an approved revision
            wiki_models.Document.objects.exclude(current_revision__isnull=True)
            # all documents will need their current revision:
            .select_related("current_revision")
            # parent documents will need their topics and products:
            .prefetch_related("topics", "products"))