class LinkDocument(Document):
    """Search document backed by the ``Link`` model.

    Besides the model fields listed in ``Django.fields`` it maps a subjects
    text field, the owning user's username and a completion field built from
    the title.
    """

    # Value is read from the model attribute ``subjects_to_string``.
    subjects = fields.TextField(attr="subjects_to_string")
    # Only the ``username`` property of the related user is mapped.
    user = fields.ObjectField(
        properties={
            'username': fields.TextField(),
        },
    )
    # Completion field populated from the model's ``title`` attribute.
    title_suggestions = fields.CompletionField(attr='title')

    class Index:
        name = Indices.LINKS.value
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = Link
        fields = ['title', 'link', 'private']

    @classmethod
    def get_suggestions(cls, query):
        """Return ``get_title_suggestions(cls, query)`` for this document."""
        return get_title_suggestions(cls, query)

    @staticmethod
    def build_query(search_query):
        """Build the OR-combined query for ``search_query``.

        The clauses are, in order: boosted matches on ``title`` and
        ``subjects``, a phrase-prefix match on ``title``, fuzzy matches on
        ``title`` and ``link``, and a phrase-prefix match on ``link``.
        """
        clauses = [
            Q("match", title={'query': search_query, 'boost': 5}),
            Q("match", subjects={'query': search_query, 'boost': 5}),
            Q("match_phrase_prefix",
              title={'query': search_query, 'max_expansions': 10}),
            Q("match", title={'query': search_query, 'fuzziness': 'AUTO'}),
            Q("match", link={'query': search_query, 'fuzziness': 'AUTO'}),
            Q("match_phrase_prefix",
              link={'query': search_query, 'max_expansions': 3}),
        ]
        # Fold left-to-right so the grouping matches a plain ``a | b | c`` chain.
        combined = clauses[0]
        for clause in clauses[1:]:
            combined = combined | clause
        return combined
class CorpusContentDocument(Document):
    """Search document backed by ``CorpusContent``, embedding its corpus."""

    class Index:
        # ``settings.INDEX_CORPUS`` has to be read before the ``settings``
        # dict below is bound: that assignment shadows the outer name for
        # the rest of this class body.
        name = settings.INDEX_CORPUS
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = CorpusContent
        fields = ['id', 'phrase']
        related_models = [Corpus]

    # Properties of the parent corpus mapped on every content document; the
    # ``user`` property is read through the ``get_user_id`` attribute.
    corpus = fields.ObjectField(properties={
        'id': fields.IntegerField(),
        'name': fields.TextField(),
        'description': fields.TextField(),
        'language': fields.TextField(),
        'user': fields.TextField(attr="get_user_id"),
    })

    def get_instances_from_related(self, related_instance):
        """Return the content rows belonging to a related ``Corpus``.

        Any other kind of ``related_instance`` yields ``None``.
        """
        if not isinstance(related_instance, Corpus):
            return None
        return related_instance.corpuscontent_set.all()
Exemple #3
0
class OfferDocType(DocType):
    """Search document backed by the ``Offer`` model.

    Every value is derived from the offer's product (and that product's
    category) by the ``prepare_*`` hooks below.
    """

    fullname = fields.TextField(analyzer='with_morphology')
    category_name = fields.TextField(analyzer='with_morphology')
    fullname_translit = fields.TextField(analyzer='autocomplete')
    is_published = fields.BooleanField()
    views = fields.IntegerField()

    class Django:
        model = Offer

    @staticmethod
    def _joined_product_names(instance):
        """Join the product's article, search title and title with spaces."""
        product = instance.product
        return ' '.join((product.article, product.search_title, product.title))

    def prepare_fullname(self, instance):
        """Return the space-joined product article, search title and title."""
        return self._joined_product_names(instance)

    def prepare_category_name(self, instance):
        """Return the title of the product's category."""
        return instance.product.category.title

    def prepare_fullname_translit(self, instance):
        """Return the full name passed through ``translit(..., 'ru')``."""
        return translit(self._joined_product_names(instance), 'ru')

    def prepare_is_published(self, instance):
        """Return ``True`` unless the category or an ancestor is unpublished."""
        lineage = instance.product.category.get_ancestors(include_self=True)
        has_unpublished = lineage.filter(is_published=False).exists()
        return not has_unpublished

    def prepare_views(self, instance):
        """Return the ``views`` value of the product's category."""
        return instance.product.category.views
Exemple #4
0
class ProductDocument(Document):
    """Search document backed by the ``Product`` model.

    ``title``, ``description`` and ``features`` are each analysed with
    ``custom_analyzer`` and carry a keyword ``raw`` sub-field.
    """

    title = fields.TextField(
        analyzer=custom_analyzer,
        fields=dict(raw=fields.KeywordField()),
    )
    description = fields.TextField(
        analyzer=custom_analyzer,
        fields=dict(raw=fields.KeywordField()),
    )
    features = fields.TextField(
        analyzer=custom_analyzer,
        fields=dict(raw=fields.KeywordField()),
    )

    class Index:
        name = 'products'
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = Product

        # 'description', 'features' and 'title' are declared explicitly
        # above, so only the id is taken straight from the model.
        fields = ['id']
Exemple #5
0
class PaperDocument(Document):
    """Search document backed by the ``Paper`` model, embedding its authors."""

    class Index:
        name = 'papers'
        settings = dict(
            number_of_shards=1,
            number_of_replicas=0,
            max_result_window=100000,
        )

    # Each author contributes its primary key and full name.
    authors = fields.ObjectField(
        properties=dict(
            pk=fields.IntegerField(),
            full_name=fields.TextField(),
        ),
    )

    # Both free-text fields use the 'english' analyzer.
    title = fields.TextField(analyzer='english')
    abstract = fields.TextField(analyzer='english')

    class Django:
        model = Paper
        related_models = [Author]

    def get_instances_from_related(self, related_instance):
        """Return the papers to re-index when a related model changes.

        For an ``Author`` this is ``related_instance.publications.all()``;
        any other type yields ``None``. Use ``related_models`` with caution:
        one author change can trigger updates for many indexed papers.
        """
        if not isinstance(related_instance, Author):
            return None
        return related_instance.publications.all()
Exemple #6
0
class NewCollegeBasicInfoDocument(Document):
    """Search document backed by the ``NewCollegeBasicInfo`` model.

    Maps the college name and a looked-up city name, each with a
    ``suggest`` completion sub-field.
    """

    # Read from the model's ``college_name`` attribute.
    name = fields.TextField(attr='college_name',
                            fields={
                                'suggest': fields.Completion(),
                            })
    # Filled by ``prepare_city_name`` below.
    city_name = fields.TextField(fields={
        'suggest': fields.Completion(),
    })

    def prepare_city_name(self, instance):
        """Return the city name for ``instance``.

        ``instance.city`` is used as the id of a ``NewCollegeCollegeCities``
        row. Returns that row's ``city`` value, or the string ``"default"``
        when no such row exists.
        """
        try:
            city_row = NewCollegeCollegeCities.objects.get(id=instance.city)
        except NewCollegeCollegeCities.DoesNotExist:
            return "default"
        if city_row:
            return city_row.city
        return "default"

    class Django:
        model = NewCollegeBasicInfo
        fields = [
            'id',
            'college_name',
        ]
        # The pagination size is a ``Django`` option; declared only on the
        # document class it is not picked up when the index is populated.
        queryset_pagination = 50

    # Kept for backward compatibility with code that reads the value from
    # the document class itself.
    queryset_pagination = Django.queryset_pagination
class ExerciseDocument(Document):
    """Search document backed by the ``Exercise`` model."""

    id = fields.IntegerField(attr='id')

    # Title analysed with ``html_strip``; ``raw`` is a keyword-analysed copy
    # and ``suggest`` a completion sub-field.
    exercise_title = fields.TextField(
        attr='exercise_title',
        analyzer=html_strip,
        fields=dict(
            raw=fields.TextField(analyzer='keyword', multi=True),
            suggest=fields.CompletionField(multi=True),
        ),
    )

    # Same layout as the title, read from the model attribute
    # ``muscle_group_indexing`` and declared multi-valued.
    muscle_group_title = fields.TextField(
        attr='muscle_group_indexing',
        analyzer=html_strip,
        fields=dict(
            raw=fields.TextField(analyzer='keyword', multi=True),
            suggest=fields.CompletionField(multi=True),
        ),
        multi=True,
    )

    class Django(object):
        """Binds this document to its Django model."""

        model = Exercise
Exemple #8
0
class NoteDocument(Document):
    """Search document backed by the ``Note`` model."""

    # Only the label's ``name`` is mapped.
    label = fields.ObjectField(properties=dict(name=fields.TextField()))

    # Free-text fields analysed with ``html_strip``.
    title = fields.TextField(analyzer=html_strip)
    reminder = fields.StringField()
    note = fields.TextField(analyzer=html_strip)

    user_id = fields.IntegerField()
    color = fields.StringField()

    class Index:
        # Name of the Elasticsearch index
        name = 'search_note'
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = Note
Exemple #9
0
class JobListingDocument(DocType):
    """Search document type backed by the ``JobListing`` model."""

    # Indexed from the model's ``description`` with fielddata enabled.
    keywords = fields.TextField(
        attr="description",
        fielddata=True,
    )
    # Disabled shingle-analysed variants of ``description``:
    # shingles = fields.TextField(attr="description", analyzer="shingle", fielddata=True)
    # triple_shingles = fields.TextField(attr="description", analyzer="triple_shingle", fielddata=True)

    # Indexed from the model's ``location``: once with the default analyzer
    # and once with the "shingle" analyzer.
    location_keywords = fields.TextField(
        attr="location",
        fielddata=True,
    )
    location_shingles = fields.TextField(
        attr="location",
        analyzer="shingle",
        fielddata=True,
    )
    # Disabled:
    # location_triple_shingles = fields.TextField(attr="location", analyzer="triple_shingle", fielddata=True)

    class Meta:
        # The model associated with this DocType
        model = JobListing

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'indeed_id',
            'posted_date',
            'title',
            'location',
            'company',
            'description',
        ]

        # Optional settings, currently left disabled:
        #   ignore_signals = True        -> do not auto-update Elasticsearch
        #                                   when a model is saved or deleted
        #   auto_refresh = False         -> no index refresh after every
        #                                   update (overrides global setting)
        #   queryset_pagination = 5000   -> paginate the queryset used to
        #                                   populate the index (by default
        #                                   there is no pagination)

    @staticmethod
    def get_analyzer():
        """Return the module-level ``keyword_analyzer``."""
        return keyword_analyzer
class SchoolDocument(Document):
    """Search document backed by the ``School`` model."""

    # Filled by ``prepare_autocomplete``; analysed with ``label_autocomplete``.
    autocomplete = fields.TextField(analyzer=label_autocomplete)
    # The model's ``primary_alias``, boosted by 10.
    text = fields.TextField(attr='primary_alias', boost=10)
    # Filled by ``prepare_nicknames``.
    nicknames = fields.TextField()

    def prepare_autocomplete(self, instance):
        """Return all alias strings followed by all nickname strings."""
        terms = [entry.alias for entry in instance.alias_set.all()]
        terms.extend(entry.nickname for entry in instance.nickname_set.all())
        return terms

    def prepare_nicknames(self, instance):
        """Return the nickname strings of ``instance`` as a list."""
        return [entry.nickname for entry in instance.nickname_set.all()]

    class Index:
        name = 'paying-for-college'
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = School
        fields = ['school_id', 'city', 'state', 'zip5']
Exemple #11
0
class TweetsDocument(Document):
    """Search document backed by ``models.Tweet``.

    ``author`` and ``entities`` are mapped as objects; ``text`` is analysed
    with ``text_analyser`` and has a keyword ``raw`` sub-field.
    """

    # search_item = fields.TextField(attrs="search_item")

    # ListField expects a field *instance*; the field class was being passed
    # here, so the inner TextField is now instantiated.
    excepted_words = fields.ListField(field=fields.TextField())

    author = fields.ObjectField()
    entities = fields.ObjectField()
    text = fields.TextField(fields={"raw": fields.KeywordField()},
                            analyzer="text_analyser")
    raw_text = fields.TextField()

    retweet_count = fields.IntegerField()

    class Django:
        model = models.Tweet
        # "author" and "entities" are declared explicitly above rather than
        # listed here.
        fields = [
            "search_item",
            "id",
            "created_date",
            "source",
            "lang",
        ]

        auto_refresh = False
        ignore_signals = False
        # Paginate the django queryset used to populate the index with the
        # specified size (by default there is no pagination)
        queryset_pagination = 1000
Exemple #12
0
class TenderDoc(DocType):
    """Search document type backed by the ``Tender`` model.

    ``description`` and ``title`` share one layout: analysed with
    ``case_insensitive_analyzer``, fielddata enabled, plus a multi-valued
    keyword ``raw`` sub-field normalised by ``case_insensitive_normalizer``.
    """

    description = fields.TextField(
        analyzer=case_insensitive_analyzer,
        fielddata=True,
        fields=dict(
            raw=fields.KeywordField(
                multi=True,
                ignore_above=256,
                normalizer=case_insensitive_normalizer,
            ),
        ),
    )

    title = fields.TextField(
        analyzer=case_insensitive_analyzer,
        fielddata=True,
        fields=dict(
            raw=fields.KeywordField(
                multi=True,
                ignore_above=256,
                normalizer=case_insensitive_normalizer,
            ),
        ),
    )

    reference = fields.KeywordField(attr='reference')

    class Meta:
        model = Tender
        fields = [
            'unspsc_codes', 'cpv_codes', 'organization', 'source', 'notified',
        ]
class MemberDocument(Document):
    """Search document backed by the ``Members`` model."""

    id = fields.IntegerField(attr='id')

    # Name analysed with ``html_strip``; ``raw`` is a keyword copy and
    # ``suggest`` a completion sub-field.
    full_name = StringField(
        analyzer=html_strip,
        fields=dict(
            raw=KeywordField(),
            suggest=fields.CompletionField(),
        ),
    )
    birthday = fields.DateField()

    # Nested roles; each ``title`` is read from the role's ``role`` attribute.
    roles = fields.NestedField(
        properties=dict(
            title=fields.TextField(
                analyzer=html_strip,
                attr='role',
                fields=dict(raw=KeywordField()),
            ),
        ),
    )

    # Read from the model's ``get_absolute_url`` attribute.
    member_url = fields.TextField(attr='get_absolute_url')
    image = fields.FileField(attr="image")

    class Django(object):
        """Binds this document to its Django model."""

        model = Members
class CompanyDocument(DocType):
    """Search document backed by the ``Company`` model."""

    # Identity
    id = fields.IntegerField(attr='id')
    avatar = fields.TextField()
    slug = fields.StringField()
    # Name analysed with ``html_strip`` plus a keyword ``raw`` sub-field.
    name = StringField(
        analyzer=html_strip,
        fields=dict(raw=KeywordField()),
    )

    # Profile details
    about = fields.TextField()
    headquarter = fields.TextField()
    is_address_public = fields.BooleanField()
    website = fields.StringField()
    since = fields.StringField()
    size_from = fields.IntegerField()
    size_to = fields.IntegerField()

    # Related objects, read through dedicated ``*_indexing`` attributes.
    creator = fields.ObjectField(
        attr='create_field_indexing',
        properties=dict(id=fields.IntegerField()),
    )
    tags = fields.ObjectField(
        attr='tag_field_indexing',
        properties=dict(
            id=fields.IntegerField(),
            name=fields.StringField(),
        ),
    )

    class Django:
        model = Company
Exemple #15
0
class PersonDocument(BaseDocument):
    """Search document for people, backed by the ``Author`` model."""

    profile_image = es_fields.TextField(attr='profile_image_indexing')
    user_reputation = es_fields.IntegerField(attr='user_reputation_indexing')
    author_score = es_fields.IntegerField(attr='author_score')
    description = es_fields.TextField(attr='description',
                                      analyzer=content_analyzer)
    full_name = es_fields.TextField(attr='full_name',
                                    analyzer=content_analyzer)
    person_types = es_fields.KeywordField(attr='person_types_indexing')
    headline = es_fields.ObjectField(attr='headline',
                                     properties={
                                         'title': es_fields.TextField(),
                                     })

    class Index:
        name = 'person'

    class Django:
        model = Author
        fields = [
            'id',
            'first_name',
            'last_name',
        ]

    def should_remove_from_index(self, obj):
        """Return ``True`` when the author's user account is suspended.

        The check is best-effort: if the user cannot be loaded for any
        reason the author stays in the index and ``False`` is returned.
        """
        import logging

        try:
            author_user = User.objects.get(id=obj.user_id)
            return bool(author_user.is_suspended)
        except User.DoesNotExist:
            # An author without a matching user is never treated as suspended.
            return False
        except Exception:
            # Stay best-effort, but leave a trace instead of hiding the error.
            logging.getLogger(__name__).exception(
                "Could not determine suspension state for author %r", obj)
            return False
Exemple #16
0
class ElasticSearchDocument(ESDocument):
    """Elasticsearch document backed by the ``MyDocument`` model."""

    class Index:
        """Index name and settings for this document."""

        # Name of the Elasticsearch index; must be lowercase for ES
        # requirements.
        name = 'mydocument'
        # See Elasticsearch Indices API reference for available settings
        settings = dict(number_of_shards=1, number_of_replicas=0)

    # Text fields indexed with "ik_max_word" and searched with "ik_smart".
    content = fields.TextField(
        search_analyzer="ik_smart",
        analyzer="ik_max_word",
    )
    title = fields.TextField(
        search_analyzer="ik_smart",
        analyzer="ik_max_word",
    )

    class Django:
        """Django binding; can auto-update ES when the database changes."""

        # The model associated with this ES Document
        model = MyDocument

        # The fields of the model you want to be indexed in Elasticsearch.
        # 'content' is configured manually above (due to the Chinese
        # analyzer), so it is not listed here.
        fields = ['id', 'status', 'src']
Exemple #17
0
class PostDocument(DocType):
    """Elasticsearch adapter class for ``Post``.

    Written following the django-elasticsearch-dsl conventions.
    """

    # Title and content are both indexed and searched with ``ik_max_word``.
    title = fields.TextField(
        analyzer=ik_max_word,
        search_analyzer=ik_max_word,
    )
    content = fields.TextField(
        analyzer=ik_max_word,
        search_analyzer=ik_max_word,
    )
    category = fields.ObjectField(
        properties=dict(
            name=fields.TextField(),
            cate_id=fields.TextField(),
        ),
    )

    def get_queryset(self):
        """Return the base queryset with ``category`` select-related."""
        base_queryset = super(PostDocument, self).get_queryset()
        return base_queryset.select_related('category')

    def get_instances_from_related(self, related_instance):
        """Return the posts of a related ``Category``; ``None`` otherwise."""
        if not isinstance(related_instance, Category):
            return None
        return related_instance.in_category_posts.all()

    class Meta:
        model = Post
        fields = ['abstract', 'create_time', 'update_time', 'status']
        related_models = [Category]
class ArticleDocument(Document):
    """Search document backed by the ``Articles`` model."""

    # Embedded category: id, name and image.
    category = fields.ObjectField(
        properties=dict(
            id=fields.TextField(),
            name=fields.TextField(),
            image=fields.FileField(),
        ),
    )
    cover = fields.FileField()
    tags = fields.ListField(fields.TextField())

    class Index:
        name = 'articles'
        settings = dict(number_of_shards=1, number_of_replicas=1)

    class Django:
        model = Articles
        fields = [
            'id',
            'updated_at',
            'title',
            'subtitle',
            'author_name',
            'user',
            'realease',
            'is_active',
            'slug',
        ]
        # Currently disabled:
        # related_models = [Category]

    def get_queryset(self):
        """Return the base queryset with ``category`` select-related.

        Not mandatory, but it lets the category be fetched in the same SQL
        request.
        """
        base_queryset = super(ArticleDocument, self).get_queryset()
        return base_queryset.select_related('category')
Exemple #19
0
class ArticleDocument(Document):
    """Search document backed by the ``Article`` model."""

    class Index:
        # Name of the Elasticsearch index
        name = 'articles'
        # See Elasticsearch Indices API reference for available settings
        settings = dict(number_of_shards=1, number_of_replicas=0)

    # Each of these is read from the model attribute of the same name.
    get_absolute_url = fields.TextField(attr="get_absolute_url")
    get_type_display = fields.TextField(attr="get_type_display")
    get_lead = fields.TextField(attr="get_lead")
    # Filled by ``prepare_section`` below.
    section = fields.TextField()

    def prepare_section(self, instance):
        """Return the section's ``__unicode__()`` text, or ``u''`` if unset."""
        if instance.section:
            return instance.section.__unicode__()
        return u''

    class Django:
        # The model associated with this Document
        model = Article

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            'headline', 'deck', 'lead', 'body',
            'home_lead', 'is_published', 'date_published', 'type',
        ]
Exemple #20
0
class NoteDocument(Document):
    """Document for the Note index."""

    id = fields.IntegerField(attr='id')
    # Indexed from the model's ``user_id`` attribute.
    user = fields.KeywordField(attr='user_id')
    course_id = fields.KeywordField()
    usage_id = fields.KeywordField()
    # Free-text fields analysed with ``html_strip``.
    quote = fields.TextField(analyzer=html_strip)
    text = fields.TextField(analyzer=html_strip)
    ranges = fields.KeywordField()
    created = fields.DateField()
    updated = fields.DateField()
    # Multi-valued; filled by ``prepare_tags`` below.
    tags = fields.TextField(analyzer=case_insensitive_keyword, multi=True)

    def prepare_data(self, instance):
        """Return the note's text immediately followed by its tags value.

        NOTE(review): no ``data`` field is declared on this class - confirm
        that this hook is still invoked anywhere.
        """
        text_part = instance.text
        tags_part = instance.tags
        return '{0}{1}'.format(text_part, tags_part)

    def prepare_tags(self, instance):
        """Return ``instance.tags`` decoded from its JSON string form."""
        decoded_tags = json.loads(instance.tags)
        return decoded_tags

    class Django:
        model = Note

    class Meta:
        # NOTE(review): confirm these options are read from ``Meta`` rather
        # than from the ``Django`` class above.
        parallel_indexing = True
        queryset_pagination = 50
Exemple #21
0
class ManufacturerDocument(Document):
    """Search document backed by ``Manufacturer``, embedding its country."""

    name = fields.TextField()
    address = fields.TextField()
    # Read from ``instance.country``; only the country's name is mapped.
    country = fields.ObjectField(
        properties={
            'name': fields.TextField()
        }
    )

    class Index:
        name = 'manufacturers'

    class Django:
        model = Manufacturer
        related_models = [Country, ]

    def get_queryset(self):
        """Return the base queryset with the country select-related.

        ``select_related`` takes the relation name (``country``, the same
        attribute the ``country`` field above reads), not the ``country_id``
        column, which Django rejects as an invalid field name.
        """
        return super(ManufacturerDocument, self).get_queryset().select_related(
            'country',
        )

    def get_instances_from_related(self, related_instance):
        """Return the manufacturers to re-index when a related model changes.

        For a ``Country`` this is its ``manufacturer_set``; any other type
        yields an empty list. Use ``related_models`` with caution: one
        country change can trigger updates for many indexed manufacturers.
        """
        if isinstance(related_instance, Country):
            return related_instance.manufacturer_set.all()
        return []
Exemple #22
0
class UniversityDocument(Document):
    """Search document backed by the ``University`` model.

    Text fields that need exact matching carry a keyword ``raw`` sub-field
    normalised with ``keyword_normalizer``.
    """

    id = fields.IntegerField(attr='id')

    name = fields.TextField(
        fields=dict(raw=fields.KeywordField(normalizer=keyword_normalizer)),
    )
    domain = fields.TextField(
        fields=dict(raw=fields.KeywordField(normalizer=keyword_normalizer)),
    )
    web_page = fields.TextField(
        fields=dict(raw=fields.KeywordField(normalizer=keyword_normalizer)),
    )

    # Embedded country: plain-text name, and a code with a ``raw`` sub-field.
    country = fields.ObjectField(
        properties=dict(
            name=fields.TextField(),
            code=fields.TextField(
                fields=dict(
                    raw=fields.KeywordField(normalizer=keyword_normalizer),
                ),
            ),
        ),
    )

    createdAt = fields.DateField()
    isDelete = fields.BooleanField()

    class Django(object):
        """Binds this document to its Django model."""

        model = University
Exemple #23
0
class ProjectDocument(RTDDocTypeMixin, DocType):
    """Search document type backed by the ``Project`` model."""

    class Meta(object):
        model = Project
        fields = ('name', 'slug', 'description')
        auto_refresh = False

    # Read from the model's ``get_absolute_url`` attribute.
    url = fields.TextField(attr='get_absolute_url')
    # Nested users, each mapped with username and id.
    users = fields.NestedField(
        properties=dict(
            username=fields.TextField(),
            id=fields.IntegerField(),
        ),
    )
    language = fields.KeywordField()

    @classmethod
    def faceted_search(cls, query, language=None, using=None, index=None):
        """Return a ``ProjectSearch`` for ``query``.

        ``using`` and ``index`` fall back to the values on ``cls._doc_type``
        when falsy. A truthy ``language`` is added as a ``language`` filter.
        """
        search_kwargs = dict(
            using=using or cls._doc_type.using,
            index=index or cls._doc_type.index,
            doc_types=[cls],
            model=cls._doc_type.model,
            query=query,
        )
        if language:
            search_kwargs['filters'] = dict(language=language)
        return ProjectSearch(**search_kwargs)
Exemple #24
0
class ProjectDocument(DocType):
    """Search document type backed by the ``Project`` model."""

    # Metadata
    url = fields.TextField(attr='get_absolute_url')
    users = fields.NestedField(
        properties=dict(
            username=fields.TextField(),
            id=fields.IntegerField(),
        ),
    )
    language = fields.KeywordField()

    modified_model_field = 'modified_date'

    class Meta:
        model = Project
        fields = ('name', 'slug', 'description')
        ignore_signals = True

    @classmethod
    def faceted_search(cls, query, user, language=None):
        """Return a ``ProjectSearch`` for ``query`` on behalf of ``user``.

        A truthy ``language`` is added as a ``language`` filter.
        """
        # Imported inside the method, as the original does.
        from readthedocs.search.faceted_search import ProjectSearch

        search_kwargs = dict(user=user, query=query)
        if language:
            search_kwargs['filters'] = dict(language=language)
        return ProjectSearch(**search_kwargs)
Exemple #25
0
class NoteDocument(DocType):
    """Search document type describing the fields of the ``Note`` model.

    All four fields share one layout: analysed with ``html_strip`` plus a
    keyword-analysed ``raw`` sub-field.
    """

    title = fields.StringField(
        analyzer=html_strip,
        fields=dict(raw=fields.StringField(analyzer='keyword')),
    )
    description = fields.TextField(
        analyzer=html_strip,
        fields=dict(raw=fields.TextField(analyzer='keyword')),
    )
    color = fields.StringField(
        analyzer=html_strip,
        fields=dict(raw=fields.StringField(analyzer='keyword')),
    )
    reminder = fields.StringField(
        analyzer=html_strip,
        fields=dict(raw=fields.StringField(analyzer='keyword')),
    )

    class Meta(object):
        """Binds this document type to its model."""

        model = Note
Exemple #26
0
class TokenSearch(Document):
    """Search document backed by the ``Token`` model."""

    # Read from the model's ``source_to_str`` attribute.
    source = fields.TextField(attr="source_to_str")
    # Indexed with the 'keyword' analyzer.
    searchq = fields.TextField(analyzer='keyword')

    class Index:
        # name of the elasticsearch index
        name = 'tokens'
        # see Elasticsearch Indices API reference for available settings
        settings = common_settings

    class Django:
        model = Token
        fields = common_fields

    def to_dict(self):
        """Return a plain dict of this document's values.

        Attributes are copied as-is, except ``date``, which is rendered with
        ``dateformat.format(self.date, datestring)``.
        """
        copied_keys = (
            "id",
            "name",
            "priority",
            "source",
            "source_verbose",
            "dpi",
            "searchq",
            "thumbpath",
        )
        payload = {key: getattr(self, key) for key in copied_keys}
        payload["date"] = dateformat.format(self.date, datestring)
        return payload
class TMContentDocument(Document):
    """Search document backed by ``TMContent``, embedding its memory."""

    class Index:
        # ``settings.INDEX_TM`` has to be read before the ``settings`` dict
        # below is bound: that assignment shadows the outer name for the
        # rest of this class body.
        name = settings.INDEX_TM
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Django:
        model = TMContent
        fields = ['id', 'src_sentence', 'tar_sentence']
        related_models = [TranslationMemory]

    # Properties of the parent translation memory mapped on every content
    # document; ``user`` is read through the ``get_user_id`` attribute.
    translation_memory = fields.ObjectField(properties={
        'id': fields.IntegerField(),
        'name': fields.TextField(),
        'description': fields.TextField(),
        'src_lang': fields.TextField(),
        'tar_lang': fields.TextField(),
        'user': fields.TextField(attr="get_user_id"),
    })

    def get_instances_from_related(self, related_instance):
        """Return the content rows of a related ``TranslationMemory``.

        Any other kind of ``related_instance`` yields ``None``.
        """
        if not isinstance(related_instance, TranslationMemory):
            return None
        return related_instance.tmcontent_set.all()
Exemple #28
0
class CarDocument(DocType):
    """Search document type backed by ``Car``, embedding its manufacturer."""

    # Car name with a ``suggest`` completion sub-field.
    name = fields.TextField(
        attr='name',
        fields=dict(suggest=fields.Completion()),
    )
    manufacturer = fields.ObjectField(
        properties=dict(
            name=fields.TextField(),
            country_code=fields.TextField(),
        ),
    )
    # Read from the model's ``get_auction_title`` attribute.
    auction_title = fields.TextField(attr='get_auction_title')
    # Filled by ``prepare_points`` below.
    points = fields.IntegerField()

    def prepare_points(self, instance):
        """Return 2 for a silver car and 1 for any other colour."""
        return 2 if instance.color == 'silver' else 1

    class Meta:
        model = Car
        fields = ['id', 'color', 'description', 'type']
        related_models = [Manufacturer]

    def get_queryset(self):
        """Return the base queryset with ``manufacturer`` select-related."""
        base_queryset = super().get_queryset()
        return base_queryset.select_related('manufacturer')

    def get_instances_from_related(self, related_instance):
        """Return the cars of a related ``Manufacturer``; ``None`` otherwise."""
        if not isinstance(related_instance, Manufacturer):
            return None
        return related_instance.car_set.all()
Exemple #29
0
            class CarDocument(DocType):
                # Document type for ``Car`` declaring ``color`` and ``name``
                # explicitly.
                # NOTE(review): ``name`` is declared here *and* listed in
                # ``Django.fields`` - presumably deliberate (e.g. a fixture
                # exercising a duplicate declaration); confirm against the
                # enclosing scope, which is not visible in this chunk.
                color = fields.TextField()
                name = fields.TextField()

                class Django:
                    fields = ['name']
                    model = Car
class HubDocument(BaseDocument):
    """Search document backed by the ``Hub`` model."""

    # NOTE(review): declared on the document class itself; confirm that
    # ``BaseDocument`` is what consumes it.
    auto_refresh = True

    # Counters, each read from the model attribute of the same name.
    paper_count = es_fields.IntegerField(attr='paper_count')
    subscriber_count = es_fields.IntegerField(attr='subscriber_count')
    discussion_count = es_fields.IntegerField(attr='discussion_count')

    # Text fields with dedicated analyzers.
    description = es_fields.TextField(
        attr='description',
        analyzer=content_analyzer,
    )
    name = es_fields.TextField(
        attr='name',
        analyzer=title_analyzer,
    )

    class Index:
        name = 'hub'

    class Django:
        model = Hub
        fields = ['id', 'acronym', 'is_locked']

    def should_remove_from_index(self, obj):
        """Return ``True`` when ``obj`` is removed or locked, else ``False``."""
        return bool(obj.is_removed or obj.is_locked)