Exemple #1
0
class Article(Document):
    """Document mapping for articles stored in the ``article-index`` index."""

    # ``title`` is analyzed with snowball and also exposes a ``raw`` keyword
    # sub-field.
    title = Text(analyzer="snowball", fields={"raw": Keyword()})
    body = Text(analyzer=html_strip)
    tags = Keyword()
    language = Text()
    country = Text()
    publication_date = Date()
    # Nested ``Source`` objects; ``Source`` is declared outside this block.
    source = Nested(Source)
    category = Keyword()
    url = Text()
    image_url = Text()

    class Index:
        # Index name and the settings applied to it.
        name = "article-index"
        settings = {"number_of_shards": 2}
Exemple #2
0
class Article(DocType):
    """Mapping for documents of type ``jobbole_article``."""

    title_suggest = Completion(
        analyzer=ik_analyzer,
        search_analyzer=ik_analyzer,
    )
    # The keyword sub-field of ``title`` is itself named ``title``.
    title = Text(
        analyzer="ik_max_word",
        search_analyzer="ik_max_word",
        fields={"title": Keyword()},
    )
    id = Text()
    url = Text()
    front_image_url = Text()
    front_image_path = Text()
    create_date = Date()

    # Counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # As with ``title``, the keyword sub-field reuses the parent's name.
    tags = Text(analyzer="ik_max_word", fields={"tags": Keyword()})
    content = Text(analyzer="ik_max_word")

    class Meta:
        # NOTE(review): the index name is an empty string here -- confirm it
        # is supplied elsewhere before documents are saved.
        index = ""
        doc_type = "jobbole_article"
Exemple #3
0
class WeiboType(Document):
    """Document mapping for entries of the ``weibo`` index."""

    suggest = Completion(analyzer="ik_max_word")
    id = Keyword()
    created_at = Date()
    user = Object()
    # ``multi=True``: the field holds a list of keyword values.
    pics = Keyword(multi=True)

    # Counters.
    reposts_count = Integer()
    comments_count = Integer()
    attitudes_count = Integer()

    text = Text(analyzer="ik_max_word")
    video_info = Object()

    class Index:
        # Index name and the settings applied to it.
        name = "weibo"
        settings = {"number_of_shards": 4}
Exemple #4
0
class StockMeta(BaseDocType):
    """Mapping for the ``stock_meta`` index; every field except ``listDate``
    is a keyword."""

    # Identification.
    id = Keyword()
    type = Keyword()
    exchange = Keyword()
    code = Keyword()
    name = Keyword()

    listDate = Date()

    # Classification attributes.
    indexCategory = Keyword()
    sinaIndustry = Keyword()
    sinaConcept = Keyword()
    sinaArea = Keyword()
    sector = Keyword()
    industry = Keyword()

    class Meta:
        # Target index and document type.
        index = "stock_meta"
        doc_type = "doc"
Exemple #5
0
class NewsClsType(DocType):
    """Elasticsearch model for CLS (Cailianshe) news items.

    Documents go to index ``news`` with document type ``news_cls``.
    """

    suggest = Completion(analyzer=ik_analyzer)
    article_id = Integer()

    # Full-text fields analyzed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    brief = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    create_date = Date()

    # Exact-match fields.
    stock_code = Keyword()
    stock_name = Keyword()
    url = Keyword()
    source = Keyword()
    website = Keyword()

    class Meta:
        # Target index and document type.
        index = "news"
        doc_type = "news_cls"
Exemple #6
0
class CryptoCurrencyKData(BaseDocType):
    """Mapping for cryptocurrency k-data records (document type ``doc``)."""

    # Identification.
    id = Keyword()
    securityId = Keyword()
    timestamp = Date()
    code = Keyword()

    # Numeric values, all stored as floats.
    open = Float()
    close = Float()
    high = Float()
    low = Float()
    volume = Float()
    preClose = Float()
    change = Float()
    changePct = Float()

    class Meta:
        # ``all`` is configured with ``enabled=False``; no index name is
        # declared in this class.
        all = MetaField(enabled=False)
        doc_type = "doc"
Exemple #7
0
class JobType(DocType):
    """Mapping for ``job`` documents stored in the ``jobinfo`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Exact-match URL fields.
    url = Keyword()
    url_object_id = Keyword()

    job_type = Text(analyzer="ik_max_word")
    job_city = Keyword()
    job_desc = Text(analyzer="ik_max_word")
    company_name = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_advantage = Text(analyzer="ik_max_word")
    # Unlike the other text fields, ``job_addr`` uses the ``ik_analyzer``
    # object rather than the ``"ik_max_word"`` analyzer name.
    job_addr = Text(analyzer=ik_analyzer)

    class Meta:
        # Target index and document type.
        index = "jobinfo"
        doc_type = "job"
Exemple #8
0
class ZhiHuQuestionType(DocType):
    """Zhihu question item: ``question`` documents in the ``zhihu`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    zhihu_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Meta:
        # Target index and document type.
        index = "zhihu"
        doc_type = "question"
Exemple #9
0
class NewsType(DocType):
    """Mapping for ``news`` documents stored in the ``information`` index."""

    title = Text(analyzer="ik_max_word")
    suggest = Completion(analyzer=ik_analyzer)

    # Exact-match URL fields.
    url = Keyword()
    url_md5 = Keyword()

    # These two text fields use ``ik_smart`` rather than ``ik_max_word``.
    category = Text(analyzer="ik_smart")
    summary = Text(analyzer="ik_smart")

    image_urls = Keyword()
    image_path = Keyword()
    from_platform = Keyword()
    # ``news_time`` is mapped as a keyword; only ``crawl_time`` is a date.
    news_time = Keyword()
    crawl_time = Date()
    news_score = Keyword()

    class Meta:
        # Target index and document type.
        index = "information"
        doc_type = "news"
Exemple #10
0
class SihouArticleType(DocType):
    """Mapping for ``sihou_article`` documents in the ``sihou`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    author = Text(analyzer="ik_max_word")
    create_date = Date()

    # Exact-match URL fields.
    url = Keyword()
    url_object_id = Keyword()

    # Counters.
    view_count = Integer()
    praise_count = Integer()

    front_image_url = Keyword()
    front_image_path = Keyword()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type.
        index = "sihou"
        doc_type = "sihou_article"
Exemple #11
0
class IndexedCMSPage(DocType):
    """Mapping for CMS pages indexed as web content.

    All analyzed fields are declared with ``Text``. The mapping previously
    mixed ``Text`` with the deprecated ``String`` field type; using a single
    type keeps the class consistent and avoids the legacy ``string`` mapping.
    Analyzer and sub-field options are unchanged.
    """

    body = Text(analyzer='english')
    description = Text(analyzer='english')
    # ``_exact`` sub-fields give a keyword variant of the same value.
    django_id = Text(fields={'_exact': Keyword()})
    language = Text(analyzer='english')
    page_id = Long()
    pub_date = Date()
    site_id = Long()
    slug = Text(analyzer='english', fields={'_exact': Keyword()})
    text = Text(analyzer='english')
    title = Text(analyzer='english', fields={'_exact': Keyword()})
    url = Text(fields={'_exact': Keyword()})

    class Meta:
        # Index and document type names come from the project settings.
        index = settings.ES_INDICES['web_content']['name']
        doc_type = settings.ES_INDICES['web_content']['documents'][0]['name']
        # Dynamic mapping is set to ``strict``.
        dynamic = MetaField('strict')
Exemple #12
0
class User(Document):
    """Document mapping for a user stored in the ``user`` index.

    Besides the profile fields, a user carries a nested list of ``LoginLog``
    entries and a list of group ids.
    """

    # Credentials.
    username = Text()
    email = Text()
    password = Text()

    # Personal details.
    name = Text()
    surname = Text()
    birthday = Date()
    gender = Text()

    login_logs = Nested(LoginLog)

    # ``multi=True``: the field holds a list of keyword values.
    groups = Keyword(multi=True)

    # Fields below are currently disabled:
    #location = Text()
    #description = Text()
    #url = Text()
    #registerDate = Date()
    #profileImagePath = Text()
    #phone = Text()
    #website = Text()
    #postCount = Integer()
    #posts = Nested(Post)
    #friendsCount = Integer()
    #friends = Nested(User)

    class Index:
        name = "user"

    def save(self, **kwargs):
        """Save the document, forwarding ``kwargs`` to the parent ``save``."""
        return super().save(**kwargs)

    def add_log(self, device, ip, state, date):
        """Append a new ``LoginLog`` built from the arguments and return it."""
        log_entry = LoginLog(device=device, ip=ip, state=state, date=date)
        self.login_logs.append(log_entry)
        return log_entry

    def get_login_logs(self):
        """Return the ``login_logs`` collection of this user."""
        return self.login_logs

    def addGroup(self, group_id):
        """Add ``group_id`` to ``groups`` unless it is already present."""
        if group_id in self.groups:
            return
        self.groups.append(group_id)

    def removeGroup(self, group_id):
        """Remove ``group_id`` from ``groups``; do nothing when absent."""
        if group_id not in self.groups:
            return
        self.groups.remove(group_id)
class Article_4houType(DocType):
    """Mapping for ``A_4hou`` documents in the ``teachnical_4hou`` index."""

    # Search suggestions.
    suggest = Completion(analyzer=ik_analyzer)

    image_local = Keyword()
    title = Text(analyzer="ik_max_word")
    url_id = Keyword()
    create_time = Date()
    url = Keyword()
    author = Keyword()
    tags = Text(analyzer="ik_max_word")

    # Counters.
    watch_nums = Integer()
    comment_nums = Integer()
    praise_nums = Integer()

    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type (index name kept exactly as is).
        index = "teachnical_4hou"
        doc_type = "A_4hou"
Exemple #14
0
class ArticleType(DocType):
    """Mapping for ``article`` documents stored in the ``jobbole`` index."""

    # The completion field uses ``ik_analyzer`` for indexing and searching.
    suggest = Completion(
        analyzer=ik_analyzer,
        search_analyzer=ik_analyzer,
    )

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Exact-match URL and image fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type.
        index = "jobbole"
        doc_type = "article"
class GeneralDoc(Document):
    """Document mapping for the ``general`` index.

    ``timestamp`` is overwritten with the current local time on every save.
    """

    class Index:
        name = "general"

    # Alternative considered: Text(fields={"keyword": Keyword()})
    symbol = Keyword()
    company_name = Text()

    # Price and volume figures.
    high_52_week = Double()
    low_52_week = Double()
    close_price = Double()
    avg_volume_30_days = Long()

    timestamp = Date()

    def save(self, **kwargs):
        """Stamp ``timestamp`` with ``datetime.now()`` and save the document.

        ``kwargs`` are forwarded to the parent ``save``; its result is
        returned.
        """
        saved_at = datetime.now()
        self.timestamp = saved_at
        return super().save(**kwargs)
Exemple #16
0
class Article(Document):
    """Document mapping for blog articles stored in the ``blog`` index.

    ``lines`` is derived from ``body`` at save time; see ``save``.
    """

    title = Text(analyzer='snowball')
    body = Text(analyzer='snowball')
    tags = Keyword()
    published_from = Date()
    # Must be a field *instance*: a bare ``Integer`` (the class itself) is
    # not picked up as a field and leaves ``lines`` out of the mapping.
    lines = Integer()

    class Index:
        name = "blog"
        settings = {"number_of_shards": 3}

    def save(self, **kwargs):
        """Set ``lines`` from ``body`` and save the document.

        ``lines`` is the number of whitespace-separated tokens in ``body``.
        ``kwargs`` are forwarded to the parent ``save``; its result is
        returned.
        """
        self.lines = len(self.body.split())
        return super(Article, self).save(**kwargs)

    def is_published(self):
        """Return whether ``published_from`` is not later than the current
        local time."""
        return datetime.now() >= self.published_from
Exemple #17
0
class TechnologyType(DocType):
    """Mapping for ``tec_articles`` documents in the ``technology`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    time = Date()
    link_url = Keyword()
    content = Text(analyzer="ik_max_word")
    url_object_id = Keyword()
    tag = Text(analyzer="ik_max_word")  # tags

    # Counters.
    comment_num = Integer()  # number of comments
    read_num = Integer()  # number of reads
    Collection_num = Integer()  # number of times bookmarked
    praise_num = Integer()  # number of likes

    source = Keyword()  # source

    class Meta:
        # Target index and document type.
        index = "technology"
        doc_type = "tec_articles"
Exemple #18
0
class B2BProductIndex(DocType):
    """Search-index representation of a ``B2BProduct`` model instance."""

    django_id = Integer()
    # ``raw`` sub-field is declared with ``index='no'``.
    name = String(analyzer="snowball", fields={"raw": String(index="no")})
    description = String(analyzer=html_strip)
    country = Integer()
    # ``multi=True``: these fields hold lists of integer ids.
    branches = Integer(multi=True)
    b2b_categories = Integer(multi=True)
    organization = Integer()
    price = Double()
    is_active = Boolean()
    is_deleted = Boolean()
    created_at = Date()

    @staticmethod
    def get_model():
        """Return the ``B2BProduct`` model class.

        The import is done inside the function rather than at module level.
        """
        from b24online.models import B2BProduct
        return B2BProduct

    @classmethod
    def get_queryset(cls):
        """Return all model objects with related rows prefetched."""
        all_products = cls.get_model().objects.all()
        return all_products.prefetch_related(
            "company", "company__countries", "branches"
        )

    @classmethod
    def to_index(cls, obj):
        """Build and return the index document for ``obj``.

        Scalar fields are copied from ``obj`` and its company. The category
        and branch lists contain the de-duplicated primary keys of every
        related item together with its ancestors.
        """
        index_instance = cls(
            django_id=obj.pk,
            name=obj.name,
            description=obj.description,
            organization=obj.company.pk,
            is_active=obj.is_active,
            is_deleted=obj.is_deleted,
            country=obj.company.country.pk,
            price=obj.cost,
            created_at=obj.created_at,
        )

        category_ids = {
            pk
            for category in obj.categories.all()
            for pk in category.get_ancestors(include_self=True).values_list("pk", flat=True)
        }
        index_instance.b2b_categories = list(category_ids)

        branch_ids = {
            pk
            for branch in obj.branches.all()
            for pk in branch.get_ancestors(include_self=True).values_list("pk", flat=True)
        }
        index_instance.branches = list(branch_ids)

        return index_instance
Exemple #19
0
class UserIndex(Document):
    """Document mapping for a user and the user's nested skills."""

    user_id = Long()
    name = Text()
    # Keyword copy of ``name``; filled in by ``save``.
    name_keyword = Keyword()
    skills = Nested(UserSkillIndex)
    created_at = Date()

    class Index:
        name = IndexName.USER_INDEX
        settings = {"number_of_shards": 2}

    def save(self, **kwargs):
        """Stamp ``created_at``, mirror ``name`` into ``name_keyword``, save.

        ``created_at`` is set to ``datetime.now()`` on every call. ``kwargs``
        are forwarded to the parent ``save``; its result is returned.
        """
        self.created_at = datetime.now()
        self.name_keyword = self.name
        return super().save(**kwargs)

    def add_skill(self, skill_id, name, score):
        """Append a ``UserSkillIndex`` entry to ``skills``.

        ``name`` is stored both as ``name`` and as ``name_keyword``.
        """
        skill = UserSkillIndex(
            skill_id=skill_id,
            name=name,
            score=score,
            name_keyword=name,
        )
        self.skills.append(skill)

    @staticmethod
    def search_by_skill_name(skill_name, using=None, index=None):
        """Return a search for users having a skill matching ``skill_name``.

        The search is a nested ``match`` query on ``skills.name``. Results are
        sorted first by ``skills.score`` (descending, ``min`` mode, restricted
        to the matching skills) and then by ``_score``. The search object is
        returned without being executed here.
        """
        matching_skill = {"match": {"skills.name": skill_name}}
        skill_sort = {
            "skills.score": {
                "order": "desc",
                "mode": "min",
                "nested_path": "skills",
                "nested_filter": matching_skill,
            }
        }
        base_search = UserIndex.search(using=using, index=index)
        name_query = Q("match", **{"skills.name": skill_name})
        nested_search = base_search.query(
            "nested", path="skills", query=name_query
        )
        return nested_search.sort(skill_sort, "_score")
Exemple #20
0
class Project(DocType):
    """Search document describing a project, built from a release."""

    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer, index_options="docs")
    # ``multi=True``: the field holds a list of keyword values.
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)

    class Meta:
        # disable the _all field to save some space
        all = MetaField(enabled=False)

    @classmethod
    def from_db(cls, release):
        """Create a document from ``release``.

        The document id is ``release.normalized_name``. ``version`` is taken
        from ``release.all_versions``; every other populated field is copied
        from the attribute of the same name. ``license`` is not populated.
        """
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name
        obj["version"] = release.all_versions

        # Remaining fields map one-to-one onto release attributes.
        same_named_fields = (
            "latest_version",
            "summary",
            "description",
            "author",
            "author_email",
            "maintainer",
            "maintainer_email",
            "home_page",
            "download_url",
            "keywords",
            "platform",
            "created",
            "classifiers",
        )
        for field_name in same_named_fields:
            obj[field_name] = getattr(release, field_name)

        return obj
Exemple #21
0
class Project(Document):
    """Search document describing a project, built from a release."""

    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer)
    # ``multi=True``: the field holds a list of keyword values.
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)
    zscore = Float()

    @classmethod
    def from_db(cls, release):
        """Create a document from ``release``.

        The document id is ``release.normalized_name``. ``version`` holds
        ``release.all_versions`` sorted from highest to lowest as parsed by
        ``packaging.version.parse``; every other populated field is copied
        from the attribute of the same name. ``license`` is not populated.
        """
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name

        newest_first = sorted(
            release.all_versions,
            key=packaging.version.parse,
            reverse=True,
        )
        obj["version"] = newest_first

        # Remaining fields map one-to-one onto release attributes.
        same_named_fields = (
            "latest_version",
            "summary",
            "description",
            "author",
            "author_email",
            "maintainer",
            "maintainer_email",
            "home_page",
            "download_url",
            "keywords",
            "platform",
            "created",
            "classifiers",
            "zscore",
        )
        for field_name in same_named_fields:
            obj[field_name] = getattr(release, field_name)

        return obj
Exemple #22
0
class _AggregateReportDoc(Document):
    """Document mapping for one record of a DMARC aggregate report.

    Stored in the ``dmarc_aggregate`` index. ``passed_dmarc`` is derived from
    the two alignment flags when the document is saved.
    """

    class Index:
        name = "dmarc_aggregate"

    # Report metadata.
    xml_schema = Text()
    org_name = Text()
    org_email = Text()
    org_extra_contact_info = Text()
    report_id = Text()
    date_range = Date()
    errors = Text()
    published_policy = Object(_PublishedPolicy)

    # Source of the reported messages.
    source_ip_address = Ip()
    source_country = Text()
    source_reverse_dns = Text()
    # NOTE(review): field name has a capital "B"; kept because renaming it
    # would change the stored field name.
    source_Base_domain = Text()
    # Must be a field *instance*: a bare ``Integer`` (the class itself) is
    # not picked up as a field and leaves ``message_count`` out of the mapping.
    message_count = Integer()

    # Evaluation results.
    disposition = Text()
    dkim_aligned = Boolean()
    spf_aligned = Boolean()
    passed_dmarc = Boolean()
    policy_overrides = Nested(_PolicyOverride)
    header_from = Text()
    envelope_from = Text()
    envelope_to = Text()
    dkim_results = Nested(_DKIMResult)
    spf_results = Nested(_SPFResult)

    def add_policy_override(self, type_, comment):
        """Append a ``_PolicyOverride`` with the given type and comment."""
        self.policy_overrides.append(
            _PolicyOverride(type=type_, comment=comment))

    def add_dkim_result(self, domain, selector, result):
        """Append a ``_DKIMResult`` for the given domain and selector."""
        self.dkim_results.append(
            _DKIMResult(domain=domain, selector=selector, result=result))

    def add_spf_result(self, domain, scope, result):
        """Append a ``_SPFResult`` for the given domain and scope."""
        self.spf_results.append(
            _SPFResult(domain=domain, scope=scope, result=result))

    def save(self, **kwargs):
        """Derive ``passed_dmarc`` and save the document.

        ``passed_dmarc`` is true when either ``spf_aligned`` or
        ``dkim_aligned`` is truthy. The result is coerced to ``bool`` so that
        missing alignment flags yield ``False`` instead of ``None``.
        ``kwargs`` are forwarded to the parent ``save``; its result is
        returned.
        """
        self.passed_dmarc = bool(self.spf_aligned or self.dkim_aligned)

        return super().save(**kwargs)
Exemple #23
0
class CompaniesHouseCompany(BaseESModel):
    """Elasticsearch document for the CompaniesHouseCompany model."""

    id = Keyword()
    company_category = fields.SortableCaseInsensitiveKeywordText()
    company_number = fields.SortableCaseInsensitiveKeywordText()
    company_status = fields.SortableCaseInsensitiveKeywordText()
    incorporation_date = Date()

    # ``name`` is copied into the keyword and trigram variants below.
    name = fields.SortableText(copy_to=["name_keyword", "name_trigram"])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()

    # Registered address; the postcode is copied into a trigram variant.
    registered_address_1 = Text()
    registered_address_2 = Text()
    registered_address_town = fields.SortableCaseInsensitiveKeywordText()
    registered_address_county = Text()
    registered_address_postcode = Text(
        copy_to="registered_address_postcode_trigram",
    )
    registered_address_postcode_trigram = fields.TrigramText()
    registered_address_country = fields.nested_id_name_field()

    sic_code_1 = Text()
    sic_code_2 = Text()
    sic_code_3 = Text()
    sic_code_4 = Text()
    uri = Text()

    # Per-field callables, keyed by field name.
    MAPPINGS = {
        "id": str,
        "registered_address_country": dict_utils.id_name_dict,
    }

    # Field names listed for searching.
    SEARCH_FIELDS = (
        # to match names like A & B
        "name",
        "name_trigram",
        "company_number",
        "registered_address_postcode_trigram",
    )

    class Meta:
        """Default document meta data."""

        doc_type = "companieshousecompany"
Exemple #24
0
class ZhiHuQuestionIndex(Document):
    """Document mapping for questions in the ``zhihu_question`` index."""

    class Index:
        name = "zhihu_question"

    suggest = Completion(analyzer=my_analyzer)

    # Question identity and text.
    question_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()
Exemple #25
0
class ArticleType(DocType):
    """Mapping for ``article`` documents stored in the ``jobbole`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    # Text fields are tokenized and analyzed.
    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Keyword fields are not tokenized; the value is stored in full.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Counters.
    praise_num = Integer()
    comment_num = Integer()
    share_num = Integer()
    fav_num = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type.
        index = "jobbole"
        doc_type = "article"
class Paper(Document):
    """Paper document stored in the ``arxiv_papers`` Elasticsearch index."""

    # ``title`` also exposes a ``raw`` keyword sub-field.
    title = Text(analyzer="snowball", fields={"raw": Keyword()})
    abstract = Text(analyzer="snowball")
    full_text = Text(analyzer="snowball")
    authors = Text(analyzer="snowball")
    date = Date()

    class Index:
        # Index name and the settings applied to it.
        name = "arxiv_papers"
        settings = {"number_of_shards": 2}

    def save(self, **kwargs):
        """Save the document, forwarding ``kwargs`` to the parent ``save``."""
        return super(Paper, self).save(**kwargs)
Exemple #27
0
class ArticleType(DocType):
    """Jobbole article type: ``article`` documents in the ``jobbole`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()
    # ``content`` is mapped as a keyword in this variant, not as text.
    content = Keyword()

    # Exact-match URL and image fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    tags = Text(analyzer="ik_max_word")

    class Meta:
        # Target index and document type.
        index = "jobbole"
        doc_type = "article"
Exemple #28
0
class IndexedCMSPage(Document):
    """Mapping for CMS pages indexed as web content."""

    body = Text(analyzer="english")
    description = Text(analyzer="english")
    # ``_exact`` sub-fields give a keyword variant of the same value.
    django_id = Text(fields={"_exact": Keyword()})
    language = Text(analyzer="english")
    page_id = Long()
    pub_date = Date()
    site_id = Long()
    slug = Text(analyzer="english", fields={"_exact": Keyword()})
    text = Text(analyzer="english")
    title = Text(analyzer="english", fields={"_exact": Keyword()})
    url = Text(fields={"_exact": Keyword()})

    class Index:
        # The index is addressed through the alias from the project settings.
        name = settings.ES_INDICES["web_content"]["alias"]

    class Meta:
        # Dynamic mapping is set to ``strict``.
        dynamic = MetaField("strict")
Exemple #29
0
class Pagina(Document):
    """Document mapping for a page; ``created_at`` is stamped on every save.

    ``acessos`` and ``pagina`` are declared as ``Integer()`` fields. They were
    previously ``int()``, i.e. the plain constant ``0``, which is not a field
    object and therefore never became part of the mapping.

    NOTE(review): ``Integer`` has to be imported from the same package as
    ``Text`` and ``Date``. Code that relied on the old class-level default of
    ``0`` should now set these values explicitly -- confirm against callers.
    """

    autor = Text()
    titulo = Text()
    categoria = Text()
    idioma = Text()
    instituicao = Text()
    acessos = Integer()
    pagina = Integer()
    base64 = Text()
    texto = Text()
    created_at = Date()

    class Index:
        name = ELASTICSEARCH_INDEX

    def save(self, **kwargs):
        """Set ``created_at`` to ``datetime.now()`` and save the document.

        ``kwargs`` are forwarded to the parent ``save``; its result is
        returned.
        """
        self.created_at = datetime.now()
        return super().save(**kwargs)
Exemple #30
0
class CareersType(DocType):
    """Job fair entry: ``careers_type`` documents in the ``careers`` index."""

    suggest = Completion(analyzer=ik_analyzer)

    # Exact-match URL fields.
    url = Keyword()
    tianyan_company_url = Keyword()

    # Full-text fields analyzed with ``ik_max_word``.
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")

    meet_name = Keyword()
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        # Target index and document type.
        index = "careers"
        doc_type = "careers_type"