Ejemplo n.º 1
0
class NewsType(DocType):
    """News document mapping for index "information", doc type "news"."""

    # Text fields name their analyzer by string; the completion field is
    # given the ``ik_analyzer`` object, which is defined outside this class.
    title = Text(analyzer="ik_max_word")
    suggest = Completion(analyzer=ik_analyzer)
    url = Keyword()
    url_md5 = Keyword()
    category = Text(analyzer="ik_smart")
    summary = Text(analyzer="ik_smart")
    image_urls = Keyword()
    image_path = Keyword()
    from_platform = Keyword()
    # Declared as Keyword (not Date), unlike crawl_time below.
    news_time = Keyword()
    crawl_time = Date()
    news_score = Keyword()

    class Meta:
        index = "information"
        doc_type = "news"
Ejemplo n.º 2
0
class NewsType(Document):
    """News document stored in the "news" index (one shard)."""

    # Completion field, analyzed with "ik_max_word".
    suggest = Completion(analyzer="ik_max_word")

    id = Keyword()
    pubDate = Date()

    # Full-text content.
    title = Text(analyzer="ik_max_word")
    summary = Text(analyzer="ik_max_word")

    # Exact-match metadata.
    infoSource = Keyword()
    sourceUrl = Keyword()
    provinceId = Keyword()

    crawlTime = Date()
    provinceName = Text(analyzer="ik_max_word")

    class Index:
        name = "news"
        settings = {"number_of_shards": 1}
Ejemplo n.º 3
0
class ZhihuQuestionType(DocType):
    """Question document for index "zhihu_question", doc type "article"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)
    zhihu_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()
    crawl_time = Date()

    class Meta:
        index = "zhihu_question"
        doc_type = "article"
Ejemplo n.º 4
0
class ArticalType(DocType):
    """Article document for index "artical_linux", doc type "jobbole"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Exact-match URL and image references.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    favor_nums = Integer()

    # Full-text fields.
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "artical_linux"
        doc_type = "jobbole"
Ejemplo n.º 5
0
class WeiboType(Document):
    """Weibo post document stored in the "weibo" index (four shards)."""

    # Completion field, analyzed with "ik_max_word".
    suggest = Completion(analyzer="ik_max_word")

    id = Keyword()
    created_at = Date()

    # Nested JSON object; no inner properties are declared here.
    user = Object()

    # Multi-valued keyword field.
    pics = Keyword(multi=True)

    # Integer counters.
    reposts_count = Integer()
    comments_count = Integer()
    attitudes_count = Integer()

    text = Text(analyzer="ik_max_word")

    # Nested JSON object; no inner properties are declared here.
    video_info = Object()

    class Index:
        name = "weibo"
        settings = {"number_of_shards": 4}
Ejemplo n.º 6
0
class ZhihuAnswerType(DocType):
    """Answer document for index "zhihu", doc type "zhihuanswer"."""

    # The original comment here described a Jobbole article type, which
    # does not match this class; the fields below describe a Zhihu answer.
    suggest = Completion(analyzer=ik_analyzer)
    zhihu_id = Keyword()
    url = Keyword()
    question_id = Keyword()
    author_id = Keyword()
    content = Text(analyzer="ik_max_word")
    praise_num = Integer()
    comments_num = Integer()
    create_time = Date()
    update_time = Date()
    crawl_time = Date()

    class Meta:
        index = "zhihu"
        doc_type = "zhihuanswer"
Ejemplo n.º 7
0
class Block(ZubbiDoc):
    """Search document with context-aware name suggestions.

    The completion field ``name_suggest`` declares two category contexts,
    ``private`` and ``tenants``, each read from the document field of the
    same name.
    """

    name_suggest = Completion(contexts=[
        {
            "name": "private",
            "type": "category",
            "path": "private"
        },
        {
            "name": "tenants",
            "type": "category",
            "path": "tenants"
        },
    ])
    repo = Text(analyzer="whitespace")
    tenants = Text(multi=True, analyzer="whitespace")
    # NOTE (fschmidt): Elasticsearch does not support context suggestion for
    # Boolean fields. As we are using the private flag to filter the auto-
    # completion results, this must be Text.
    private = Text()
    url = Text()
    description = Text(analyzer="whitespace")
    description_html = Text()
    platforms = Text(multi=True, analyzer="whitespace")
    last_updated = Date(default_timezone="UTC")

    @staticmethod
    def suggest_field():
        """Return the name of the completion field used for suggestions."""
        return "name_suggest"

    @property
    def has_html_description(self):
        """True when ``description_html`` holds a non-empty value."""
        return bool(self.description_html)

    @property
    def has_html_changelog(self):
        """True when a non-empty ``changelog_html`` value is available.

        ``Block`` itself declares no ``changelog_html`` field, so the
        attribute is looked up defensively: a document without it reports
        ``False`` instead of raising ``AttributeError`` from inside the
        property.
        """
        return bool(getattr(self, "changelog_html", None))

    @property
    def description_rendered(self):
        """Return the description as markup, preferring the HTML version."""
        return self._renderable_field(self.description_html, self.description)

    def _renderable_field(self, html, raw):
        """Wrap a field value as markup.

        Args:
            html: Pre-rendered HTML; used verbatim when truthy.
            raw: Plain text fallback; escaped and wrapped in ``<pre>``.

        Returns:
            A ``jinja2.Markup`` instance, or ``None`` when both inputs are
            empty.
        """
        # NOTE(review): jinja2.Markup / jinja2.escape were removed in
        # Jinja2 3.1 (they live in MarkupSafe) - confirm the pinned version.
        if html:
            return jinja2.Markup(html)
        if raw:
            return jinja2.Markup("<pre>{}</pre>".format(jinja2.escape(raw)))
        return None
Ejemplo n.º 8
0
class SihouArticleType(DocType):
    """Article document for index "sihou", doc type "sihou_article"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields.
    title = Text(analyzer="ik_max_word")
    author = Text(analyzer="ik_max_word")

    create_date = Date()

    url = Keyword()
    url_object_id = Keyword()

    # Integer counters.
    view_count = Integer()
    praise_count = Integer()

    front_image_url = Keyword()
    front_image_path = Keyword()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "sihou"
        doc_type = "sihou_article"
Ejemplo n.º 9
0
class Article(DocType):
    """Article document for index "jobbole", doc type "jobbole_article"."""

    # Completion field using ``ik_analyzer`` (defined outside this class)
    # for both indexing and searching.
    title_suggest = Completion(
        analyzer=ik_analyzer,
        search_analyzer=ik_analyzer,
    )

    # Analyzed text with an additional Keyword sub-field named "title".
    title = Text(
        analyzer="ik_max_word",
        search_analyzer="ik_max_word",
        fields={"title": Keyword()},
    )

    id = Text()
    url = Text()
    front_image_url = Text()
    front_image_path = Text()
    create_date = Date()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    # Analyzed text with an additional Keyword sub-field named "tags".
    tags = Text(analyzer="ik_max_word", fields={"tags": Keyword()})
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "jobbole_article"
Ejemplo n.º 10
0
class ZhiHuQuestionType(DocType):
    """Zhihu question document for index "zhihu", doc type "question"."""

    # Zhihu question type.
    # The completion field is declared without an explicit analyzer.
    suggest = Completion()
    zhihu_id = Keyword()
    # Text fields receive the ``ik_analyzer`` object (defined outside this
    # class) rather than an analyzer name.
    topics = Text(analyzer=ik_analyzer)
    url = Keyword()
    title = Text(analyzer=ik_analyzer)
    content = Text(analyzer=ik_analyzer)
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()
    crawl_time = Date()

    class Meta:
        index = "zhihu"
        doc_type = "question"
Ejemplo n.º 11
0
class AnswerType(DocType):
    """Answer document for index "answer", doc type "tec_answer"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    time = Date()
    link_url = Keyword()
    content = Text(analyzer="ik_max_word")
    url_object_id = Keyword()
    tag = Text(analyzer="ik_max_word")  # tags

    comment_num = Integer()  # number of comments
    read_num = Integer()  # number of reads
    Collection_num = Integer()  # number of times favorited
    praise_num = Integer()  # number of likes

    source = Keyword()  # source

    class Meta:
        index = "answer"  # must be lowercase
        doc_type = "tec_answer"
Ejemplo n.º 12
0
class PipipaneType(DocType):
    """Document mapping for index "ebooksearch", doc type "pipipan"."""

    # pipipan type
    suggest = Completion(analyzer=ik_analyzer)  # term suggestions
    title = Text(analyzer="ik_max_word")
    url = Keyword()
    url_object_id = Keyword()
    read_num = Integer()
    # The remaining text fields use "ik_smart" rather than "ik_max_word".
    type = Text(analyzer="ik_smart")
    source_website = Keyword()
    upload_time = Date()
    crawl_time = Date()
    tag = Text(analyzer="ik_smart")
    description = Text(analyzer="ik_smart")

    class Meta:
        index = "ebooksearch"
        doc_type = "pipipan"
Ejemplo n.º 13
0
class JobType(DocType):
    """Job posting document for index "jobinfo", doc type "job"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()
    job_type = Text(analyzer="ik_max_word")
    job_city = Keyword()
    job_desc = Text(analyzer="ik_max_word")
    company_name = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_advantage = Text(analyzer="ik_max_word")
    # Unlike the other text fields, this one is given the ``ik_analyzer``
    # object instead of the "ik_max_word" analyzer name.
    job_addr = Text(analyzer=ik_analyzer)

    class Meta:
        index = "jobinfo"
        doc_type = "job"
class ArticleType(DocType):
    """Article document; index and doc type come from external constants."""

    suggest = Completion(analyzer=ik_analyzer)  # autocomplete
    article_type = Keyword()  # article type
    nick_name = Keyword()  # nickname
    article_title = Keyword()  # article title
    article_link = Keyword()  # article link
    article_desc = Text(analyzer="ik_max_word")  # article description
    comments = Keyword()  # article comments
    digg = Keyword()  # article like count
    user_name = Keyword()  # user name
    views = Keyword()  # view count
    source = Keyword()  # source

    class Meta:
        # Index name and doc type; the original note likens them to a
        # database name and a table name. Both constants are defined
        # outside this class.
        index = ELASTICSEARCH_INDEX
        doc_type = ELASTICSEARCH_TYPE
Ejemplo n.º 15
0
class Declaration(DocType):
    """Declaration document.

    Assumes a dynamic mapping is in place under which fields are not indexed
    by default.
    """

    # Person-level data, including the completion field for the full name.
    general = Object(properties={
        "full_name_suggest": Completion(preserve_separators=False),
        "full_name": String(index="analyzed"),
        "name": String(index="analyzed"),
        "patronymic": String(index="analyzed"),
        "last_name": String(index="analyzed"),
        "family_raw": String(index="analyzed"),
        "family": Nested(properties={
            "name": String(index="analyzed"),
            "relations": String(index="no"),
            "inn": String(index="no"),
        }),
        "post_raw": String(index="analyzed"),
        "post": Object(properties={
            "region": String(index="not_analyzed"),
            "office": String(index="not_analyzed"),
            "post": String(index="analyzed"),
        }),
    })

    # Metadata about the declaration itself; only the date keeps the
    # default indexing, every other property is declared with index="no".
    declaration = Object(properties={
        "date": NoneAwareDate(),
        "notfull": Boolean(index="no"),
        "notfull_lostpages": String(index="no"),
        "additional_info": Boolean(index="no"),
        "additional_info_text": String(index="no"),
        "needs_scancopy_check": Boolean(index="no"),
    })

    class Meta:
        index = "declarations"
Ejemplo n.º 16
0
class AtricleType(DocType):
    """Article document for index "jobbole", doc type "article"."""

    # Uses a custom analyzer object (``ik_analyzer``, defined outside this
    # class) for the completion field.
    suggest = Completion(analyzer=ik_analyzer)

    title = Text(analyzer="ik_max_word")
    create_date = Date()  # processed by a function
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    # In preparation for downloading the image.
    front_image_path = Keyword()
    pertain_to = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "article"
Ejemplo n.º 17
0
class NewsClsType(DocType):
    """News document for index "news", doc type "news_cls"."""

    # Elasticsearch model for Cailian Press (CLS) news.
    suggest = Completion(analyzer=ik_analyzer)
    article_id = Integer()
    # Full-text fields.
    title = Text(analyzer="ik_max_word")
    brief = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    create_date = Date()
    # Exact-match metadata.
    stock_code = Keyword()
    stock_name = Keyword()
    url = Keyword()
    source = Keyword()
    website = Keyword()

    class Meta:
        index = "news"
        doc_type = "news_cls"
Ejemplo n.º 18
0
class Article_4houType(DocType):
    """Article document for index "teachnical_4hou", doc type "A_4hou"."""

    suggest = Completion(analyzer=ik_analyzer)  # search suggestions
    image_local = Keyword()
    title = Text(analyzer="ik_max_word")
    url_id = Keyword()
    create_time = Date()
    url = Keyword()
    author = Keyword()
    tags = Text(analyzer="ik_max_word")
    # Integer counters.
    watch_nums = Integer()
    comment_nums = Integer()
    praise_nums = Integer()
    content = Text(analyzer="ik_max_word")

    class Meta:
        # NOTE(review): "teachnical" looks like a misspelling of
        # "technical"; it is the stored index name, so confirm before
        # changing it.
        index = "teachnical_4hou"
        doc_type = "A_4hou"
Ejemplo n.º 19
0
class MovieType(DocType):
    """Movie document for index "new_movie", doc type "movie"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    url = Keyword()
    url_object_id = Keyword()
    # Date, duration and score are all declared as plain keywords.
    create_date = Keyword()
    front_image_url = Keyword()
    tags = Text(analyzer="ik_max_word")
    duration = Keyword()
    score = Keyword()
    description = Text(analyzer="ik_max_word")

    image_url = Keyword()

    class Meta:
        index = "new_movie"
        doc_type = "movie"
Ejemplo n.º 20
0
class ArticleType(DocType):
    """Jobbole article document for index "jobbole", doc type "artice"."""

    # jobbole article
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        # NOTE(review): "artice" looks like a typo of "article"; it is the
        # stored doc type name, so confirm before changing it.
        doc_type = "artice"
Ejemplo n.º 21
0
class ZhiHuQuestionIndex(Document):
    """Question document stored in the "zhihu_question" index."""

    # Completion field; ``my_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=my_analyzer)

    question_id = Keyword()
    topics = Text(analyzer="ik_max_word")
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Index:
        name = "zhihu_question"
Ejemplo n.º 22
0
class ArticleType(DocType):
    """Article document for index "jobbole", doc type "article"."""

    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)

    # Text fields are tokenized by the analyzer.
    title = Text(analyzer="ik_max_word")
    create_date = Date()

    # Keyword fields are not tokenized; the value is stored whole.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_num = Integer()
    comment_num = Integer()
    share_num = Integer()
    fav_num = Integer()

    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        index = "jobbole"
        doc_type = "article"
class ZhiHuType(DocType):
    """Zhihu question document for index "zhihu", doc type "question"."""

    suggest = Completion(analyzer=ik_analyzer)
    # Zhihu question item. (An earlier comment here described a Jobbole
    # article type, which does not match this class.)
    zhihu_id = Keyword()
    topics = Keyword()
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()
    crawl_time = Date()

    class Meta:
        index = "zhihu"
        doc_type = "question"
Ejemplo n.º 24
0
class CareersType(DocType):
    """Job fair document for index "careers", doc type "careers_type"."""

    # Job fair
    suggest = Completion(analyzer=ik_analyzer)
    url = Keyword()
    tianyan_company_url = Keyword()
    # Full-text fields.
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")
    meet_name = Keyword()
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        index = "careers"
        doc_type = "careers_type"
Ejemplo n.º 25
0
class LagouJobType(DocType):
    """Job posting document for index "lagou", doc type "job"."""

    # Full-text fields.
    title = Text(analyzer="ik_max_word")
    salary = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_addr = Text(analyzer="ik_max_word")
    # Exact-match fields.
    job_advantage = Keyword()
    job_city = Keyword()
    job_type = Keyword()
    job_desc = Text(analyzer="ik_max_word")
    company_url = Keyword()
    url = Keyword()
    company_name = Keyword()
    publish_time = Date()
    # Completion field; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)

    class Meta:
        index = "lagou"
        doc_type = "job"
Ejemplo n.º 26
0
class ESLAL(DocType):
    """Article document for index "lal", doc type "article"."""

    # Completion and text fields here are declared without an explicit
    # analyzer.
    suggest = Completion()
    title = Text()

    # Exact-match identifiers and links.
    url = Keyword()
    journal = Keyword()
    doi = Keyword()
    id = Keyword()
    abs_img_url = Keyword()

    abstract = Text()

    # Both fields carry a Keyword sub-field named "keywords".
    # NOTE(review): the sub-field under ``authors`` is also called
    # "keywords" - possibly copied from the line above; confirm before
    # renaming, since queries may rely on "authors.keywords".
    keywords = Text(fields={"keywords": Keyword()})
    authors = Text(fields={"keywords": Keyword()})

    year = Integer()
    citing_num = Integer()
    company = Keyword()

    class Meta:
        index = "lal"
        doc_type = "article"
Ejemplo n.º 27
0
class JobboleArticleType(Document):
    """Article document stored in the "jobbole" index."""

    # Completion field, analyzed with "ik_max_word".
    suggest = Completion(analyzer="ik_max_word")
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    image_url = Keyword()
    image_path = Keyword()
    # Integer counters.
    vote_num = Integer()
    comment_num = Integer()
    book_num = Integer()
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Index:
        name = "jobbole"

    class Meta:
        # NOTE(review): whether Meta.doc_type is still honoured depends on
        # the installed elasticsearch_dsl version - confirm.
        doc_type = "article"
Ejemplo n.º 28
0
class DoubanMovieType(Document):
    """Movie document stored in the "douban" index."""

    # Completion field, analyzed with "ik_max_word".
    suggest = Completion(analyzer="ik_max_word")
    movie_name = Text(analyzer="ik_max_word")
    movie_url = Keyword()
    image_url = Keyword()
    year = Integer()
    # These text fields are declared without an explicit analyzer.
    country = Text()
    quote = Text()
    score = Text()
    ranking = Integer()
    comment_num = Integer()
    tags = Text(analyzer="ik_max_word")

    class Index:
        name = "douban"

    class Meta:
        # NOTE(review): whether Meta.doc_type is still honoured depends on
        # the installed elasticsearch_dsl version - confirm.
        doc_type = "movie"
Ejemplo n.º 29
0
class FilmType(DocType):
    """Film document for index "entertainment", doc type "film"."""

    # Search suggestions; ``ik_analyzer`` is defined outside this class.
    suggest = Completion(analyzer=ik_analyzer)

    # Main fields.
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    magnet = Keyword()
    publish_time = Date()
    content = Text(analyzer="ik_max_word")

    # Scores are stored as doubles.
    imdb_score = Double()
    douban_score = Double()

    ftp_address = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    class Meta:
        index = "entertainment"
        doc_type = "film"
Ejemplo n.º 30
0
class ArticleType(DocType):
    """Jobbole article document for index "jobbole", doc type "article"."""

    # Jobbole article type
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")  # title
    create_time = Date()  # creation time
    url = Keyword()  # url
    url_object_id = Keyword()  # md5 of the url
    front_image_url = Keyword()  # list image url
    front_path_url = Keyword()  # local image url
    praise_num = Integer()  # like count
    comment_num = Integer()  # comment count
    fav_num = Integer()  # favorite count
    tags = Text(analyzer="ik_max_word")  # tags
    content = Text(analyzer="ik_max_word")  # content

    class Meta:
        index = "jobbole"
        doc_type = "article"