Example #1
0
class AnquankeArticleType(DocType):
    """Mapping for documents stored in index ``anquanke`` as ``anquanke_article``."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, all analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    author = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    view_count = Integer()
    comment_count = Integer()
    like_count = Integer()

    create_date = Date()

    class Meta:
        doc_type = "anquanke_article"
        index = "anquanke"
class ZhiHuType(DocType):
    """Mapping for a Zhihu question item (index ``zhihu``, doc type ``question``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    zhihu_id = Keyword()
    topics = Keyword()
    url = Keyword()

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Meta:
        doc_type = "question"
        index = "zhihu"
class Paper(Document):
    """Paper document stored in Elasticsearch (index ``arxiv_papers``)."""

    # Snowball-analysed text; ``title`` also exposes a ``raw`` keyword sub-field.
    title = Text(analyzer="snowball", fields={"raw": Keyword()})
    authors = Text(analyzer="snowball")
    abstract = Text(analyzer="snowball")
    full_text = Text(analyzer="snowball")

    date = Date()

    class Index:
        settings = {"number_of_shards": 2}
        name = "arxiv_papers"

    def save(self, **kwargs):
        """Save the document, forwarding *kwargs* unchanged to the parent ``save``."""
        outcome = super(Paper, self).save(**kwargs)
        return outcome
Example #4
0
class LagouJobType(DocType):
    """Mapping for job postings (index ``lagou``, doc type ``job``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    salary = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    job_addr = Text(analyzer="ik_max_word")
    job_desc = Text(analyzer="ik_max_word")

    # Keyword fields.
    job_advantage = Keyword()
    job_city = Keyword()
    job_type = Keyword()
    company_name = Keyword()
    company_url = Keyword()
    url = Keyword()

    publish_time = Date()

    class Meta:
        doc_type = "job"
        index = "lagou"
Example #5
0
class Pagina(Document):
    """Page document stored in the index named by ``ELASTICSEARCH_INDEX``.

    ``created_at`` is overwritten with the current local time on every
    ``save`` call.
    """

    autor = Text()
    titulo = Text()
    categoria = Text()
    idioma = Text()
    instituicao = Text()
    # Declared as Integer fields so they become part of the mapping; the
    # previous ``int()`` merely bound the constant 0 as a class attribute.
    acessos = Integer()
    pagina = Integer()
    base64 = Text()
    texto = Text()
    created_at = Date()

    class Index:
        name = ELASTICSEARCH_INDEX

    def save(self, **kwargs):
        """Stamp ``created_at`` with ``datetime.now()`` and save the document.

        All keyword arguments are forwarded unchanged to the parent ``save``,
        whose return value is returned.
        """
        self.created_at = datetime.now()
        return super().save(**kwargs)
Example #6
0
class ArticleType(DocType):
    """Jobbole (伯乐在线) article mapping (index ``jobbole``, doc type ``article``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")  # title
    tags = Text(analyzer="ik_max_word")  # tags
    content = Text(analyzer="ik_max_word")  # body content

    # Keyword fields.
    url = Keyword()  # url
    url_object_id = Keyword()  # md5 of the url
    front_image_url = Keyword()  # list image url
    front_path_url = Keyword()  # local image url

    # Integer counters.
    praise_num = Integer()  # number of likes
    comment_num = Integer()  # number of comments
    fav_num = Integer()  # number of favourites

    create_time = Date()  # creation time

    class Meta:
        doc_type = "article"
        index = "jobbole"
Example #7
0
class JobboleArticleType(Document):
    """Article document stored in index ``jobbole`` with doc type ``article``."""

    # Completion-suggester field, analysed with ``ik_max_word``.
    suggest = Completion(analyzer="ik_max_word")

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    image_url = Keyword()
    image_path = Keyword()

    # Integer counters.
    vote_num = Integer()
    comment_num = Integer()
    book_num = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"

    class Index:
        name = "jobbole"
Example #8
0
class ArticleType(DocType):
    """Jobbole article mapping (index ``jobbole``, doc type ``article``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer='ik_max_word')  # parent index cannot have same title
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()  # md5 of the (long) url
    front_image_url = Keyword()
    front_image_path = Keyword()  # updated in the pipeline and settings
    praise_nums = Integer()
    # Fields must be *instances*: the bare ``Integer`` class is not picked up
    # as a field, so these two counters were missing from the mapping.
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer='ik_max_word')
    content = Text(analyzer='ik_max_word')

    class Meta:
        index = 'jobbole'
        doc_type = 'article'
class ArticleType(DocType):
    """Jobbole (伯乐在线) article mapping (index ``jobbole``, doc type ``article``)."""

    # ``suggest`` is declared as a Completion field to provide autocompletion;
    # it uses the externally defined ik_analyzer rather than "ik_max_word".
    suggest = Completion(analyzer=ik_analyzer)

    # Field classes correspond to the Elasticsearch field types; the
    # ``analyzer`` argument selects the analyser for full-text fields.
    title = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"  # type
        index = "jobbole"  # index
Example #10
0
class ArticleType(DocType):
    """Cnblogs (博客园) article mapping (index ``cnblogs``, doc type ``article``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    create_time = Date()

    class Meta:
        doc_type = "article"
        index = "cnblogs"
Example #11
0
class Mail(DocType):
    """Mail document stored in the ``mail`` index."""

    # Text fields (no analyser specified).
    mail_from = Text()
    mail_to = Text()
    id = Text()

    # Keyword fields.
    attachments = Keyword()
    tags = Keyword()
    virus = Keyword()

    att_count = Integer()
    sent_day = Date()

    class Meta:
        index = "mail"

    def save(self, **kwargs):
        """Save the document, forwarding *kwargs* unchanged to the parent ``save``."""
        outcome = super(Mail, self).save(**kwargs)
        return outcome

    def add_tag(self, tag):
        """Append ``{'tags': tag}`` to this document's ``tags`` value."""
        # NOTE(review): ``tags`` is declared as a plain Keyword field, yet a
        # dict is appended here - confirm this is the intended stored shape.
        entry = {'tags': tag}
        self.tags.append(entry)
Example #12
0
class ArticleType(DocType):
    """Jobbole (伯乐在线) article item mapping (index ``jobbole``, doc type ``article``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_obj_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    thumb_up_num = Integer()
    bookmark_num = Integer()
    comment_num = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"
        index = "jobbole"
    class Doc(Document):
        """Document holding a text, a creation timestamp and nested ``QA`` pairs.

        Stored in the index named by ``es_param.index_name``.
        """

        doc = Text()
        qa_pair = Nested(QA)
        created_at = Date()

        class Index:
            name = es_param.index_name

        def add_qa_pair(self, ans_id, ans_str, query_id, query_str):
            """Build a ``QA`` from the four arguments and append it to ``qa_pair``."""
            pair = QA(
                ans_id=ans_id,
                ans_str=ans_str,
                query_id=query_id,
                query_str=query_str,
            )
            self.qa_pair.append(pair)

        def save(self, **kwargs):
            """Stamp ``created_at`` with ``datetime.now()``, then save via the parent class."""
            self.created_at = datetime.now()
            outcome = super().save(**kwargs)
            return outcome
Example #14
0
class Image(DocType):
    """Image document stored in the index named by ``settings.ELASTICSEARCH_INDEX``."""

    # String fields that keep the default indexing (``title`` uses the
    # ``english`` analyser; ``tags`` is multi-valued).
    title = String(analyzer="english")
    creator = String()
    license_version = String()
    tags = String(multi=True)

    # String fields declared with index="not_analyzed".
    identifier = String(index="not_analyzed")
    creator_url = String(index="not_analyzed")
    url = String(index="not_analyzed")
    thumbnail = String(index="not_analyzed")
    foreign_landing_url = String(index="not_analyzed")
    provider = String(index="not_analyzed")
    source = String(index="not_analyzed")
    license = String(index="not_analyzed")

    created_on = Date()
    removed_from_source = Boolean()

    class Meta:
        index = settings.ELASTICSEARCH_INDEX
Example #15
0
class ArticleType(DocType):
    """Elasticsearch mappings for articles (index ``jobbole``, doc type ``article``)."""

    # Completion-suggester field.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields; these also use the externally defined ik_analyzer.
    title = Text(analyzer=ik_analyzer)
    tags = Text(analyzer=ik_analyzer)
    content = Text(analyzer=ik_analyzer)

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"
        index = "jobbole"
Example #16
0
class ArticleType(DocType):
    """Article type mapping stored in index ``douban`` with doc type ``movice``."""

    # Keyword fields.
    movie_name = Keyword()
    ranking = Keyword()
    score = Keyword()
    score_num = Keyword()

    create_date = Date()

    # Disabled full-text fields, kept for reference:
    # title = Text(analyzer="ik_max_word")
    # tags = Text(analyzer="ik_max_word")
    # content = Text(analyzer="ik_max_word")

    class Meta:
        # Index name and doc type (the original comment likens them to a
        # database name and a table name).
        # NOTE(review): "movice" looks like a typo of "movie"; kept as-is
        # because it is the doc type name used at runtime.
        doc_type = "movice"
        index = "douban"
Example #17
0
class ArticleType(DocType):
    """Jobbole (伯乐在线) article mapping (index ``jobbole``, doc type ``article``)."""

    # Field used for autocompletion.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    contents = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_md5 = Keyword()
    front_img_url = Keyword()
    front_img_path = Keyword()

    # Integer counters.
    zan = Integer()
    store = Integer()
    comments = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"
        index = "jobbole"
Example #18
0
class ArticleType(DocType):
    """Cnblogs type mappings (index ``cnblogs``, doc type ``article``)."""

    # Completion-suggester field; note it takes the externally defined
    # ik_analyzer object, not an analyser name string.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields: stored whole, without tokenisation.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    like_nums = Integer()
    view_nums = Integer()
    comment_nums = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"
        index = "cnblogs"
Example #19
0
class FilmType(DocType):
    """Film mapping (index ``entertainment``, doc type ``film``)."""

    # Search-suggestion support.
    suggest = Completion(analyzer=ik_analyzer)

    # Main fields follow.
    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    magnet = Keyword()
    ftp_address = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Double-precision scores.
    imdb_score = Double()
    douban_score = Double()

    publish_time = Date()

    class Meta:
        doc_type = "film"
        index = "entertainment"
Example #20
0
class EsArea(Document):
    """Per-area counts document stored in index ``area`` (3 shards)."""

    # Completion-suggester field, analysed with ``ik_max_word``.
    suggest = Completion(analyzer="ik_max_word")

    # Location names, analysed with ``ik_max_word``.
    continentName = Text(analyzer="ik_max_word")
    countryName = Text(analyzer="ik_max_word")
    provinceName = Text(analyzer="ik_max_word")
    cityName = Text(analyzer="ik_max_word")

    # Integer counters.
    currentConfirmedCount = Integer()
    confirmedCount = Integer()
    suspectedCount = Integer()
    curedCount = Integer()
    deadCount = Integer()

    updateTime = Date()

    class Index:
        settings = {"number_of_shards": 3}
        name = "area"
Example #21
0
class ChatUpdate(DocType):
    """Chat update document stored in the ``chatupdates`` index.

    ``created_at`` is overwritten with the current UTC time on every save.
    """

    # Keyword fields.
    channel_id = Keyword()
    from_id = Keyword()
    username = Keyword()

    # Text fields (no analyser specified).
    title = Text()
    about = Text()
    pinnedMessage = Text()

    # Numeric fields.
    sentimentPolarity = Float()
    sentimentSubjectivity = Float()
    participants_count = Integer()

    created_at = Date()

    class Meta:
        index = "chatupdates"

    def save(self, **kwargs):
        """Stamp ``created_at`` with ``datetime.utcnow()``, then save via the parent class."""
        self.created_at = datetime.utcnow()
        outcome = super().save(**kwargs)
        return outcome
Example #22
0
class EsOverAll(Document):
    """Overall counts document stored in index ``overall`` (3 shards)."""

    # ``*Count`` integer fields.
    currentConfirmedCount = Integer()
    confirmedCount = Integer()
    suspectedCount = Integer()
    curedCount = Integer()
    deadCount = Integer()

    # ``*Incr`` integer fields.
    currentConfirmedIncr = Integer()
    confirmedIncr = Integer()
    suspectedIncr = Integer()
    curedIncr = Integer()
    deadIncr = Integer()

    updateTime = Date()

    class Index:
        settings = {"number_of_shards": 3}
        name = "overall"
Example #23
0
class CareersType(DocType):
    """Job fair (招聘会) mapping (index ``careers``, doc type ``careers_type``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")
    school_name = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    tianyan_company_url = Keyword()
    meet_name = Keyword()
    address = Keyword()

    meet_time = Date()

    class Meta:
        doc_type = "careers_type"
        index = "careers"
Example #24
0
class ArticleType(DocType):
    """Jobbole (伯乐在线) article mapping (index ``jobbole``, doc type ``article``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Each declaration fixes the storage type and, for text, the analyser.
    title = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()  # article link
    url_object_id = Keyword()  # md5 of the link
    front_image_url = Keyword()  # image link
    front_image_path = Keyword()  # image storage path

    # Integer counters.
    praise_number = Integer()
    fav_nums = Integer()
    comment_nums = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"  # doc type ("table") name
        index = "jobbole"  # index name
Example #25
0
class IndexedCMSPage(Document):
    """CMS page document with a strict (non-dynamic) mapping.

    The index name is read from ``settings.ES_INDICES['web_content']['alias']``.
    """

    # English-analysed text fields; ``slug`` and ``title`` also expose an
    # ``_exact`` keyword sub-field.
    title = Text(analyzer="english", fields={"_exact": Keyword()})
    slug = Text(analyzer="english", fields={"_exact": Keyword()})
    description = Text(analyzer="english")
    body = Text(analyzer="english")
    text = Text(analyzer="english")
    language = Text(analyzer="english")

    # Text fields without an explicit analyser, each with an ``_exact`` sub-field.
    django_id = Text(fields={"_exact": Keyword()})
    url = Text(fields={"_exact": Keyword()})

    # Long integer identifiers.
    page_id = Long()
    site_id = Long()

    pub_date = Date()

    class Meta:
        dynamic = MetaField('strict')

    class Index:
        name = settings.ES_INDICES['web_content']['alias']
Example #26
0
class Douban(Document):
    """Document stored in index ``douban`` with doc type ``info``."""

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    nickname = Text(analyzer="ik_max_word")
    language = Text(analyzer="ik_max_word")
    introduction = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    director = Keyword()
    area = Keyword()
    score = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    time = Date()

    class Index:
        name = "douban"

    class Meta:
        doc_type = "info"
Example #27
0
class ZhiHuQuestionIndex(Document):
    """Zhihu question document stored in index ``zhihu_question``."""

    # Completion-suggester field, analysed with the externally defined my_analyzer.
    suggest = Completion(analyzer=my_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    topics = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    question_id = Keyword()
    title_keyword = Keyword()
    url = Keyword()

    # Integer counters.
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()

    crawl_time = Date()

    class Index:
        name = "zhihu_question"
class JobboleEsType(DocType):
    """Jobbole article mapping (index ``jobbole``, doc type ``article``)."""

    # Note (translated from the original author): the elasticsearch_dsl source
    # does not support choosing an analyser for Completion, so a custom ik
    # analyser object has to be passed in.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    tag = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"
        index = "jobbole"
Example #29
0
class ArticleType(DocType):
    """Jobbole (伯乐在线) article mapping (index ``jobbole``, doc type ``article``)."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()  # md5 gives the url a fixed length
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    praise_nums = Integer()
    fav_nums = Integer()
    comments_nums = Integer()

    create_date = Date()

    class Meta:
        doc_type = "article"
        index = "jobbole"
Example #30
0
class XianzhiArticleType(DocType):
    """Mapping for documents stored in index ``xianzhi`` as ``xianzhi_article``."""

    # Completion-suggester field, analysed with the externally defined ik_analyzer.
    suggest = Completion(analyzer=ik_analyzer)

    # Full-text fields, all analysed with ``ik_max_word``.
    title = Text(analyzer="ik_max_word")
    author = Text(analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    # Keyword fields.
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    # Integer counters.
    view_count = Integer()
    follow_count = Integer()
    mark_count = Integer()

    create_date = Date()

    class Meta:
        doc_type = "xianzhi_article"
        index = "xianzhi"