class ndrcType(Document):  # custom class subclassing Document
    # Text fields are tokenized, so a Chinese analyzer must be chosen; ik_max_word is a Chinese analyzer
    name = "国家发改委"  # National Development and Reform Commission (NDRC)
    title = Text(analyzer="ik_max_word")  # field name = field type; Text is a string type that is tokenized to build an inverted index
    content = Text(analyzer="ik_max_word")
    file_content = Text(analyzer="ik_max_word")
    url = Keyword()  # Keyword is a plain string type that is not tokenized
    date = Date()  # Date is a date type
    year = Integer()
    month = Integer()
    day = Integer()
    image_urls = Keyword()
    attachments = Keyword()
    class0 = Keyword()
    class1 = Keyword()
    class2 = Keyword()
    class3 = Keyword()
    website = Keyword()

    class Index:
        name = "国家发改委"  # 国家发改委                                                     # 设置索引名称(相当于数据库名称)
Example #2
class Ips(Document):
    location = Nested(Location)
    infos = Nested(Info)
    target = Keyword()
    published_from = Date()

    class Index:
        name = 'w12scan'
        settings = {
            "number_of_shards": 2,
        }

    class Meta:
        doc_type = 'ips'

    def save(self, **kwargs):
        if not self.published_from:
            self.published_from = datetime.now()
        return super().save(**kwargs)
Example #3
def nested_company_field(field):
    """Nested field for lists of companies."""
    return Nested(
        properties={
            'id': Keyword(),
            'name': SortableCaseInsensitiveKeywordText(copy_to=f'{field}.name_trigram'),
            'name_trigram': TrigramText(),
            'trading_name': SortableCaseInsensitiveKeywordText(copy_to=f'{field}.trading_name_trigram'),
            'trading_name_trigram': TrigramText(),
        },
        include_in_parent=True,
    )
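
A usage sketch: the helper is assigned to an attribute whose name matches the field argument, so the copy_to targets (e.g. companies.name_trigram) resolve inside the nested mapping. The document and attribute names below are illustrative.

class ESOrder(Document):  # hypothetical document using the helper
    companies = nested_company_field('companies')  # argument must match the attribute name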
Example #4
class ESRelatedModel(BaseESModel):
    """Elasticsearch representation of SimpleModel model."""

    id = Keyword()
    simpleton = fields.id_name_field()

    MAPPINGS = {
        'simpleton': dict_utils.id_name_dict,
    }

    SEARCH_FIELDS = ('simpleton.name', )

    class Index:
        doc_type = DEFAULT_MAPPING_TYPE

    class Meta:
        """Default document meta data."""

        doc_type = DEFAULT_MAPPING_TYPE
Example #5
class Project(Document):

    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer)
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)
    zscore = Float()

    @classmethod
    def from_db(cls, release):
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name
        obj["version"] = sorted(release.all_versions,
                                key=lambda r: packaging.version.parse(r),
                                reverse=True)
        obj["latest_version"] = release.latest_version
        obj["summary"] = release.summary
        obj["description"] = release.description
        obj["author"] = release.author
        obj["author_email"] = release.author_email
        obj["maintainer"] = release.maintainer
        obj["maintainer_email"] = release.maintainer_email
        obj["home_page"] = release.home_page
        obj["download_url"] = release.download_url
        obj["keywords"] = release.keywords
        obj["platform"] = release.platform
        obj["created"] = release.created
        obj["classifiers"] = release.classifiers
        obj["zscore"] = release.zscore

        return obj

    class Index:
        # make sure this class can match any index so it will always be used to
        # deserialize data coming from elasticsearch.
        name = "*"
Example #6
class Doc(Document):
    page_id = Keyword()  # in es index news-page
    title = Text()
    summary = Text()
    text = Text()  # array
    lang = Keyword()
    extracted_event_ids = Keyword()
    extracted_entity_ids = Keyword()  # array
    user_event_ids = Keyword()
    user_entity_ids = Keyword()

    class Index:
        name = "learning-flair"
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}
Example #7
class ESCachedRequestIndex(Document):
    es_index = Keyword()
    es_query = Keyword()
    es_aggs = Keyword()
    es_request_digest = Keyword()
    host = Keyword()
    run_env_type = Keyword()
    is_cached = Boolean()
    # Do not use the elasticsearch_dsl Date type here; it does not serialize correctly
    request_date = Integer()

    class Index:
        name = 'chembl_glados_es_cache_usage'
        using = MONITORING_CONNECTION
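
Since request_date is an Integer rather than a Date, callers presumably store epoch timestamps. A write sketch under that assumption (the field values are illustrative):

from datetime import datetime, timezone

record = ESCachedRequestIndex(
    es_index="chembl_molecule",  # illustrative value
    is_cached=False,
    request_date=int(datetime.now(timezone.utc).timestamp() * 1000),  # epoch millis
)
record.save()  # routed through MONITORING_CONNECTION via the Index definition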
Example #8
class TestResult(Document):
    testrun_id = Keyword()
    case_id = Keyword()
    case_name = Text()
    case_tags = Keyword()
    suite_name = Keyword()
    env = Keyword()
    result = Keyword()
    case_comment = Text()
    stdout = Text()
    traceback = Text()

    class Index:
        name = 'test-result-*'
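
The wildcard name 'test-result-*' lets the class match reads across dated indices, but writes must target a concrete index, e.g.:

result = TestResult(testrun_id="run-42", case_id="case-1", result="passed")
result.save(index="test-result-2024.05.01")  # hypothetical concrete index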
Example #9
class MovieInfo(DocType):
    suggest = Completion()
    title = Text(analyzer=ik_analyzer)
    douban_score = Keyword()
    IMDb_score = Keyword()
    age = Keyword()
    introduction = Text(analyzer=ik_analyzer)
    type = Text(analyzer=ik_analyzer)
    url = Keyword()
    front_img_path = Keyword()
    download_url = Keyword()

    class Meta:
        index = 'movie'
Example #10
def address_field(index_country=True):
    """Address field as nested object."""
    if index_country:
        nested_country_field = country_field()
    else:
        nested_country_field = Object(properties={
            'id': Keyword(index=False),
            'name': Text(index=False),
        })

    return Object(properties={
        'line_1': Text(index=False),
        'line_2': Text(index=False),
        'town': Text(index=False),
        'county': Text(index=False),
        'postcode': Text(fields={
            'trigram': TrigramText(),
        }),
        'country': nested_country_field,
    })
Example #11
class MoviesAutoIndex(Document):
    title = Text(fields={'keyword': Keyword()})
    created = Date()
    year = Integer()
    rating = Float()
    genre = Text()
    suggest = Completion(analyzer=ascii_fold)

    def clean(self):
        # use every prefix of the title (longest first) as suggester input,
        # so the completion field matches partial titles
        self.suggest = {
            'input': [self.title[:j] for j in range(len(self.title), 1, -1)],
        }

    class Index:
        name = S_index
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}
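
A query sketch against the completion field, assuming documents have been saved (clean() fills suggest with title prefixes); the suggestion name 'title_hints' is arbitrary:

s = MoviesAutoIndex.search()
s = s.suggest("title_hints", "the bat", completion={"field": "suggest"})
resp = s.execute()
for option in resp.suggest.title_hints[0].options:
    print(option.text)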
Example #12
class Vocabulary(Document):
    name = Text()
    meaning = Text()
    tags = Keyword()
    score = Float()
    created_at = Date()
    updated_at = Date()

    class Index:
        name = "vocabularies"

    def save(self, **kwargs):
        # TODO: check vocab existence
        now = datetime.now()
        if not self.created_at:
            self.created_at = now
        self.updated_at = now
        return super().save(**kwargs)

    @staticmethod
    def from_feed(feed_data: VocabFeed):
        return Vocabulary(name=feed_data.name,
                          meaning=feed_data.meaning,
                          score=feed_data.frequency)
Example #13
class ElasticSearchCard(Document):
    Name = Text()
    Name_Suggest = Completion()
    Description = Text()
    Description_Suggest = Completion()
    Topic = Text()
    Topic_Suggest = Completion()

    category = Text(analyzer='snowball', fields={'raw': Keyword()})

    class Index:
        name = 'timelapsed'

    def save(self, **kwargs):
        return super().save(**kwargs)

    def delete(self, **kwargs):
        # Fix me.
        return super().delete(**kwargs)
Example #14
class Device(MyDocType):
    device_type = Keyword()
    pos_x = Integer()
    pos_y = Integer()
    radius = Integer()
    key = Keyword()
    mqtt_account = Object(doc_class=MQTTAccount,
                          properties={
                              'username': Keyword(),
                              'password': Keyword(),
                              'server': Keyword(),
                              'port': Integer(),
                              'keep_alive': Keyword(),
                              'clients_topic': Keyword(),
                              'response_topic': Keyword()
                          })

    class Meta:
        index = 'bluetooth'

    def verify_key(self, key):
        return self.key == hash_sha256(key)
Example #15
class PercolatorDoc(Document):
    """
    Document class used for storing the percolation queries.
    """
    # relevant fields from BlogPost must be also present here for the queries
    # to be able to use them. Another option would be to use document
    # inheritance but save() would have to be reset to normal behavior.
    content = Text()

    # the percolator query to be run against the doc
    query = Percolator()
    # list of tags to append to a document
    tags = Keyword(multi=True)

    class Index:
        name = 'test-percolator'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }
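
A minimal round trip, mirroring the pattern in elasticsearch_dsl's percolator example: register a stored query, then percolate an unindexed document against it. The _id and tag values are illustrative.

from elasticsearch_dsl import Q, Search

PercolatorDoc.init()
PercolatorDoc(
    _id="python",  # illustrative id
    tags=["python"],
    query=Q("match", content="python"),
).save(refresh=True)

# find the stored queries that match an in-flight document
s = Search(index="test-percolator").query(
    "percolate", field="query", document={"content": "a post about python"}
)
for hit in s:
    print(hit.tags)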
Example #16
def contact_or_adviser_field(include_dit_team=False):
    """Object field for advisers and contacts."""
    props = {
        'id': Keyword(),
        'first_name': NormalizedKeyword(),
        'last_name': NormalizedKeyword(),
        'name': Text(fields={
            'keyword': NormalizedKeyword(),
            'trigram': TrigramText(),
        }),
    }

    if include_dit_team:
        props['dit_team'] = id_name_field()

    return Object(properties=props)
Example #17
class EnumField(CustomField):
    builtin_type = Keyword()

    def __init__(self,
                 *args,
                 choice_type: Type[Enum] = None,
                 default=None,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.choice_type = choice_type
        self.default = default

    def _empty(self):
        return self.default

    def _serialize(self, data):
        return self.choice_type(data).name

    def _deserialize(self, data):
        return self.choice_type(data)
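
A hedged usage sketch, assuming CustomField wires _serialize/_deserialize into the document lifecycle the way elasticsearch_dsl custom fields do; the Status enum and Ticket document are illustrative:

from enum import Enum

class Status(Enum):
    OPEN = 1
    CLOSED = 2

class Ticket(Document):  # hypothetical document
    status = EnumField(choice_type=Status, default=Status.OPEN)

ticket = Ticket(status=Status.CLOSED)  # serialized as the name "CLOSED"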
Example #18
class Project(DocType):

    name = Text()
    normalized_name = Text(analyzer=NameAnalyzer)
    version = Keyword(multi=True)
    latest_version = Keyword()
    summary = Text(analyzer="snowball")
    description = Text(analyzer="snowball")
    author = Text()
    author_email = Text(analyzer=EmailAnalyzer)
    maintainer = Text()
    maintainer_email = Text(analyzer=EmailAnalyzer)
    license = Text()
    home_page = Keyword()
    download_url = Keyword()
    keywords = Text(analyzer="snowball")
    platform = Keyword()
    created = Date()
    classifiers = Keyword(multi=True)

    class Meta:
        # disable the _all field to save some space
        all = MetaField(enabled=False)

    @classmethod
    def from_db(cls, release):
        obj = cls(meta={"id": release.normalized_name})
        obj["name"] = release.name
        obj["normalized_name"] = release.normalized_name
        obj["version"] = sorted(
            release.all_versions,
            key=lambda r: packaging.version.parse(r),
            reverse=True,
        )
        obj["latest_version"] = release.latest_version
        obj["summary"] = release.summary
        obj["description"] = release.description
        obj["author"] = release.author
        obj["author_email"] = release.author_email
        obj["maintainer"] = release.maintainer
        obj["maintainer_email"] = release.maintainer_email
        obj["home_page"] = release.home_page
        obj["download_url"] = release.download_url
        obj["keywords"] = release.keywords
        obj["platform"] = release.platform
        obj["created"] = release.created
        obj["classifiers"] = release.classifiers

        return obj
Example #19
class Post(Document):
    title = Text()
    title_suggest = Completion()
    created_at = Date()
    published = Boolean()
    category = Text(analyzer=html_strip, fields={'raw': Keyword()})

    comments = Nested(Comment)

    class Index:
        name = INDEX
        using = CONNECTION_ALIAS

    def add_comment(self, author, content):
        self.comments.append(
            Comment(author=author, content=content, created_at=datetime.now()))

    def save(self, **kwargs):
        self.created_at = datetime.now()
        return super().save(**kwargs)
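
A usage sketch, assuming a connection is registered under CONNECTION_ALIAS and Comment is an InnerDoc with author/content/created_at:

post = Post(title="Hello world", published=True, category="<p>tech</p>")
post.add_comment("alice", "Great read!")
post.save()  # save() stamps created_at before indexing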
Example #20
class MyType(DocType):
    # Jobbole article type
    user_id = Keyword()
    criterion = Text(analyzer=ik_analyzer, similarity="BM25")  # synonyms
    intellectual_property = Text(analyzer="ik_max_word", similarity="BM25")
    paper = Text(analyzer="ik_max_word", similarity="BM25")
    research_project = Text(analyzer="ik_max_word", similarity="BM25")
    professional_certificate = Text(analyzer="ik_max_word", similarity="BM25")
    academic_activities = Text(analyzer="ik_max_word", similarity="BM25")
    experience = Text(analyzer="ik_max_word", similarity="BM25")
    further_study = Text(analyzer="ik_max_word", similarity="BM25")
    personal_register = Text(analyzer="ik_max_word", similarity="BM25")
    expert_title = Text(analyzer="ik_max_word", similarity="BM25")
    research = Text(analyzer="ik_max_word", similarity="BM25")
    domestic_studies = Text(analyzer="ik_max_word", similarity="BM25")
    professional_qualification = Text(analyzer="ik_max_word", similarity="BM25")

    class Meta:
        index = "roger"
        doc_type = "test"
Example #21
class BuildDoc(DocType):
    id = Keyword(required=True)
    # Note: Object() is used instead of Nested() because SearchKit
    # doesn't work with nested fields; Object() works fine.
    build = Object(_Build)
    source = Object(_Source)
    target = Object(_Target)
    download = Object(_Download)

    @classmethod
    def create(cls, id, **doc):
        assert id and isinstance(id, int) and id > 0
        return BuildDoc(
            meta={"id": id},
            id=id,
            build=_Build(**doc["build"]),
            source=_Source(**doc["source"]),
            target=_Target(**doc["target"]),
            download=_Download(**doc["download"]),
        )
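
A construction sketch; _Build, _Source, _Target and _Download are defined elsewhere in the repo, so the keyword arguments below are hypothetical:

doc = BuildDoc.create(
    1234,
    build={"date": "2024-05-01"},    # hypothetical keys; the inner doc
    source={"product": "firefox"},   # classes come from the surrounding code
    target={"platform": "linux64"},
    download={"size": 1024},
)
doc.save(index="builds")  # the class declares no index, so pass one explicitly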
Example #22
class XuanType(DocType):
    suggest = Completion(analyzer=ik_analyzer)
    # campus talks are searched by title and city
    title = Text(analyzer="ik_max_word")
    city = Text(analyzer="ik_max_word")
    img = Keyword()
    address = Keyword()
    time = Keyword()
    status = Keyword()
    detail_url = Keyword()
    from_school = Keyword()

    class Meta:
        index = "xuan"  # 索引===数据库
        doc_type = "info"  # 类型===表名
Example #23
def test_mapping_can_collect_all_analyzers():
    a1 = analysis.analyzer('my_analyzer1',
        tokenizer='keyword',
        filter=['lowercase', analysis.token_filter('my_filter1', 'stop', stopwords=['a', 'b'])],
    )
    a2 = analysis.analyzer('english')
    a3 = analysis.analyzer('unknown_custom')
    a4 = analysis.analyzer('my_analyzer2',
        tokenizer=analysis.tokenizer('trigram', 'nGram', min_gram=3, max_gram=3),
        filter=[analysis.token_filter('my_filter2', 'stop', stopwords=['c', 'd'])],
    )
    a5 = analysis.analyzer('my_analyzer3', tokenizer='keyword')

    m = mapping.Mapping('article')
    m.field('title', 'text', analyzer=a1,
        fields={
            'english': Text(analyzer=a2),
            'unknown': Keyword(search_analyzer=a3),
        }
    )
    m.field('comments', Nested(properties={
        'author': Text(analyzer=a4)
    }))
    m.meta('_all', analyzer=a5)

    assert {
        'analyzer': {
            'my_analyzer1': {'filter': ['lowercase', 'my_filter1'], 'tokenizer': 'keyword', 'type': 'custom'},
            'my_analyzer2': {'filter': ['my_filter2'], 'tokenizer': 'trigram', 'type': 'custom'},
            'my_analyzer3': {'tokenizer': 'keyword', 'type': 'custom'},
        },
        'filter': {
            'my_filter1': {'stopwords': ['a', 'b'], 'type': 'stop'},
            'my_filter2': {'stopwords': ['c', 'd'], 'type': 'stop'},
        },
        'tokenizer': {
            'trigram': {'max_gram': 3, 'min_gram': 3, 'type': 'nGram'},
        }
    } == m._collect_analysis()

    assert json.loads(json.dumps(m.to_dict())) == m.to_dict()
Example #24
class User(Document):
    id = Integer()
    name = Text(fields={'keywords': Keyword()})
    suggest = Completion(analyzer=ascii_fold)

    class Index:
        name = 'user'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0
        }

    def clean(self):
        """
        Automatically construct the suggestion input and weight by taking all
        possible permutation of Person's name as ``input`` and taking their
        popularity as ``weight``.
        """
        self.suggest = {
            'input': [' '.join(p) for p in permutations(self.name.split())],
        }
Example #25
class JobType(DocType):
    # Liepin (liepin.com) job posting type
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    s = Keyword()
    url = Keyword()
    url_object_id = Keyword()  # MD5 of the URL, for a fixed-length id
    salary = Keyword()
    work_years = Text(analyzer="ik_max_word")
    degree_need = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")
    create_date = Keyword()
    job_addr = Text(analyzer="ik_max_word")
    company_url = Keyword()
    company_name = Keyword()

    class Meta:
        index = "liepin"
        doc_type = "job"
Example #26
class Account(DocType):
    # bot name
    botName = Keyword()
    # available cash
    cash = Float()
    # individual positions
    positions = Nested()
    # market value
    value = Float()
    # market value + cash
    allValue = Float()
    # timestamp
    timestamp = Date()

    # computed at market close
    closing = Boolean()

    class Meta:
        index = 'account'
        doc_type = 'doc'
        all = MetaField(enabled=False)
Example #27
class ESSimpleModel(BaseESModel):
    """Elasticsearch representation of SimpleModel model."""

    id = Keyword()
    name = fields.SortableText(copy_to=['name_keyword', 'name_trigram'])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()

    MAPPINGS = {
        'id': str,
    }

    SEARCH_FIELDS = (
        'name',
        'name_trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = 'simplemodel'
Example #28
class Order(DocType):

    comment = Text(fields={'raw': Keyword()})
    status = Text()
    qty = Float()

    published = Boolean()
    created_at = Date()
    delta_series = Nested(Delta)

    class Meta:
        index = 'bitmex'

    def add_delta(self, **kwargs):
        self.delta_series.append(
            Delta(created_at=datetime.now(), **kwargs)
        )

    def save(self, **kwargs):
        self.created_at = datetime.now()
        return super().save(**kwargs)
Example #29
class SFNDNS(InnerDoc):
    event_type = Text()
    domain_name = Text(analyzer='snowball', fields={'raw': Keyword()})
    device_name = Text(analyzer='snowball', fields={'raw': Keyword()})
    host = Text(analyzer='snowball', fields={'raw': Keyword()})
    threat_id = Text(analyzer='snowball')
    threat_name = Text(analyzer='snowball')
    tag_name = Text(fields={'raw': Keyword()})
    tag_class = Text(fields={'raw': Keyword()})
    tag_group = Text(fields={'raw': Keyword()})
    tag_description = Text(analyzer='snowball')
    public_tag_name = Text(analyzer='snowball')
    confidence_level = Integer()
    sample_date = Date()
    file_type = Text(fields={'raw': Keyword()})
    updated_at = Date()
    processed = Integer()
    src_ip = Ip()
    dst_ip = Ip()
Example #30
class BlogPost(Document):
    title = Text()
    published = Date()
    tags = Keyword(multi=True)
    content = Text()

    def is_published(self):
        return self.published and datetime.now() > self.published

    @classmethod
    def _matches(cls, hit):
        # override _matches to match indices in a pattern instead of just ALIAS
        # hit is the raw dict as returned by elasticsearch
        return fnmatch(hit["_index"], PATTERN)

    class Index:
        # we will use an alias instead of the index
        name = ALIAS
        # set settings and possibly other attributes of the index like
        # analyzers
        settings = {"number_of_shards": 1, "number_of_replicas": 0}