class Article(DocType):
    # Elasticsearch mapping for a crawled article (legacy elasticsearch-dsl
    # DocType API; index/doc_type are selected via the inner Meta class).
    # Autocomplete source field; ik_analyzer is a Chinese-text analyzer.
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)
    # Analyzed for search, with an exact-match keyword sub-field 'title.title'.
    title = Text(analyzer='ik_max_word', search_analyzer="ik_max_word", fields={'title': Keyword()})
    id = Text()
    url = Text()
    front_image_url = Text()
    front_image_path = Text()
    create_date = Date()
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer='ik_max_word', fields={'tags': Keyword()})
    content = Text(analyzer='ik_max_word')

    class Meta:
        # Target index and mapping type in Elasticsearch.
        index = 'lcv-search'
        doc_type = 'article'
class Table(SearchableResource):
    # Search mapping for a database table resource. Each text field carries a
    # 'keyword' sub-field for exact matching/aggregation; the analyzer choices
    # come from the project-level Analyzer registry.
    display_name = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.general_analyzer)
    database = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.general_analyzer)
    cluster = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.general_analyzer)
    schema = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.stemming_analyzer)
    # multi=True: list-valued fields (one entry per column).
    columns = Text(multi=True, fields={"keyword": Keyword()}, analyzer=Analyzer.stemming_analyzer)
    column_descriptions = Text(
        multi=True,
        # Extra sub-field analyzed alphanumerically for code-like tokens.
        fields={"alphanumeric": Text(analyzer=Analyzer.alphanum_analyzer)},
        analyzer=Analyzer.english_analyzer)
class SkillIndex(DocType):
    """Elasticsearch index mapping for a Skill."""

    id = Integer()
    name = Text(analyzer='standard')

    class Meta:
        """Index metadata."""

        index = 'skills'

    @classmethod
    def store_index(cls, skill):
        """Create or update the index document for *skill*.

        Returns the saved document serialized as a dict, including the
        Elasticsearch metadata (_index, _id, ...).
        """
        doc = cls(meta={'id': skill.id}, id=skill.id, name=skill.name)
        doc.save()
        return doc.to_dict(include_meta=True)
class DicomDoc(Document):
    """Elasticsearch document for DICOM exam metadata (index 'dicom')."""

    # Field names are Portuguese: exam date, patient name, study description,
    # series description, patient birth date, exam specialty.
    data_exame = Text(analyzer='snowball')
    nome_paciente = Text(analyzer='snowball')
    descricao_estudo = Text(analyzer='snowball')
    descricao_serie = Text(analyzer='snowball')
    data_nasc_paciente = Text(analyzer='snowball')
    especialidade_exame = Text(analyzer='snowball')

    class Index:
        name = 'dicom'
        settings = {
            "number_of_shards": 2,
        }

    def save(self, **kwargs):
        """Persist the document.

        BUG FIX: the original unconditionally ran
        ``self.lines = len(self.body.split())`` although this mapping
        declares no ``body`` field, so save() raised AttributeError
        whenever no ``body`` value had been set on the instance
        (copy-paste from the elasticsearch-dsl docs example). Only derive
        ``lines`` when a body value is actually present.
        """
        body = getattr(self, 'body', None)
        if body:
            self.lines = len(body.split())
        return super(DicomDoc, self).save(**kwargs)
class User(Document):
    # Elasticsearch document for an application user, including embedded
    # login history and group membership.
    username = Text()
    email = Text()
    password = Text()  # NOTE(review): stored as plain Text — presumably a hash; confirm upstream
    name = Text()
    surname = Text()
    birthday = Date()
    gender = Text()
    login_logs = Nested(LoginLog)  # embedded login-history entries
    groups = Keyword(multi=True)   # list of group ids
    # Fields reserved for future use (kept from original):
    #location = Text()
    #description = Text()
    #url = Text()
    #registerDate = Date()
    #profileImagePath = Text()
    #phone = Text()
    #website = Text()
    #postCount = Integer()
    #posts = Nested(Post)
    #friendsCount = Integer()
    #friends = Nested(User)

    class Index:
        name = 'user'

    def save(self, **kwargs):
        # Plain passthrough; kept as an explicit override hook.
        return super().save(**kwargs)

    def add_log(self, device, ip, state, date):
        """Append a login-log entry in memory (caller must save()) and return it."""
        entry = LoginLog(device=device, ip=ip, state=state, date=date)
        self.login_logs.append(entry)
        return entry

    def get_login_logs(self):
        """Return the embedded login-log entries."""
        return self.login_logs

    # NOTE: camelCase method names kept — they are public API for callers.
    def addGroup(self, group_id):
        """Add group_id to groups if not already present (in memory only)."""
        if group_id not in self.groups:
            self.groups.append(group_id)

    def removeGroup(self, group_id):
        """Remove group_id from groups if present (in memory only)."""
        if group_id in self.groups:
            self.groups.remove(group_id)
class Faculty(DocType, Model):
    """Definition of the basic Faculty doctype.

    Contains any information related to a Faculty member instance pulled
    from the Forum data dump, or page scrapes. Data is saved in the
    elasticsearch index ``faculty``.
    """

    faculty_id = Integer(required=True)
    name = Text(required=True)
    email = Text(required=True)
    department = Text()
    # External researcher identifiers (optional).
    google_scholar = Text()
    orc_id = Text()
    sciverse_id = Text()
    research_id = Text()
    user_keywords = Text()

    class Meta:
        index = "faculty"

    def __str__(self):
        # BUG FIX: the original format string was missing the closing '>'.
        return "<Faculty ID:{} Name: {} Email: {}>".format(
            self.faculty_id, self.name, self.email)
class Article(Document):
    # Elasticsearch document for a news article.
    # 'raw' sub-field keeps the unanalyzed title for exact match/sorting.
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    body = Text(analyzer=html_strip)  # html_strip: custom analyzer removing markup
    tags = Keyword()
    language = Text()
    country = Text()
    publication_date = Date()
    source = Nested(Source)  # embedded publication-source object
    category = Keyword()
    url = Text()
    image_url = Text()

    class Index:
        name = "article-index"
        settings = {
            "number_of_shards": 2,
        }
class Post(Document):
    """
    Base class for Question and Answer containing the common fields.
    """

    author = Object(User, required=True)
    created = Date(required=True)
    body = Text(required=True)
    comments = Nested(Comment)
    question_answer = Join(relations={"question": "answer"})

    @classmethod
    def _matches(cls, hit):
        # Post is abstract — never deserialize search hits into it directly.
        return False

    class Index:
        name = "test-qa-site"
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    def add_comment(self, user, content, created=None, commit=True):
        """Append a Comment to this post and return it.

        Defaults ``created`` to now; persists the post immediately unless
        ``commit`` is False.
        """
        comment = Comment(
            author=user,
            content=content,
            created=created or datetime.now(),
        )
        self.comments.append(comment)
        if commit:
            self.save()
        return comment

    def save(self, **kwargs):
        """Persist the document, defaulting ``created`` to now when unset."""
        if self.created is None:
            self.created = datetime.now()
        return super(Post, self).save(**kwargs)
class OfficerInfoDocType(DocType):
    # Search document for a police officer; autocomplete analyzers power
    # type-ahead on names, badges and tags.
    id = Integer()
    percentiles = Nested(doc_class=OfficerYearlyPercentile, properties=OfficerYearlyPercentile.mapping())
    full_name = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    badge = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    badge_keyword = Keyword()           # exact-match counterpart of badge
    historic_badges_keyword = Keyword() # exact-match counterpart of historic_badges
    tags = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    historic_badges = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    allegation_count = Long()
    has_visual_token = Boolean()
    complaint_percentile = Float()
    cr_incident_dates = Date()
    trr_datetimes = Date()
    historic_units = Nested(
        properties={
            'id': Integer(),
            'long_unit_name': Text(analyzer=autocomplete, search_analyzer=autocomplete_search),
            'description': Text(analyzer=autocomplete, search_analyzer=autocomplete_search),
        })

    @staticmethod
    def get_top_officers(percentile=99.0, size=40):
        """Return up to `size` officers with a visual token whose
        complaint_percentile >= `percentile`, sorted descending."""
        query = OfficerInfoDocType.search().query(
            'bool',
            filter=[{
                'term': {
                    'has_visual_token': True
                }
            }, {
                'range': {
                    'complaint_percentile': {
                        'gte': percentile
                    }
                }
            }])
        query = query.sort({'complaint_percentile': 'desc'})
        # Slice before execute() so Elasticsearch applies the size limit.
        return query[0:size].execute()
class ESSimpleModel(BaseESModel):
    """Elasticsearch representation of SimpleModel model."""

    id = Keyword()
    name = Text(
        fields={
            # Normalized keyword for case-insensitive exact match;
            # trigram sub-field for fuzzy/partial matching.
            'keyword': fields.NormalizedKeyword(),
            'trigram': fields.TrigramText(),
        },
    )

    # Fields queried by default when searching this model.
    SEARCH_FIELDS = (
        'name',
        'name.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DOC_TYPE

    class Index:
        doc_type = DOC_TYPE
class CareersType(DocType):  # campus job-fair posting
    # Autocomplete suggestion field (ik_analyzer handles Chinese text).
    suggest = Completion(analyzer=ik_analyzer)
    url = Keyword()
    tianyan_company_url = Keyword()  # Tianyancha company-profile URL
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")
    meet_name = Keyword()  # job-fair event name
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        index = "careers"
        doc_type = "careers_type"
class ElasticRun(Document):
    # Elasticsearch document mirroring an MLflow Run.
    run_id = Keyword()
    name = Keyword()
    source_type = Keyword()
    source_name = Keyword()
    experiment_id = Keyword()
    user_id = Keyword()
    status = Keyword()
    start_time = Long()  # epoch millis, per MLflow convention — confirm
    end_time = Long()
    source_version = Keyword()
    lifecycle_stage = Keyword()
    artifact_uri = Text()
    latest_metrics = Nested(ElasticLatestMetric)
    params = Nested(ElasticParam)
    tags = Nested(ElasticTag)

    class Index:
        name = 'mlflow-runs'
        settings = {"number_of_shards": 2, "number_of_replicas": 2}

    def to_mlflow_entity(self) -> Run:
        """Convert this document into an MLflow Run entity.

        The document's ES _id doubles as the run id.
        """
        run_info = RunInfo(run_uuid=self.meta.id,
                           run_id=self.meta.id,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)
        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags])
        return Run(run_info=run_info, run_data=run_data)
class ZhihuAnswer(DocType):
    # Elasticsearch mapping for a crawled Zhihu answer (legacy DocType API).
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)  # autocomplete source
    id = Text()
    zhihu_id = Text()
    url = Text()
    question_id = Text()
    author_id = Text()
    content = Text(analyzer='ik_max_word')  # Chinese full-text analysis
    praise_num = Integer()
    comments_num = Integer()
    create_time = Date()
    update_time = Date()
    crawl_time = Date()

    class Meta:
        index = 'jobbole'
        doc_type = 'zhihu_answer'
class LianjiaType(DocType):
    # Elasticsearch mapping for a Lianjia (rental-housing site) listing.
    suggest = Completion(analyzer=ik_analyzer)
    url = Keyword()
    lianjia_id = Keyword()
    residential_district_name = Text(analyzer="ik_max_word")
    residential_district_url = Keyword()
    title = Text(analyzer="ik_max_word")
    region = Text(analyzer="ik_max_word")
    region_detail = Text(analyzer="ik_max_word")
    address = Text(analyzer="ik_max_word")
    house_area = Integer()
    room_count = Integer()
    face_direction = Text(analyzer="ik_max_word")
    rent_price = Integer()
    floor = Text(analyzer="ik_max_word")
    publish_time = Date()
    total_watch_count = Integer()
    # NOTE(review): 'crwal_time' is a typo for 'crawl_time', but it is part of
    # the persisted mapping — renaming would break existing data and callers.
    crwal_time = Date()

    class Meta:
        index = "lianjia"
        doc_type = "lianjia_house"
class CrawlerLogType(DocType):  # crawler log record format
    level = Keyword()   # log level (e.g. INFO/ERROR)
    message = Text(analyzer="ik_max_word")
    time = Date()
    subscribers = Keyword()
    # Structured detail about the crawl target.
    detail = Nested(
        properties={
            "website": Keyword(),
            "type": Keyword(),
        }
    )
    logger_name = Keyword()
    path = Keyword()
    host = Keyword()

    class Meta:
        index = "crawler_log"
        doc_type = "crawler_log"
        settings = {
            "number_of_shards": 5,
        }
class DocTerms(DocType):
    # Mapping for a document/term pair used by the term-explorer app.
    title = Keyword()
    text = Text(analyzer='simple')
    term = Keyword()
    # --- Add more fields here ---

    class Meta:
        ### !!! APPLICATION DEPENDENT LINE
        ### !!! This line defines which index you will be using to create your index
        ### It is also used by tw_query.py (called from the flask app tw_app.py) to
        ### determine which index to query against. So set this line to the correct
        ### index before starting tw_app.py
        index = 'test1_index'  #/// index
        # doc_type defaults to 'doc'
        #doc_type = 'doc_terms'
        # Index settings used at creation time.
        body = {
            'settings': {
                # just one shard, no replicas for testing
                'number_of_shards': 1,
                'number_of_replicas': 0
            }
        }
class ZhiHuAnswerType(Document):
    """Zhihu answer document."""

    suggest = Completion(analyzer=my_analyzer)  # autocomplete source
    # Zhihu question/answer item fields
    zhihu_id = Keyword()
    url = Keyword()
    question_id = Keyword()
    author_id = Keyword()
    content = Text(analyzer="ik_max_word")  # Chinese full-text analysis
    praise_num = Integer()
    comments_num = Integer()
    create_time = Date()
    update_time = Date()
    crawl_time = Date()
    author_name = Keyword()

    # Defines the corresponding index in Elasticsearch.
    class Index:
        name = 'zhihu'
        doc_type = "answer"

    # Legacy Meta kept alongside Index (pre-7.x doc_type support).
    class Meta:
        doc_type = "answer"
def apply_migrations(self):
    """Idempotently extend the 'declarations_v2' mapping.

    Adds the 'index_card' field and the nested
    'general.full_name_for_sorting' field when they are missing.
    Safe to run repeatedly: each put_mapping is guarded by a check of
    the current mapping. Uses the doc_type-aware mapping API (pre-7.x
    Elasticsearch client).
    """
    # add index_card mapping if not exists
    index = 'declarations_v2'
    doc_type = 'declaration'
    es = connections.connections.get_connection()
    mapping = es.indices.get_mapping(index=index, doc_type=doc_type)
    properties = mapping[index]['mappings'][doc_type]['properties']
    if 'index_card' not in properties:
        sys.stdout.write('Update mapping: add index_card\n')
        index_card_properties = {
            'properties': {
                # Ukrainian-analyzed full-text field for the summary card.
                'index_card': Text(index=True, analyzer='ukrainian').to_dict()
            }
        }
        es.indices.put_mapping(index=index, doc_type=doc_type, body=index_card_properties)
    if 'full_name_for_sorting' not in properties['general']['properties']:
        sys.stdout.write('Update mapping: add full_name_for_sorting\n')
        full_name_properties = {
            'properties': {
                'general': {
                    'properties': {
                        # Keyword for sorting; long names truncated at 100 chars.
                        'full_name_for_sorting': Keyword(index=True, ignore_above=100).to_dict()
                    }
                }
            }
        }
        es.indices.put_mapping(index=index, doc_type=doc_type, body=full_name_properties)
class RedditAwarding(InnerDoc):
    # Embedded document for a Reddit award attached to a post/comment.
    award_type = Keyword()
    coin_price = Integer()
    coin_reward = Integer()
    count = Integer()
    days_of_drip_extension = Integer()
    days_of_premium = Integer()
    description = Text(
        # Module-level constants keep text-field options consistent
        # across mappings in this file.
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    end_date = RedditDate()
    # Icon metadata is stored but neither indexed nor doc_valued (display only).
    icon_height = Short(doc_values=False, index=False)
    icon_url = Keyword(doc_values=False, index=False)
    icon_width = Short(doc_values=False, index=False)
    id = Keyword()
    is_enabled = Boolean()
    name = Keyword()
    resized_icons = Nested(RedditAwardingResizedIcon)
    start_date = RedditDate()
    subreddit_coin_reward = Integer()
    subreddit_id = Keyword()
class Article(Document):
    # Elasticsearch document for a scholarly article.
    id_num = Text(analyzer='standard')
    authors = Nested(Name)  # authors field is a Nested list of Name objects
    title = Text(analyzer=text_analyzer, boost=3)  # boost: title matches rank higher
    abstract = Text(analyzer=text_analyzer)
    body = Nested(Section)  # structured body, one entry per section
    body_text = Text(analyzer=text_analyzer)  # flattened body for full-text search
    citations = Nested(
        Citation)  # citations field is a Nested list of Citation objects
    pr = Float(doc_values=True)  # PageRank-style score — confirm semantics
    cited_by = Nested(AnchorText)
    anchor_text = Text(analyzer='standard')
    ents = Text(analyzer=entity_analyzer)  # extracted entities
    publish_time = Integer()
    in_english = Boolean()

    # override the Document save method to include subclass field definitions
    def save(self, *args, **kwargs):
        return super(Article, self).save(*args, **kwargs)
class Song(Media):
    # Elasticsearch document for a music track; extends the shared Media base.
    audio_stream = Object(AudioStream)  # technical stream info (codec, bitrate, ...)
    id_info = Object(ID)                # external identifier block
    album = Text()
    albumartist = Text()
    arranger = Keyword()
    artist = Text()
    bpm = Float()
    compilation = Keyword()
    composer = Text()
    conductor = Text()
    discnumber = Keyword()
    mood = Keyword()
    performer = Text()
    tracknumber = Keyword()

    class Index(_Index):
        name = 'music'
class News(DocType):
    """Elasticsearch mapping for a news article.

    Every analyzed text field carries a 'raw' keyword sub-field for
    exact matching and aggregations.
    """

    id = Integer()
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    publication = Text(analyzer='snowball', fields={'raw': Keyword()})
    author = Text(analyzer='snowball', fields={'raw': Keyword()})
    date = Date()
    year = Integer()
    month = Text(analyzer='snowball', fields={'raw': Keyword()})
    url = Text(analyzer='snowball', fields={'raw': Keyword()})
    content = Text(analyzer='snowball')

    class Meta:
        index = 'news14gb'

    def save(self, **kwargs):
        """Persist the document (explicit override hook, no extra logic)."""
        return super(News, self).save(**kwargs)
class ChapterSearch(InnerDoc):
    """Search document for a single chapter of a work."""

    title = Text()
    text = Text()
    image_alt_text = Text()
    work_notes = Text()
    summary = Text()
    number = Text()

    def create_from_json(self, chapter_json):
        """Populate this document from a chapter JSON dict and return it.

        BUG FIX: the original returned None implicitly, so
        save_from_json() crashed with AttributeError when it called
        .save() on the result. Returning ``self`` is backward-compatible
        for callers that ignore the return value.
        """
        self.number = chapter_json['number']
        self.title = chapter_json['title']
        self.text = chapter_json['text']
        self.image_alt_text = chapter_json['image_alt_text']
        self.summary = chapter_json['summary']
        return self

    def save_from_json(self, chapter_json):
        """Ensure the mapping exists, then index this chapter's data."""
        ChapterSearch.init()
        chapter = self.create_from_json(chapter_json)
        chapter.save()
class ZhihuQuestion(InnerDoc):
    # Elasticsearch mapping for a crawled Zhihu question.
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)  # autocomplete source
    # Analyzed title with an exact-match keyword sub-field 'title.title'.
    title = Text(analyzer='ik_max_word', search_analyzer="ik_max_word", fields={'title': Keyword()})
    content = Text(analyzer='ik_max_word')
    url = Text()
    question_id = Text()
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()
    topics = Text()
    id = Text()

    class Meta:
        index = 'jobbole'
        doc_type = 'zhihu_question'
class EntityDisease(DocType):
    """Knowledge-base document describing a disease entity."""

    name = Keyword()
    describe = Text(analyzer='ik_max_word')
    alias = Keyword()
    is_infect = Text(analyzer='ik_max_word')
    highrisk_group = Text(analyzer='ik_max_word')
    source_url = Text()
    treatment_cycle = Text(analyzer='ik_max_word')
    treatment_cost = Text(analyzer='ik_max_word')

    class Meta:
        index = 'med_base'

    def save(self, **kwargs):
        """Persist the document, deriving a stable id when none is set.

        The id is the MD5 hex digest of source_url, so re-saving the
        same source page updates the existing document instead of
        creating a duplicate.
        """
        if "_id" not in self.meta:
            digest = hashlib.md5(self.source_url.encode(encoding="utf-8"))
            self.meta['id'] = digest.hexdigest()
        return super(EntityDisease, self).save(**kwargs)
class ESDocument(Document): type = Text() # filtration and sorting fields will be in these 'metadata' metadata = Object(properties={ 'filters': Object(properties={ 'type': Text(), 'status': Text(), 'visibility': Text(), 'hidden': Boolean() }), 'sorting': Object(properties={ 'name': Text(fields={'keyword': Keyword()}), 'type': Text(fields={'keyword': Keyword()}), 'start_date': Date(fields={'keyword': Keyword()}), 'clicks': Integer(fields={'keyword': Keyword()}) }) })
class ArticleType_lagou(DocType):  # Lagou (job-listing site) item
    suggest = Completion(analyzer=ik_analyzer)  # autocomplete source
    title = Text(analyzer="ik_max_word")
    url = Keyword()
    url_object_id = Keyword()  # stable id derived from the URL — confirm upstream
    salary = Keyword()
    job_city = Text(analyzer="ik_max_word")
    work_years = Keyword()
    degree_need = Text(analyzer="ik_max_word")
    job_type = Text(analyzer="ik_max_word")
    publish_time = Keyword()
    tags = Text(analyzer="ik_max_word")
    job_advantage = Text(analyzer="ik_max_word")
    job_desc = Text(analyzer="ik_max_word")
    job_addr = Keyword()
    company_url = Keyword()
    company_name = Keyword()
    crawl_time = Date()

    class Meta:
        index = "lagou"
        doc_type = "job"
class LkPersonType(Document): suggest = Completion(analyzer="ik_smart") # id = Keyword() # parent_id = Keyword() url = Keyword() name = Text(analyzer="ik_smart") occupation = Text(analyzer="ik_smart") location = Text(analyzer="ik_smart") photo_url = Keyword() photo_path = Keyword() gender = Keyword() beauty_score = Integer() summary = Text(analyzer="ik_smart") company_exp = Text(analyzer="ik_smart") company_jobexp = Text(analyzer="ik_smart") school_exp = Text(analyzer="ik_smart") class Index: name = "lnkn" settings = { "number_of_shards": 1, "number_of_replicas": 0, }
class Doc(Document):
    """Defines the Elasticsearch document mapping for a PTT forum post.

    Reference: https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html
    """

    post_type = Integer(required=True)
    board = Text(required=True)
    author = Text(required=True)
    published = Date(required=True)
    title = Text()
    content = Text(required=True)
    ip = Text()
    upvote = Integer()
    novote = Integer()    # neutral ("arrow") votes in PTT terms — confirm
    downvote = Integer()
    type = Text()
    post_id = Text()

    class Index:
        """Index info."""

        name = 'ptt'
def __init__(
    self,
    word: str,
    examples: list[str],
    *,
    definitions: str,
    syllables: str,
    pronunciation: str,
    rhyme_patterns: str,
    frequency: str,
    letters: int,
    sounds: int,
):
    """Initialize a word record with its linguistic attributes.

    FIX: the original annotations were *instances* of mapping field
    classes (``Text()``, ``Integer()``), which are not types and
    mislead both readers and type checkers. Replaced with the stdlib
    types the values actually are; runtime behavior and the call
    signature are unchanged.
    """
    super().__init__()
    self.examples = examples
    self.word = word
    self.definitions = definitions
    self.syllables = syllables
    self.pronunciation = pronunciation
    self.rhyme_patterns = rhyme_patterns
    self.frequency = frequency
    self.letters = letters
    self.sounds = sounds