class Article(DocType):
    # Elasticsearch mapping for a crawled article (legacy elasticsearch-dsl
    # DocType API; index/doc_type are selected via the inner Meta class).
    # Autocomplete source field; ik_analyzer is a Chinese-text analyzer.
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)
    # Analyzed for search, with an exact-match keyword sub-field 'title.title'.
    title = Text(analyzer='ik_max_word', search_analyzer="ik_max_word", fields={'title': Keyword()})
    id = Text()
    url = Text()
    front_image_url = Text()
    front_image_path = Text()
    create_date = Date()
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer='ik_max_word', fields={'tags': Keyword()})
    content = Text(analyzer='ik_max_word')

    class Meta:
        # Target index and mapping type in Elasticsearch.
        index = 'lcv-search'
        doc_type = 'article'
class Table(SearchableResource):
    # Search mapping for a database table resource. Each text field carries a
    # 'keyword' sub-field for exact matching/aggregation; the analyzer choices
    # come from the project-level Analyzer registry.
    display_name = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.general_analyzer)
    database = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.general_analyzer)
    cluster = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.general_analyzer)
    schema = Text(required=True, fields={"keyword": Keyword()}, analyzer=Analyzer.stemming_analyzer)
    # multi=True: list-valued fields (one entry per column).
    columns = Text(multi=True, fields={"keyword": Keyword()}, analyzer=Analyzer.stemming_analyzer)
    column_descriptions = Text(
        multi=True,
        # Extra sub-field analyzed alphanumerically for code-like tokens.
        fields={"alphanumeric": Text(analyzer=Analyzer.alphanum_analyzer)},
        analyzer=Analyzer.english_analyzer)
class SkillIndex(DocType):
    """Elasticsearch index mapping for a Skill."""

    id = Integer()
    name = Text(analyzer='standard')

    class Meta:
        """Index metadata."""

        index = 'skills'

    @classmethod
    def store_index(cls, skill):
        """Create or update the index document for *skill*.

        Returns the saved document serialized as a dict, including the
        Elasticsearch metadata (_index, _id, ...).
        """
        doc = cls(meta={'id': skill.id}, id=skill.id, name=skill.name)
        doc.save()
        return doc.to_dict(include_meta=True)
class DicomDoc(Document):
    """Elasticsearch document for DICOM exam metadata (index 'dicom')."""

    # Field names are Portuguese: exam date, patient name, study description,
    # series description, patient birth date, exam specialty.
    data_exame = Text(analyzer='snowball')
    nome_paciente = Text(analyzer='snowball')
    descricao_estudo = Text(analyzer='snowball')
    descricao_serie = Text(analyzer='snowball')
    data_nasc_paciente = Text(analyzer='snowball')
    especialidade_exame = Text(analyzer='snowball')

    class Index:
        name = 'dicom'
        settings = {
            "number_of_shards": 2,
        }

    def save(self, **kwargs):
        """Persist the document.

        BUG FIX: the original unconditionally ran
        ``self.lines = len(self.body.split())`` although this mapping
        declares no ``body`` field, so save() raised AttributeError
        whenever no ``body`` value had been set on the instance
        (copy-paste from the elasticsearch-dsl docs example). Only derive
        ``lines`` when a body value is actually present.
        """
        body = getattr(self, 'body', None)
        if body:
            self.lines = len(body.split())
        return super(DicomDoc, self).save(**kwargs)
class User(Document):
    # Elasticsearch document for an application user, including embedded
    # login history and group membership.
    username = Text()
    email = Text()
    password = Text()  # NOTE(review): stored as plain Text — presumably a hash; confirm upstream
    name = Text()
    surname = Text()
    birthday = Date()
    gender = Text()
    login_logs = Nested(LoginLog)  # embedded login-history entries
    groups = Keyword(multi=True)   # list of group ids
    # Fields reserved for future use (kept from original):
    #location = Text()
    #description = Text()
    #url = Text()
    #registerDate = Date()
    #profileImagePath = Text()
    #phone = Text()
    #website = Text()
    #postCount = Integer()
    #posts = Nested(Post)
    #friendsCount = Integer()
    #friends = Nested(User)

    class Index:
        name = 'user'

    def save(self, **kwargs):
        # Plain passthrough; kept as an explicit override hook.
        return super().save(**kwargs)

    def add_log(self, device, ip, state, date):
        """Append a login-log entry in memory (caller must save()) and return it."""
        entry = LoginLog(device=device, ip=ip, state=state, date=date)
        self.login_logs.append(entry)
        return entry

    def get_login_logs(self):
        """Return the embedded login-log entries."""
        return self.login_logs

    # NOTE: camelCase method names kept — they are public API for callers.
    def addGroup(self, group_id):
        """Add group_id to groups if not already present (in memory only)."""
        if group_id not in self.groups:
            self.groups.append(group_id)

    def removeGroup(self, group_id):
        """Remove group_id from groups if present (in memory only)."""
        if group_id in self.groups:
            self.groups.remove(group_id)
class Faculty(DocType, Model):
    """Definition of the basic Faculty doctype.

    Contains any information related to a Faculty member instance pulled
    from the Forum data dump, or page scrapes. Data is saved in the
    elasticsearch index ``faculty``.
    """

    faculty_id = Integer(required=True)
    name = Text(required=True)
    email = Text(required=True)
    department = Text()
    # External researcher identifiers (optional).
    google_scholar = Text()
    orc_id = Text()
    sciverse_id = Text()
    research_id = Text()
    user_keywords = Text()

    class Meta:
        index = "faculty"

    def __str__(self):
        # BUG FIX: the original format string was missing the closing '>'.
        return "<Faculty ID:{} Name: {} Email: {}>".format(
            self.faculty_id, self.name, self.email)
class Article(Document):
    # Elasticsearch document for a news article.
    # 'raw' sub-field keeps the unanalyzed title for exact match/sorting.
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    body = Text(analyzer=html_strip)  # html_strip: custom analyzer removing markup
    tags = Keyword()
    language = Text()
    country = Text()
    publication_date = Date()
    source = Nested(Source)  # embedded publication-source object
    category = Keyword()
    url = Text()
    image_url = Text()

    class Index:
        name = "article-index"
        settings = {
            "number_of_shards": 2,
        }
class Post(Document):
    """
    Base class for Question and Answer containing the common fields.
    """

    author = Object(User, required=True)
    created = Date(required=True)
    body = Text(required=True)
    comments = Nested(Comment)
    question_answer = Join(relations={"question": "answer"})

    @classmethod
    def _matches(cls, hit):
        # Post is abstract — never deserialize search hits into it directly.
        return False

    class Index:
        name = "test-qa-site"
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    def add_comment(self, user, content, created=None, commit=True):
        """Append a Comment to this post and return it.

        Defaults ``created`` to now; persists the post immediately unless
        ``commit`` is False.
        """
        comment = Comment(
            author=user,
            content=content,
            created=created or datetime.now(),
        )
        self.comments.append(comment)
        if commit:
            self.save()
        return comment

    def save(self, **kwargs):
        """Persist the document, defaulting ``created`` to now when unset."""
        if self.created is None:
            self.created = datetime.now()
        return super(Post, self).save(**kwargs)
class OfficerInfoDocType(DocType):
    # Search document for a police officer; autocomplete analyzers power
    # type-ahead on names, badges and tags.
    id = Integer()
    percentiles = Nested(doc_class=OfficerYearlyPercentile, properties=OfficerYearlyPercentile.mapping())
    full_name = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    badge = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    badge_keyword = Keyword()           # exact-match counterpart of badge
    historic_badges_keyword = Keyword() # exact-match counterpart of historic_badges
    tags = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    historic_badges = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    allegation_count = Long()
    has_visual_token = Boolean()
    complaint_percentile = Float()
    cr_incident_dates = Date()
    trr_datetimes = Date()
    historic_units = Nested(
        properties={
            'id': Integer(),
            'long_unit_name': Text(analyzer=autocomplete, search_analyzer=autocomplete_search),
            'description': Text(analyzer=autocomplete, search_analyzer=autocomplete_search),
        })

    @staticmethod
    def get_top_officers(percentile=99.0, size=40):
        """Return up to `size` officers with a visual token whose
        complaint_percentile >= `percentile`, sorted descending."""
        query = OfficerInfoDocType.search().query(
            'bool',
            filter=[{
                'term': {
                    'has_visual_token': True
                }
            }, {
                'range': {
                    'complaint_percentile': {
                        'gte': percentile
                    }
                }
            }])
        query = query.sort({'complaint_percentile': 'desc'})
        # Slice before execute() so Elasticsearch applies the size limit.
        return query[0:size].execute()
class ESSimpleModel(BaseESModel):
    """Elasticsearch representation of SimpleModel model."""

    id = Keyword()
    name = Text(
        fields={
            # Normalized keyword for case-insensitive exact match;
            # trigram sub-field for fuzzy/partial matching.
            'keyword': fields.NormalizedKeyword(),
            'trigram': fields.TrigramText(),
        },
    )

    # Fields queried by default when searching this model.
    SEARCH_FIELDS = (
        'name',
        'name.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DOC_TYPE

    class Index:
        doc_type = DOC_TYPE
class CareersType(DocType):  # campus job-fair posting
    # Autocomplete suggestion field (ik_analyzer handles Chinese text).
    suggest = Completion(analyzer=ik_analyzer)
    url = Keyword()
    tianyan_company_url = Keyword()  # Tianyancha company-profile URL
    company_name = Text(analyzer="ik_max_word")
    professionals = Text(analyzer="ik_max_word")
    company_property = Text(analyzer="ik_max_word")
    industry_category = Text(analyzer="ik_max_word")
    city_name = Text(analyzer="ik_max_word")
    meet_name = Keyword()  # job-fair event name
    school_name = Text(analyzer="ik_max_word")
    meet_time = Date()
    address = Keyword()

    class Meta:
        index = "careers"
        doc_type = "careers_type"
class ElasticRun(Document):
    # Elasticsearch document mirroring an MLflow Run.
    run_id = Keyword()
    name = Keyword()
    source_type = Keyword()
    source_name = Keyword()
    experiment_id = Keyword()
    user_id = Keyword()
    status = Keyword()
    start_time = Long()  # epoch millis, per MLflow convention — confirm
    end_time = Long()
    source_version = Keyword()
    lifecycle_stage = Keyword()
    artifact_uri = Text()
    latest_metrics = Nested(ElasticLatestMetric)
    params = Nested(ElasticParam)
    tags = Nested(ElasticTag)

    class Index:
        name = 'mlflow-runs'
        settings = {"number_of_shards": 2, "number_of_replicas": 2}

    def to_mlflow_entity(self) -> Run:
        """Convert this document into an MLflow Run entity.

        The document's ES _id doubles as the run id.
        """
        run_info = RunInfo(run_uuid=self.meta.id,
                           run_id=self.meta.id,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)
        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags])
        return Run(run_info=run_info, run_data=run_data)
class ZhihuAnswer(DocType):
    # Elasticsearch mapping for a crawled Zhihu answer (legacy DocType API).
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)  # autocomplete source
    id = Text()
    zhihu_id = Text()
    url = Text()
    question_id = Text()
    author_id = Text()
    content = Text(analyzer='ik_max_word')  # Chinese full-text analysis
    praise_num = Integer()
    comments_num = Integer()
    create_time = Date()
    update_time = Date()
    crawl_time = Date()

    class Meta:
        index = 'jobbole'
        doc_type = 'zhihu_answer'
class LianjiaType(DocType):
    # Elasticsearch mapping for a Lianjia (rental-housing site) listing.
    suggest = Completion(analyzer=ik_analyzer)
    url = Keyword()
    lianjia_id = Keyword()
    residential_district_name = Text(analyzer="ik_max_word")
    residential_district_url = Keyword()
    title = Text(analyzer="ik_max_word")
    region = Text(analyzer="ik_max_word")
    region_detail = Text(analyzer="ik_max_word")
    address = Text(analyzer="ik_max_word")
    house_area = Integer()
    room_count = Integer()
    face_direction = Text(analyzer="ik_max_word")
    rent_price = Integer()
    floor = Text(analyzer="ik_max_word")
    publish_time = Date()
    total_watch_count = Integer()
    # NOTE(review): 'crwal_time' is a typo for 'crawl_time', but it is part of
    # the persisted mapping — renaming would break existing data and callers.
    crwal_time = Date()

    class Meta:
        index = "lianjia"
        doc_type = "lianjia_house"
class CrawlerLogType(DocType):  # crawler log record format
    level = Keyword()   # log level (e.g. INFO/ERROR)
    message = Text(analyzer="ik_max_word")
    time = Date()
    subscribers = Keyword()
    # Structured detail about the crawl target.
    detail = Nested(
        properties={
            "website": Keyword(),
            "type": Keyword(),
        }
    )
    logger_name = Keyword()
    path = Keyword()
    host = Keyword()

    class Meta:
        index = "crawler_log"
        doc_type = "crawler_log"
        settings = {
            "number_of_shards": 5,
        }
class DocTerms(DocType):
    # Mapping for a document/term pair used by the term-explorer app.
    title = Keyword()
    text = Text(analyzer='simple')
    term = Keyword()
    # --- Add more fields here ---

    class Meta:
        ### !!! APPLICATION DEPENDENT LINE
        ### !!! This line defines which index you will be using to create your index
        ### It is also used by tw_query.py (called from the flask app tw_app.py) to
        ### determine which index to query against. So set this line to the correct
        ### index before starting tw_app.py
        index = 'test1_index'  #/// index
        # doc_type defaults to 'doc'
        #doc_type = 'doc_terms'
        # Index settings used at creation time.
        body = {
            'settings': {
                # just one shard, no replicas for testing
                'number_of_shards': 1,
                'number_of_replicas': 0
            }
        }
class ZhiHuAnswerType(Document):
    """Zhihu answer document."""

    suggest = Completion(analyzer=my_analyzer)  # autocomplete source
    # Zhihu question/answer item fields
    zhihu_id = Keyword()
    url = Keyword()
    question_id = Keyword()
    author_id = Keyword()
    content = Text(analyzer="ik_max_word")  # Chinese full-text analysis
    praise_num = Integer()
    comments_num = Integer()
    create_time = Date()
    update_time = Date()
    crawl_time = Date()
    author_name = Keyword()

    # Defines the corresponding index in Elasticsearch.
    class Index:
        name = 'zhihu'
        doc_type = "answer"

    # Legacy Meta kept alongside Index (pre-7.x doc_type support).
    class Meta:
        doc_type = "answer"
def apply_migrations(self):
    """Idempotently extend the 'declarations_v2' mapping.

    Adds the 'index_card' field and the nested
    'general.full_name_for_sorting' field when they are missing.
    Safe to run repeatedly: each put_mapping is guarded by a check of
    the current mapping. Uses the doc_type-aware mapping API (pre-7.x
    Elasticsearch client).
    """
    # add index_card mapping if not exists
    index = 'declarations_v2'
    doc_type = 'declaration'
    es = connections.connections.get_connection()
    mapping = es.indices.get_mapping(index=index, doc_type=doc_type)
    properties = mapping[index]['mappings'][doc_type]['properties']
    if 'index_card' not in properties:
        sys.stdout.write('Update mapping: add index_card\n')
        index_card_properties = {
            'properties': {
                # Ukrainian-analyzed full-text field for the summary card.
                'index_card': Text(index=True, analyzer='ukrainian').to_dict()
            }
        }
        es.indices.put_mapping(index=index, doc_type=doc_type, body=index_card_properties)
    if 'full_name_for_sorting' not in properties['general']['properties']:
        sys.stdout.write('Update mapping: add full_name_for_sorting\n')
        full_name_properties = {
            'properties': {
                'general': {
                    'properties': {
                        # Keyword for sorting; long names truncated at 100 chars.
                        'full_name_for_sorting': Keyword(index=True, ignore_above=100).to_dict()
                    }
                }
            }
        }
        es.indices.put_mapping(index=index, doc_type=doc_type, body=full_name_properties)
class RedditAwarding(InnerDoc):
    # Embedded document for a Reddit award attached to a post/comment.
    award_type = Keyword()
    coin_price = Integer()
    coin_reward = Integer()
    count = Integer()
    days_of_drip_extension = Integer()
    days_of_premium = Integer()
    description = Text(
        # Module-level constants keep text-field options consistent
        # across mappings in this file.
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    end_date = RedditDate()
    # Icon metadata is stored but neither indexed nor doc_valued (display only).
    icon_height = Short(doc_values=False, index=False)
    icon_url = Keyword(doc_values=False, index=False)
    icon_width = Short(doc_values=False, index=False)
    id = Keyword()
    is_enabled = Boolean()
    name = Keyword()
    resized_icons = Nested(RedditAwardingResizedIcon)
    start_date = RedditDate()
    subreddit_coin_reward = Integer()
    subreddit_id = Keyword()
class Article(Document):
    # Elasticsearch document for a scholarly article.
    id_num = Text(analyzer='standard')
    authors = Nested(Name)  # authors field is a Nested list of Name objects
    title = Text(analyzer=text_analyzer, boost=3)  # boost: title matches rank higher
    abstract = Text(analyzer=text_analyzer)
    body = Nested(Section)  # structured body, one entry per section
    body_text = Text(analyzer=text_analyzer)  # flattened body for full-text search
    citations = Nested(
        Citation)  # citations field is a Nested list of Citation objects
    pr = Float(doc_values=True)  # PageRank-style score — confirm semantics
    cited_by = Nested(AnchorText)
    anchor_text = Text(analyzer='standard')
    ents = Text(analyzer=entity_analyzer)  # extracted entities
    publish_time = Integer()
    in_english = Boolean()

    # override the Document save method to include subclass field definitions
    def save(self, *args, **kwargs):
        return super(Article, self).save(*args, **kwargs)
class Song(Media):
    # Elasticsearch document for a music track; extends the shared Media base.
    audio_stream = Object(AudioStream)  # technical stream info (codec, bitrate, ...)
    id_info = Object(ID)                # external identifier block
    album = Text()
    albumartist = Text()
    arranger = Keyword()
    artist = Text()
    bpm = Float()
    compilation = Keyword()
    composer = Text()
    conductor = Text()
    discnumber = Keyword()
    mood = Keyword()
    performer = Text()
    tracknumber = Keyword()

    class Index(_Index):
        name = 'music'
class News(DocType):
    """Elasticsearch mapping for a news article.

    Every analyzed text field carries a 'raw' keyword sub-field for
    exact matching and aggregations.
    """

    id = Integer()
    title = Text(analyzer='snowball', fields={'raw': Keyword()})
    publication = Text(analyzer='snowball', fields={'raw': Keyword()})
    author = Text(analyzer='snowball', fields={'raw': Keyword()})
    date = Date()
    year = Integer()
    month = Text(analyzer='snowball', fields={'raw': Keyword()})
    url = Text(analyzer='snowball', fields={'raw': Keyword()})
    content = Text(analyzer='snowball')

    class Meta:
        index = 'news14gb'

    def save(self, **kwargs):
        """Persist the document (explicit override hook, no extra logic)."""
        return super(News, self).save(**kwargs)
class ChapterSearch(InnerDoc):
    """Search document for a single chapter of a work."""

    title = Text()
    text = Text()
    image_alt_text = Text()
    work_notes = Text()
    summary = Text()
    number = Text()

    def create_from_json(self, chapter_json):
        """Populate this document from a chapter JSON dict and return it.

        BUG FIX: the original returned None implicitly, so
        save_from_json() crashed with AttributeError when it called
        .save() on the result. Returning ``self`` is backward-compatible
        for callers that ignore the return value.
        """
        self.number = chapter_json['number']
        self.title = chapter_json['title']
        self.text = chapter_json['text']
        self.image_alt_text = chapter_json['image_alt_text']
        self.summary = chapter_json['summary']
        return self

    def save_from_json(self, chapter_json):
        """Ensure the mapping exists, then index this chapter's data."""
        ChapterSearch.init()
        chapter = self.create_from_json(chapter_json)
        chapter.save()
class ZhihuQuestion(InnerDoc):
    # Elasticsearch mapping for a crawled Zhihu question.
    title_suggest = Completion(analyzer=ik_analyzer, search_analyzer=ik_analyzer)  # autocomplete source
    # Analyzed title with an exact-match keyword sub-field 'title.title'.
    title = Text(analyzer='ik_max_word', search_analyzer="ik_max_word", fields={'title': Keyword()})
    content = Text(analyzer='ik_max_word')
    url = Text()
    question_id = Text()
    answer_num = Integer()
    comments_num = Integer()
    watch_user_num = Integer()
    click_num = Integer()
    topics = Text()
    id = Text()

    class Meta:
        index = 'jobbole'
        doc_type = 'zhihu_question'
class EntityDisease(DocType):
    """Knowledge-base document describing a disease entity."""

    name = Keyword()
    describe = Text(analyzer='ik_max_word')
    alias = Keyword()
    is_infect = Text(analyzer='ik_max_word')
    highrisk_group = Text(analyzer='ik_max_word')
    source_url = Text()
    treatment_cycle = Text(analyzer='ik_max_word')
    treatment_cost = Text(analyzer='ik_max_word')

    class Meta:
        index = 'med_base'

    def save(self, **kwargs):
        """Persist the document, deriving a stable id when none is set.

        The id is the MD5 hex digest of source_url, so re-saving the
        same source page updates the existing document instead of
        creating a duplicate.
        """
        if "_id" not in self.meta:
            digest = hashlib.md5(self.source_url.encode(encoding="utf-8"))
            self.meta['id'] = digest.hexdigest()
        return super(EntityDisease, self).save(**kwargs)
class ESDocument(Document): type = Text() # filtration and sorting fields will be in these 'metadata' metadata = Object(properties={ 'filters': Object(properties={ 'type': Text(), 'status': Text(), 'visibility': Text(), 'hidden': Boolean() }), 'sorting': Object(properties={ 'name': Text(fields={'keyword': Keyword()}), 'type': Text(fields={'keyword': Keyword()}), 'start_date': Date(fields={'keyword': Keyword()}), 'clicks': Integer(fields={'keyword': Keyword()}) }) })
class ArticleType_lagou(DocType):  # Lagou (job-listing site) item
    suggest = Completion(analyzer=ik_analyzer)  # autocomplete source
    title = Text(analyzer="ik_max_word")
    url = Keyword()
    url_object_id = Keyword()  # stable id derived from the URL — confirm upstream
    salary = Keyword()
    job_city = Text(analyzer="ik_max_word")
    work_years = Keyword()
    degree_need = Text(analyzer="ik_max_word")
    job_type = Text(analyzer="ik_max_word")
    publish_time = Keyword()
    tags = Text(analyzer="ik_max_word")
    job_advantage = Text(analyzer="ik_max_word")
    job_desc = Text(analyzer="ik_max_word")
    job_addr = Keyword()
    company_url = Keyword()
    company_name = Keyword()
    crawl_time = Date()

    class Meta:
        index = "lagou"
        doc_type = "job"
class LkPersonType(Document): suggest = Completion(analyzer="ik_smart") # id = Keyword() # parent_id = Keyword() url = Keyword() name = Text(analyzer="ik_smart") occupation = Text(analyzer="ik_smart") location = Text(analyzer="ik_smart") photo_url = Keyword() photo_path = Keyword() gender = Keyword() beauty_score = Integer() summary = Text(analyzer="ik_smart") company_exp = Text(analyzer="ik_smart") company_jobexp = Text(analyzer="ik_smart") school_exp = Text(analyzer="ik_smart") class Index: name = "lnkn" settings = { "number_of_shards": 1, "number_of_replicas": 0, }
class Doc(Document):
    """Defines the Elasticsearch document mapping for a PTT forum post.

    Reference: https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html
    """

    post_type = Integer(required=True)
    board = Text(required=True)
    author = Text(required=True)
    published = Date(required=True)
    title = Text()
    content = Text(required=True)
    ip = Text()
    upvote = Integer()
    novote = Integer()    # neutral ("arrow") votes in PTT terms — confirm
    downvote = Integer()
    type = Text()
    post_id = Text()

    class Index:
        """Index info."""

        name = 'ptt'
def __init__(
    self,
    word: str,
    examples: list[str],
    *,
    definitions: str,
    syllables: str,
    pronunciation: str,
    rhyme_patterns: str,
    frequency: str,
    letters: int,
    sounds: int,
):
    """Initialize a word record with its linguistic attributes.

    FIX: the original annotations were *instances* of mapping field
    classes (``Text()``, ``Integer()``), which are not types and
    mislead both readers and type checkers. Replaced with the stdlib
    types the values actually are; runtime behavior and the call
    signature are unchanged.
    """
    super().__init__()
    self.examples = examples
    self.word = word
    self.definitions = definitions
    self.syllables = syllables
    self.pronunciation = pronunciation
    self.rhyme_patterns = rhyme_patterns
    self.frequency = frequency
    self.letters = letters
    self.sounds = sounds