class ArticleType(Document):
    """Job-posting document crawled from 51job ("qiancheng")."""
    id = Integer()
    position = Keyword()
    company = Keyword()
    city = Keyword()
    region = Keyword()
    date = Date()
    time = Date()
    maxPrice = Double()
    minPrice = Double()
    avgPrice = Double()
    profession = Keyword()
    companyType = Keyword()
    location = Keyword()
    cotype = Keyword()
    degree = Keyword()
    workyear = Keyword()
    companySize = Keyword()
    jobTerm = Keyword()
    positionUrl = Keyword()

    class Index:
        # Index ("database") name and type ("table") name.
        name = "qiancheng"
        type = "doc"
class EventDocument(Document):
    """Search document mirroring a Django ``Event`` model instance."""
    name = Text(analyzer='snowball', fields={'raw': Keyword()})
    organizer_name = Text(analyzer='snowball', fields={'raw': Keyword()})
    description = Text(analyzer='snowball')
    start = Date()
    end = Date()
    minimum_ticket_price = Double()
    maximum_ticket_price = Double()

    def __init__(self, model=None, **kwargs):
        # When built from a model instance, serialize it first and let
        # explicit kwargs override serialized values; the serialized 'id'
        # becomes the Elasticsearch document _id (KeyError if missing).
        if model:
            kwargs = {**self.Django.serializer_class(model).data, **kwargs}
            pk = kwargs.pop('id')
            super().__init__(meta={'id': pk}, **kwargs)
        else:
            super().__init__(**kwargs)

    class Index:
        name = 'event'
        settings = {
            "number_of_shards": 2,
        }

    class Django:
        # Helpers binding this document to its Django model/serializer.
        # NOTE(review): queryset returns only the FIRST matching Event.
        queryset = lambda **kwargs: Event.objects.filter(**kwargs)[0]
        serializer_class = EventSerializer
class ShoutIndex(DocType):
    """Search index document for a shout (classified-ad style post)."""
    # indexed
    type = String(index='not_analyzed')
    title = String(analyzer='snowball', fields={'raw': String(index='not_analyzed')})
    text = String(analyzer='snowball')
    tags = String(index='not_analyzed')
    filters = Object()
    category = String(index='not_analyzed')
    country = String(index='not_analyzed')
    postal_code = String(index='not_analyzed')
    state = String(index='not_analyzed')
    city = String(index='not_analyzed')
    latitude = Double()
    longitude = Double()
    price = Double()
    available_count = Integer()
    is_sold = Boolean()
    is_muted = Boolean()
    uid = String(index='not_analyzed')
    username = String(index='not_analyzed')
    published_at = Date()
    expires_at = Date()
    # todo: should not be analysed or indexed
    currency = String(index='not_analyzed')
    address = String(index='not_analyzed')
    thumbnail = String(index='not_analyzed')
    video_url = String(index='not_analyzed')
    is_sss = Boolean()
    priority = Integer()

    class Meta:
        index = '%s_shout' % settings.ES_BASE_INDEX
        dynamic_templates = MetaField([
            # Todo (mo): disabled for now since we are filtering only on string values
            # {
            #     "filters_integer_keys": {
            #         "match_pattern": "regex",
            #         "match": "^(num_.*|.*size|.*length|.*width|.*area|.*vol|.*qty|.*speed|.*year|age|mileage|.*weight)$",
            #         "mapping": {
            #             "type": "integer"
            #         }
            #     }
            # },
            {
                "filters_string_keys": {
                    "path_match": "filters.*",
                    "mapping": {
                        "type": "string",
                        "index": "not_analyzed"
                    }
                }
            }
        ])

    @property
    def published_at_unix(self):
        # Unix-epoch form of published_at for numeric consumers.
        return date_unix(self.published_at)
class Payment(InnerDoc):
    """A single subsidy payment, nested inside a recipient document."""
    year = Date()
    amount = Double()           # presumably normalized to one currency — verify against indexer
    scheme = Text(analyzer=eufs_analyzer)
    country = Keyword()
    amount_original = Double()  # amount as originally reported
    currency_original = Keyword()
class ArticleType(DocType):
    """Product document mapping for the 'bodao' index."""
    brandId = Text()
    categoryId = Long()
    commission = Double()
    consumerProtection = Text()
    cp = Object(Cp)
    cpId = Text()
    cpUrl = Text()
    cpid = Long()
    created = Long()
    dcid = Integer()
    descUrl = Text()
    dv = Double()
    flagShip = Long()
    freeExpress = Integer()
    freeExpressBack = Integer()
    gold = Long()
    historySales = Long()
    imagePath = Text()
    imagePaths = Text()
    intro = Text(store=True, analyzer="ik_smart")
    isBrand = Long()
    itemId = Text()
    itemUrl = Text()
    ju = Long()
    price = Long()
    props = Text()
    qiang = Long()
    qianggou = Object(Qianggou)
    rootCategoryId = Long()
    sellPrice = Long()
    sellerId = Text()
    sellerInfo = Text()
    shopId = Text()
    shopType = Text()
    source = Text()
    subtitle = Text()
    tags = Text(store=True, analyzer='ik_smart')
    title = Text(store=True, analyzer='ik_smart')
    updated = Long()
    viewCount = Long()
    weight = Integer()
    ymbCategoryId = Long()
    # Search suggestions.
    # The analyzer cannot be given by name directly here, hence the analyzer object.
    suggest = Completion(analyzer=ik_analyzer)

    class Meta:
        # Index name.
        index = 'bodao'
        # Type name.
        doc_type = 'es'
        all = MetaField(enabled=True)
class SellerData(Document):
    """Crawled statistics snapshot for an Etsy seller."""
    name = Text()
    date = Date()
    numberOfSales = Integer()
    # NOTE(review): 'Reviws' is a typo for 'Reviews'; renaming would change
    # the live mapping, so it is kept as-is.
    numberOfReviws = Integer()
    numberOfListings = Integer()
    onEtsySince = Integer()
    freeShippingPercent = Double()
    avgPrice = Double()
    crawlID = Integer()

    class Index:
        name = 'sellers_data'
class HouseType(DocType):
    """Mapping for a Lianjia second-hand house listing."""
    # Search-as-you-type suggestions using the ik Chinese analyzer.
    suggest = Completion(analyzer=ik_analyzer)
    url_id = Keyword()  # not analyzed
    url = Keyword()
    total_price = Double()
    unit_price = Double()
    community = Text(analyzer="ik_max_word")
    area = Keyword()
    house_type = Keyword()
    floor_area = Double()
    sale_time = Date()

    class Meta:
        index = "lianjia"
        doc_type = "house"
class Channel(Document):
    """Search document describing a video channel and aggregated metadata
    about its playlists and videos."""
    channel_title = Text(analyzer=name_analyzer)
    channel_desc = Text(analyzer=text_analyzer)
    all_playlists_titles = Text(analyzer=name_analyzer)
    all_playlists_desc = Text(analyzer=text_analyzer)
    all_videos_titles = Text(analyzer=name_analyzer)
    all_videos_desc = Text(analyzer=text_analyzer)
    upload_interval = Double()
    view_count = Long()
    video_count = Integer()
    subscriber_count = Long()
    channel_create_date = Date()
    latest_upload_datetime = Date()
    categories = Text(analyzer=name_analyzer)
    image_url = Keyword()
    channel_url = Keyword()

    # override the Document save method to include subclass field definitions
    def save(self, *args, **kwargs):
        # Zero-argument super() (Python 3) replaces the redundant
        # super(Channel, self) form; behavior is unchanged.
        return super().save(*args, **kwargs)
class GeneralDoc(Document):
    """Transcript/caption segment document with links to its neighbors."""
    # define the fields here
    start = Double()      # segment start offset — presumably seconds; verify against producer
    duration = Double()
    content = Text()
    prev_id = Keyword()   # ids of the adjacent segments, for context stitching
    next_id = Keyword()
    context = Text()
    caption = Object(CaptionInnerDoc)

    class Index:
        name = "general_idx"
        # using the default settings for now.

    def save(self, **kwargs):
        # do something before save here, if you wish
        return super().save(**kwargs)
class GeneralDoc(Document): symbol = Keyword() # Text(fields={"keyword": Keyword()}) company_name = Text() high_52_week = Double() low_52_week = Double() close_price = Double() avg_volume_30_days = Long() timestamp = Date() def save(self, **kwargs): self.timestamp = datetime.now() return super().save(**kwargs) class Index: name = "general"
class BookDoc(Document):
    """Top-of-book (best bid/ask) snapshot document for one symbol."""
    symbol = Keyword()
    bid_size = Long()
    ask_size = Long()
    bid_price = Double()
    ask_price = Double()
    open_price = Double()
    update_time = Date()  # source-provided update time
    timestamp = Date()    # set automatically in save()

    def save(self, **kwargs):
        # Stamp with the save time. NOTE(review): datetime.now() is naive
        # (local time) — confirm whether UTC was intended.
        self.timestamp = datetime.now()
        return super().save(**kwargs)

    class Index:
        name = "book"
class FilmType(DocType):
    """Mapping for a movie document in the 'entertainment' index."""
    # Completion field powering search-as-you-type suggestions.
    suggest = Completion(analyzer=ik_analyzer)
    # Main fields.
    url = Keyword()
    title = Text(analyzer="ik_max_word")
    magnet = Keyword()
    publish_time = Date()
    content = Text(analyzer="ik_max_word")
    imdb_score = Double()
    douban_score = Double()
    ftp_address = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()

    class Meta:
        index = 'entertainment'
        doc_type = 'film'
class SerializationDoc(DocType):
    """Fixture document exercising (de)serialization of scalar field types."""
    i = Long()
    b = Boolean()
    d = Double()
    bin = Binary()
    ip = Ip()

    class Meta:
        index = 'test-serialization'
class SerializationDoc(Document):
    """Fixture document exercising (de)serialization of scalar field types."""
    i = Long()
    b = Boolean()
    d = Double()
    bin = Binary()
    ip = Ip()

    class Index:
        name = 'test-serialization'
class StarDocument(Document):
    """Search document for a STAR resource/event/location entry.

    Fix: the original declared ``status = Keyword()`` twice; the duplicate
    (which silently overwrote the first identical definition) is removed.
    """
    type = Keyword()
    label = Keyword()
    id = Integer()
    title = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    date = Date()
    last_updated = Date()
    content = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    description = Text(analyzer=autocomplete, search_analyzer=autocomplete_search)
    organization = Keyword()
    website = Keyword()
    location = Keyword()
    ages = Keyword(multi=True)
    status = Keyword()
    category = Keyword(multi=True)
    latitude = Double()
    longitude = Double()
    geo_point = GeoPoint()
class ElasticLatestMetric(InnerDoc):
    """Inner document holding the latest recorded value of one metric."""
    key = Keyword()
    value = Double()
    timestamp = Long()
    step = Integer()
    is_nan = Boolean()

    def to_mlflow_entity(self) -> Metric:
        """Convert this document into an MLflow ``Metric`` entity."""
        # NaN cannot be stored as a Double, so it is flagged via is_nan
        # and reconstituted here.
        metric_value = float("nan") if self.is_nan else self.value
        return Metric(
            key=self.key,
            value=metric_value,
            timestamp=self.timestamp,
            step=self.step,
        )
class StarDocument(Document):
    """Search document for a STAR resource/event/location entry.

    Fix: the original declared ``status = Keyword()`` twice; the duplicate
    (which silently overwrote the first identical definition) is removed.
    """
    type = Keyword()
    label = Keyword()
    id = Integer()
    title = Text()
    date = Date()
    last_updated = Date()
    content = Text(analyzer=stem_analyzer)
    description = Text()
    organization_name = Keyword()
    website = Keyword()
    location = Keyword()
    ages = Keyword(multi=True)
    languages = Keyword(multi=True)
    status = Keyword()
    category = Keyword(multi=True)
    latitude = Double()
    longitude = Double()
    geo_point = GeoPoint()
    no_address = Boolean()
    is_draft = Boolean()
class QimaiType(DocType):
    """Mapping for an app-store entry crawled from Qimai."""
    # Completion field for search-as-you-type suggestions.
    suggest = Completion(analyzer=ik_analyzer)
    appId = Keyword()
    appName = Text(analyzer="ik_max_word")
    icon = Keyword()
    publisher = Text(analyzer="ik_max_word")
    country = Text(analyzer="ik_max_word")
    genre = Text(analyzer="ik_max_word")
    price = Double()
    releaseTime = Date()

    class Meta:
        index = "appdata"
        doc_type = "appinfo"
class DoubanModel(Document):
    """Douban movie document; the title carries a completion sub-field."""
    title = Text(analyzer=analyzer('ik_max_word'), fields={
        'suggest': Completion(analyzer=analyzer('ik_max_word')),
    })
    douban_link = Text()
    rating = Double()

    class Index:
        name = 'xingren'
        settings = {
            "number_of_shards": 3,
        }
class TaSDoc(Document):
    """Time-and-sales (last trade) document for one symbol."""
    symbol = Keyword()
    last_price = Double()
    update_time = Date()  # source-provided trade time — presumably from the feed; verify
    last_exch = Keyword()
    last_size = Long()
    timestamp = Date()    # set automatically in save()

    def save(self, **kwargs):
        # Stamp with the save time. NOTE(review): datetime.now() is naive
        # (local time) — confirm whether UTC was intended.
        self.timestamp = datetime.now()
        return super().save(**kwargs)

    class Index:
        name = "tas"
class Recipient(Document):
    """A farm-subsidy recipient, with their payments stored nested."""
    country = Keyword()
    slug = Keyword()
    name = Text(fields={'raw': Keyword()}, analyzer=eufs_analyzer)
    address = Text(analyzer=eufs_analyzer)
    postcode = Text(analyzer=eufs_analyzer)
    location = Text(analyzer=eufs_analyzer)
    recipient_country = Keyword()
    total_amount = Double()
    payments = Nested(Payment)

    class Index:
        name = 'farmsubsidy_recipients'
class Client(Document):
    """Client (user account) document with password helpers and book offers."""
    client_id = Keyword()
    secret_hash = Keyword()
    email = Keyword()
    confirmed = Boolean()
    favorite_genders = Keyword()
    balance = Double()

    class Index:
        name = 'clients'

    @property
    def secret(self):
        # Exposes only the hash; the raw secret is never stored.
        return self.secret_hash

    @secret.setter
    def secret(self, pwd):
        # Hash on assignment so plaintext never reaches the index.
        self.secret_hash = generate_password_hash(pwd)

    def check_secret(self, pwd):
        """Return True if ``pwd`` matches the stored hash."""
        return check_password_hash(self.secret_hash, pwd)

    @property
    def offers_query(self):
        # Unexecuted search for this client's book offers.
        return BookOffer.search().query('match', client_id=self.client_id)

    @property
    def offers(self):
        # Executes the search on every access (network round-trip).
        return self.offers_query.execute()

    def to_dict(self, include_id=False, include_offers=False, include_meta=False, skip_empty=True):
        """Serialize, optionally adding the doc id and the offer lists."""
        base = super().to_dict(include_meta=include_meta, skip_empty=skip_empty)
        if include_id:
            base['id'] = self.meta.id
        if include_offers:
            base['offers'] = []
            # NOTE(review): 'purchased' is initialized but never populated —
            # purchased offers are skipped entirely. Looks like a missing
            # else-branch; confirm intent with callers before changing.
            base['purchased'] = []
            for offer in self.offers:
                if not offer.purchased:
                    base['offers'].append(offer.to_dict(include_id=True))
        return base
class ElasticMetric(Document):
    """Document recording a single metric observation for an MLflow run."""
    key = Keyword()
    value = Double()
    timestamp = Long()
    step = Integer()
    is_nan = Boolean()
    run_id = Keyword()

    class Index:
        name = 'mlflow-metrics'
        settings = {"number_of_shards": 2, "number_of_replicas": 2}

    def to_mlflow_entity(self) -> Metric:
        """Convert this document into an MLflow ``Metric`` entity."""
        # NaN cannot be stored as a Double, so it is flagged via is_nan
        # and reconstituted here.
        metric_value = float("nan") if self.is_nan else self.value
        return Metric(
            key=self.key,
            value=metric_value,
            timestamp=self.timestamp,
            step=self.step,
        )
class B2BProductIndex(DocType):
    """Search index document for ``b24online.models.B2BProduct``."""
    django_id = Integer()
    name = String(analyzer='snowball', fields={'raw': String(index='no')})
    description = String(analyzer=html_strip)
    country = Integer()
    branches = Integer(multi=True)
    b2b_categories = Integer(multi=True)
    organization = Integer()
    price = Double()
    is_active = Boolean()
    is_deleted = Boolean()
    created_at = Date()

    @staticmethod
    def get_model():
        # Imported lazily to avoid circular imports at module load.
        from b24online.models import B2BProduct
        return B2BProduct

    @classmethod
    def get_queryset(cls):
        """Queryset used for (re)indexing, with related objects prefetched."""
        return cls.get_model().objects.all().prefetch_related('company', 'company__countries', 'branches')

    @classmethod
    def to_index(cls, obj):
        """Build an index document from a B2BProduct instance."""
        index_instance = cls(
            django_id=obj.pk,
            name=obj.name,
            description=obj.description,
            organization=obj.company.pk,
            is_active=obj.is_active,
            is_deleted=obj.is_deleted,
            country=obj.company.country.pk,
            price=obj.cost,
            created_at=obj.created_at
        )
        # Distinct pks of every category/branch plus all their ancestors.
        # (Set comprehension replaces list(set([...])) — same result, no
        # throwaway intermediate list.)
        index_instance.b2b_categories = list({
            pk
            for category in obj.categories.all()
            for pk in category.get_ancestors(include_self=True).values_list('pk', flat=True)
        })
        index_instance.branches = list({
            pk
            for branch in obj.branches.all()
            for pk in branch.get_ancestors(include_self=True).values_list('pk', flat=True)
        })
        return index_instance
class DoubanType(Document):
    """Douban movie/TV document; the title carries a completion sub-field."""
    title = Text(analyzer=analyzer('ik_max_word'), fields={
        'suggest': Completion(analyzer=analyzer('ik_max_word')),
    })
    original_title = Text()
    douban_link = Text()
    rating = Double()
    genres = Keyword()
    country = Keyword()
    lang = Keyword()
    year = Integer()
    release_at = Date()
    runtime = Integer()
    season_count = Integer()
    imdb = Keyword()
    overview = Text(analyzer="ik_smart")
    small_image = Text()
    # big_image = Text()
    directors = Keyword()
    writers = Keyword()
    casts = Keyword()

    class Index:
        # doc_type = 'douban'
        name = 'xingren'
        settings = {
            "number_of_shards": 3,
        }
class Skill(DocType):
    """
    Representation of a skill inside ES
    """
    name = Text(fields={'raw': Keyword()})
    creator = Keyword()
    category = Keyword()
    url = Text()
    description = Text()
    short_description = Text()
    avg_rating = Double()
    num_ratings = Integer()
    html = Text()
    usages = Text(fields={'raw': Keyword()})
    image_url = Text()
    keyphrases = Text(fields={'raw': Keyword()})

    class Meta:
        """
        Metadata about where this data type resides
        """
        index = INDEX

    @classmethod
    def set_index(cls, new_index: str):
        # Repoint the doc type at a different index (e.g. rebuilds/tests).
        cls._doc_type.index = new_index

    @classmethod
    def get_index(cls):
        # Current index this doc type reads from / writes to.
        return cls._doc_type.index

    def to_json(self):
        """
        Provide a JSON representation of this Skill
        """
        # Mirrors the raw ES hit layout: meta fields plus '_source'.
        doc = self.meta.to_dict()
        doc['_source'] = self.to_dict()
        return doc
class Measurement(DocType):
    """A single sensor reading from a known device."""
    device_id = Text()
    device_name = Text()
    timestamp = Date()
    s = Double()  # the measured value — units not visible here; confirm with producer

    class Meta:
        index = 'measurements'

    @classmethod
    def array_factory(cls, input_measurements):
        """Build one Measurement per raw dict in ``input_measurements``.

        (List comprehension replaces the manual append loop — PERF401.)
        """
        return [cls.from_dict(m) for m in input_measurements]

    @classmethod
    def from_dict(cls, m):
        """Build a Measurement from a raw dict with 'id' and 's' keys.

        The device name is resolved via the module-level ``client_list``
        (KeyError/IndexError for unknown ids). NOTE(review): the timestamp
        is the naive local receive time, not a device-provided time.
        """
        ts = datetime.now()
        device_name = client_list[m['id']]
        return cls(device_id=m['id'], device_name=device_name, timestamp=ts, s=m['s'])
class Interaction(BaseESModel):
    """Elasticsearch representation of Interaction model."""

    id = Keyword()
    company = fields.company_field()
    company_sector = fields.sector_field()
    company_one_list_group_tier = fields.id_unindexed_name_field()
    communication_channel = fields.id_unindexed_name_field()
    contacts = _contact_field()
    created_on = Date()
    date = Date()
    dit_participants = Object(_DITParticipant)
    event = fields.id_name_partial_field()
    investment_project = fields.id_unindexed_name_field()
    investment_project_sector = fields.sector_field()
    is_event = Boolean(index=False)
    grant_amount_offered = Double(index=False)
    kind = Keyword()
    modified_on = Date()
    net_company_receipt = Double(index=False)
    notes = fields.Text(index=False)
    policy_areas = fields.id_unindexed_name_field()
    policy_issue_types = fields.id_unindexed_name_field()
    service = fields.id_unindexed_name_field()
    service_delivery_status = fields.id_unindexed_name_field()
    subject = fields.NormalizedKeyword(
        fields={
            'english': fields.EnglishText(),
        },
    )
    was_policy_feedback_provided = Boolean()
    were_countries_discussed = Boolean()
    export_countries = _export_country_field()

    # How related model objects are flattened into document fields.
    MAPPINGS = {
        'company': dict_utils.company_dict,
        'communication_channel': dict_utils.id_name_dict,
        'contacts': dict_utils.contact_or_adviser_list_of_dicts,
        'dit_participants': _dit_participant_list,
        'export_countries': _export_countries_list,
        'event': dict_utils.id_name_dict,
        'investment_project': dict_utils.id_name_dict,
        'policy_areas': dict_utils.id_name_list_of_dicts,
        'policy_issue_types': dict_utils.id_name_list_of_dicts,
        'service': dict_utils.id_name_dict,
        'service_delivery_status': dict_utils.id_name_dict,
    }

    # Fields derived from the model instance rather than copied directly.
    COMPUTED_MAPPINGS = {
        'company_sector': dict_utils.computed_nested_sector_dict('company.sector'),
        'company_one_list_group_tier': lambda obj: dict_utils.id_name_dict(
            obj.company.get_one_list_group_tier() if obj.company else None,
        ),
        'investment_project_sector': dict_utils.computed_nested_sector_dict(
            'investment_project.sector',
        ),
        'is_event': attrgetter('is_event'),
    }

    # Fields targeted by free-text search queries.
    SEARCH_FIELDS = (
        'id',
        'company.name',
        'company.name.trigram',
        'contacts.name',  # to find 2-letter words
        'contacts.name.trigram',
        'event.name',
        'event.name.trigram',
        'subject.english',
        'dit_participants.adviser.name',
        'dit_participants.adviser.name.trigram',
        'dit_participants.team.name',
        'dit_participants.team.name.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DOC_TYPE

    class Index:
        doc_type = DOC_TYPE
class InvestmentProject(BaseESModel):
    """Elasticsearch representation of InvestmentProject."""

    id = Keyword()
    actual_land_date = Date()
    actual_uk_regions = fields.id_name_field()
    address_1 = Text()
    address_2 = Text()
    address_town = fields.NormalizedKeyword()
    address_postcode = Text()
    approved_commitment_to_invest = Boolean()
    approved_fdi = Boolean()
    approved_good_value = Boolean()
    approved_high_value = Boolean()
    approved_landed = Boolean()
    approved_non_fdi = Boolean()
    allow_blank_estimated_land_date = Boolean(index=False)
    allow_blank_possible_uk_regions = Boolean(index=False)
    anonymous_description = fields.EnglishText()
    archived = Boolean()
    archived_by = fields.contact_or_adviser_field()
    archived_on = Date()
    archived_reason = Text()
    associated_non_fdi_r_and_d_project = _related_investment_project_field()
    average_salary = fields.id_name_field()
    business_activities = fields.id_name_field()
    client_cannot_provide_foreign_investment = Boolean()
    client_cannot_provide_total_investment = Boolean()
    client_contacts = fields.contact_or_adviser_field()
    client_relationship_manager = fields.contact_or_adviser_field(
        include_dit_team=True)
    client_requirements = Text(index=False)
    comments = fields.EnglishText()
    country_investment_originates_from = fields.id_name_field()
    country_lost_to = Object(properties={
        'id': Keyword(index=False),
        'name': Text(index=False),
    },
    )
    created_on = Date()
    created_by = fields.contact_or_adviser_field(include_dit_team=True)
    date_abandoned = Date()
    date_lost = Date()
    delivery_partners = fields.id_name_field()
    description = fields.EnglishText()
    estimated_land_date = Date()
    export_revenue = Boolean()
    fdi_type = fields.id_name_field()
    fdi_value = fields.id_name_field()
    foreign_equity_investment = Double()
    government_assistance = Boolean()
    intermediate_company = fields.id_name_field()
    investor_company = fields.id_name_partial_field()
    investor_company_country = fields.id_name_field()
    investment_type = fields.id_name_field()
    investor_type = fields.id_name_field()
    level_of_involvement = fields.id_name_field()
    likelihood_to_land = fields.id_name_field()
    project_assurance_adviser = fields.contact_or_adviser_field(
        include_dit_team=True)
    project_manager = fields.contact_or_adviser_field(include_dit_team=True)
    name = Text(fields={
        'keyword': fields.NormalizedKeyword(),
        'trigram': fields.TrigramText(),
    },
    )
    new_tech_to_uk = Boolean()
    non_fdi_r_and_d_budget = Boolean()
    number_new_jobs = Integer()
    number_safeguarded_jobs = Long()
    modified_on = Date()
    project_arrived_in_triage_on = Date()
    project_code = fields.NormalizedKeyword(fields={
        'trigram': fields.TrigramText(),
    },
    )
    proposal_deadline = Date()
    other_business_activity = Text(index=False)
    quotable_as_public_case_study = Boolean()
    r_and_d_budget = Boolean()
    reason_abandoned = Text(index=False)
    reason_delayed = Text(index=False)
    reason_lost = Text(index=False)
    referral_source_activity = fields.id_name_field()
    referral_source_activity_event = fields.NormalizedKeyword()
    referral_source_activity_marketing = fields.id_name_field()
    referral_source_activity_website = fields.id_name_field()
    referral_source_adviser = Object(properties={
        'id': Keyword(index=False),
        'first_name': Text(index=False),
        'last_name': Text(index=False),
        'name': Text(index=False),
    },
    )
    sector = fields.sector_field()
    site_decided = Boolean()
    some_new_jobs = Boolean()
    specific_programme = fields.id_name_field()
    stage = fields.id_name_field()
    status = fields.NormalizedKeyword()
    team_members = fields.contact_or_adviser_field(include_dit_team=True)
    total_investment = Double()
    uk_company = fields.id_name_partial_field()
    uk_company_decided = Boolean()
    uk_region_locations = fields.id_name_field()
    will_new_jobs_last_two_years = Boolean()
    level_of_involvement_simplified = Keyword()
    gross_value_added = Double()

    # How related model objects/querysets are flattened into document fields.
    MAPPINGS = {
        'actual_uk_regions': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'archived_by': dict_utils.contact_or_adviser_dict,
        'associated_non_fdi_r_and_d_project': dict_utils.investment_project_dict,
        'average_salary': dict_utils.id_name_dict,
        'business_activities': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'client_contacts': lambda col: [dict_utils.contact_or_adviser_dict(c) for c in col.all()],
        'client_relationship_manager': dict_utils.adviser_dict_with_team,
        'country_lost_to': dict_utils.id_name_dict,
        'country_investment_originates_from': dict_utils.id_name_dict,
        'created_by': dict_utils.adviser_dict_with_team,
        'delivery_partners': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'fdi_type': dict_utils.id_name_dict,
        'fdi_value': dict_utils.id_name_dict,
        'intermediate_company': dict_utils.id_name_dict,
        'investment_type': dict_utils.id_name_dict,
        'investor_company': dict_utils.id_name_dict,
        'investor_company_country': dict_utils.id_name_dict,
        'investor_type': dict_utils.id_name_dict,
        'level_of_involvement': dict_utils.id_name_dict,
        'likelihood_to_land': dict_utils.id_name_dict,
        'project_assurance_adviser': dict_utils.adviser_dict_with_team,
        'project_code': str,
        'project_manager': dict_utils.adviser_dict_with_team,
        'referral_source_activity': dict_utils.id_name_dict,
        'referral_source_activity_marketing': dict_utils.id_name_dict,
        'referral_source_activity_website': dict_utils.id_name_dict,
        'referral_source_adviser': dict_utils.contact_or_adviser_dict,
        'sector': dict_utils.sector_dict,
        'specific_programme': dict_utils.id_name_dict,
        'stage': dict_utils.id_name_dict,
        'team_members': lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser, include_dit_team=True) for c in col.all()
        ],
        'uk_company': dict_utils.id_name_dict,
        'uk_region_locations': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
    }

    # Fields targeted by free-text search queries.
    SEARCH_FIELDS = (
        'id',
        'name',
        'name.trigram',
        'uk_company.name',
        'uk_company.name.trigram',
        'investor_company.name',
        'investor_company.name.trigram',
        'project_code.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DOC_TYPE

    class Index:
        doc_type = DOC_TYPE
class DoubleRange(RangeField):
    """
    Custom double_range field type.

    Custom defined as the one in the python package is bugged.
    """
    name = 'double_range'
    _core_field = Double()