class TopicDocument(es.Document):
    topic_id = es.Keyword()
    topic_weight = es.Float()
    document_es_id = es.Keyword()
    datetime = es.Date()
    document_source = es.Keyword()
    document_corpus = es.Keyword()
    document_num_views = es.Integer()
    document_num_comments = es.Integer()

    class Index:
        name = ES_INDEX_TOPIC_DOCUMENT  # f"{ES_INDEX_TOPIC_DOCUMENT}_{tm}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        settings_dynamic = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        mappings = {
            "properties": {
                "datetime": {"type": "date"},
                "document_es_id": {"type": "keyword"},
                "document_source": {"type": "keyword"},
                "document_corpus": {"type": "keyword"},
                "document_num_views": {"type": "long"},
                "document_num_comments": {"type": "long"},
                "topic_id": {"type": "keyword"},
                "topic_weight": {"type": "float"},
            }
        }

class TestSearchDocument(BaseDocument):
    id = dsl.Integer()
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    json = dsl.Object()
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Index:
        name = "test_search"

class TestSearchDocument(BaseDocument):  # pylint: disable=no-member
    id = dsl.Integer()  # pylint: disable=invalid-name
    name = dsl.String()
    num = dsl.Integer()
    json = dsl.Object()
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Meta:
        index = 'test_search'

def decorator(cls):
    print("setup_schema:" + cls.__name__.lower())
    #
    # Create an Elastic model from the schema.
    #
    # There are two special keys you can use in addition to the standard
    # cerberus syntax:
    #   "elastic":     raw Elastic DSL field __init__ kwargs; they are handed
    #                  unchanged to the field's __init__
    #   "elastictype": a more specific elasticsearch_dsl type definition
    #                  (e.g. Text instead of string)
    # Both special keys are removed from the schema at the end of this
    # decorator.
    #
    # Now set the right elastic type for each field in the doc.
    import elasticsearch_dsl

    for elem in cls.schema.keys():
        # the raw field __init__ parameters dict
        elastic = cls.schema[elem].get("elastic", {})
        if cls.schema[elem]["type"] == "integer":
            setattr(cls, elem, elasticsearch_dsl.Integer(**elastic))
        elif cls.schema[elem]["type"] == "float":
            setattr(cls, elem, elasticsearch_dsl.Float(**elastic))
        elif cls.schema[elem]["type"] == "string":
            setattr(cls, elem, elasticsearch_dsl.Text(**elastic))
        elif cls.schema[elem]["type"] == "bool":
            setattr(cls, elem, elasticsearch_dsl.Boolean(**elastic))
        elif cls.schema[elem]["type"] in ("date", "datetime"):
            setattr(cls, elem, elasticsearch_dsl.Date(**elastic))
        elif cls.schema[elem]["type"] == "number":
            setattr(cls, elem, elasticsearch_dsl.Integer(**elastic))
        elif cls.schema[elem]["type"] == "binary":
            setattr(cls, elem, elasticsearch_dsl.Byte(**elastic))
        elif cls.schema[elem]["type"] == "list":
            setattr(cls, elem, elasticsearch_dsl.Keyword(**elastic))
        else:
            raise Exception("Wrong datatype in schema")

        # remove the (raw) special key(s) from the schema
        cls.schema[elem].pop("elastic", None)
        cls.schema[elem].pop("elastictype", None)

    return cls

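# A minimal usage sketch for the decorator above, assuming its enclosing
# factory is exposed as `setup_schema` (the print statement suggests that
# name) and applied bare. The `Product` class and its schema are hypothetical.
import elasticsearch_dsl

@setup_schema
class Product(elasticsearch_dsl.Document):
    schema = {
        "title": {"type": "string", "elastic": {"analyzer": "standard"}},
        "price": {"type": "float"},
        "in_stock": {"type": "bool"},
        "added": {"type": "datetime"},
    }

# After decoration, Product carries Text/Float/Boolean/Date attributes and
# the special "elastic" keys have been stripped from the schema dict.
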
class TopicCombo(es.Document):
    topics = es.Object()
    common_docs_ids = es.Keyword()
    common_docs_num = es.Integer()

    class Index:
        name = ES_INDEX_TOPIC_COMBOS  # f"{ES_INDEX_TOPIC_COMBOS}_{tm}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        mappings = {
            "properties": {
                "topics": {"type": "object"},
                "common_docs_ids": {"type": "keyword"},
                "common_docs_num": {"type": "integer"},
            }
        }

class BaseDocument(indices.BaseDocument):
    """Base search document."""

    id = dsl.Integer()  # pylint: disable=invalid-name
    slug = Slug()
    version = dsl.Keyword()
    name = Name()
    created = dsl.Date()
    modified = dsl.Date()
    contributor_id = dsl.Integer()
    contributor_name = User()
    # We use a separate field for contributor sorting because we use an
    # entirely different value for it (the display name).
    contributor_sort = dsl.Keyword()
    owner_ids = dsl.Integer(multi=True)
    owner_names = User(multi=True)

class TestSearchDocument(BaseDocument):  # pylint: disable=no-member
    name = dsl.String()
    num = dsl.Integer()
    json = dsl.Object()

    class Meta:
        index = 'test_search'

class TrainingJob(elasticsearch_dsl.Document):
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()
    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()
    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()
    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()
    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because we can't use
    # traditional approaches to get the class attrs.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range',
        'zoom_range', 'horizontal_flip', 'vertical_flip', 'dropout_rate1',
        'dropout_rate2', 'data_dir', 'gcs_url', 'mip_thickness',
        'height_offset', 'pixel_value_range',
    ]

    class Index:
        name = TRAINING_JOBS

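# A sketch of how `params_to_parse` might drive the parser, assuming the
# caller holds a plain dict of raw parameter values (the `raw` dict and the
# `filter_params` helper are hypothetical):
def filter_params(raw):
    """Keep only the whitelisted training parameters for indexing."""
    return {key: raw[key] for key in TrainingJob.params_to_parse if key in raw}

job = TrainingJob(job_name='baseline',
                  **filter_params({'batch_size': 32, 'val_split': 0.2,
                                   'not_a_param': 'dropped'}))
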
class EntityDocument(CollectionDocument):
    """Document for entity search."""

    collection = dsl.Integer()
    type = dsl.Keyword()

    class Index:
        """Meta class for entity search document."""

        name = "entity"

class EntityDocument(CollectionDocument):
    """Document for entity search."""

    descriptor_completed = dsl.Boolean()
    collections = dsl.Integer(multi=True)

    class Meta:
        """Meta class for entity search document."""

        index = 'entity'

class PhotoDocument(esd.DocType):
    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    size = esd.Integer()
    model = esd.String(index='not_analyzed')
    model_ci = esd.String(analyzer=esd.analyzer(
        'keyword', tokenizer="keyword", filter=['lowercase']))
    lens = esd.String(index='not_analyzed')
    lens_ci = esd.String(analyzer=esd.analyzer(
        'keyword', tokenizer="keyword", filter=['lowercase']))
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')

    def extended_dict(self):
        dct = self.to_dict()
        dct["id"] = self.meta.id
        return dct

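# Where `extended_dict` helps: serializing search hits with their
# Elasticsearch ids inline. A small sketch; the filter values are made up.
results = [hit.extended_dict()
           for hit in PhotoDocument.search().filter('term', iso=400).scan()]
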
class TagDoc(DocType):
    id = edsl.Integer(required=True)
    name = edsl.Text(copy_to=ALL_DATA_FIELD)

    @classmethod
    def from_instance(cls, instance):
        doc = cls(meta={'id': instance.id}, id=instance.id, name=instance.name)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'tag'

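# Because `from_instance` returns `to_dict(include_meta=True)` output rather
# than a document object, the result is already shaped like a bulk action.
# A minimal indexing sketch; the client settings and the `tags` iterable of
# model instances are assumptions.
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

client = Elasticsearch()  # assumed default connection
bulk(client, (TagDoc.from_instance(tag) for tag in tags))
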
class DataDocument(BaseDocument):
    """Document for data search."""

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility.
    type = ProcessType()
    process_name = Name()
    tags = dsl.Keyword(multi=True)
    collection = dsl.Integer()
    entity = dsl.Integer()

    class Index:
        """Meta class for data search document."""

        name = 'data'

class GroupDocument(esd.DocType):
    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    model = esd.String(index='not_analyzed')
    lens = esd.String(index='not_analyzed')
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')

class Dictionary(es.Document):
    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()

    class Index:
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT

class DataDocType(es.DocType):
    """Elasticsearch test model."""

    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'

class DataDocument(BaseDocument):
    """Document for data search."""

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility.
    type = ProcessType()  # pylint: disable=invalid-name
    process_name = Name()
    tags = dsl.Keyword(multi=True)
    collection = dsl.Integer(multi=True)
    parents = dsl.Integer(multi=True)
    children = dsl.Integer(multi=True)
    entity = dsl.Integer(multi=True)

    class Meta:
        """Meta class for data search document."""

        index = 'data'

class SponsorDoc(DocType):
    id = edsl.Integer(required=True)
    name = edsl.Text(copy_to=ALL_DATA_FIELD,
                     analyzer=autocomplete_analyzer,
                     search_analyzer='standard')

    @classmethod
    def from_instance(cls, instance):
        doc = cls(meta={'id': instance.id}, id=instance.id, name=instance.name)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'sponsor'

class Gebied(es.DocType):
    """
    A findable area:

    Unesco
    Buurt
    Buurtcombinatie
    Stadsdeel
    Grootstedelijk
    Gemeente
    Woonplaats
    """

    id = es.Keyword()
    _display = es.Keyword()

    naam = es.Text(analyzer=analyzers.adres, fields=text_fields)
    naam_nen = es.Text(analyzer=analyzers.adres, fields=text_fields)
    naam_ptt = es.Text(analyzer=analyzers.adres, fields=text_fields)
    postcode = es.Text(analyzer=analyzers.postcode, fields=postcode_fields)

    g_code = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields={
            'keyword': es.Keyword(),
            'ngram': es.Text(analyzer=analyzers.autocomplete),
        })

    # area ordering
    order = es.Integer()
    subtype = es.Keyword()
    type = es.Keyword()
    centroid = es.GeoPoint()

    landelijk_id = es.Text(  # only for openbare_ruimte
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero),
        })

    gsg_type = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_GEBIED']

class TopicModellingIndex(es.Document):
    corpus = es.Keyword()
    source = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    has_topic_info = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()
    datetime_from = es.Date()
    datetime_to = es.Date()
    algorithm = es.Keyword()
    number_of_topics = es.Integer()
    hierarchical = es.Boolean()
    meta_parameters = es.Object()
    perplexity = es.Float()
    purity = es.Float()
    contrast = es.Float()
    coherence = es.Float()
    tau_smooth_sparse_theta = es.Float()
    tau_smooth_sparse_phi = es.Float()
    tau_decorrelator_phi = es.Float()
    tau_coherence_phi = es.Float()
    topics = es.Nested(Topic)
    is_actualizable = es.Boolean()

    class Index:
        name = ES_INDEX_TOPIC_MODELLING
        using = ES_CLIENT

class Text(es.InnerDoc):
    """Simple Elasticsearch DSL mapping of the text data this plugin will return."""

    full_text = es.Text()
    pattern_polarity = es.Float()
    pattern_subjectivity = es.Float()
    short_text = es.Text()
    translated = es.Text()
    truncated = es.Boolean()
    tweet_length = es.Integer()
    vader_compound = es.Float()
    vader_compound_inverted = es.Float()
    vader_negative = es.Float()
    vader_neutral = es.Float()
    vader_positive = es.Float()

class sigpac_record(dsl.DocType):
    dn_pk = dsl.Long()
    provincia = dsl.Integer()
    municipio = dsl.Integer()
    poligono = dsl.Integer()
    parcela = dsl.Integer()
    recinto = dsl.Integer()
    zona = dsl.Integer()
    perimetro = dsl.Long()
    superficie = dsl.Long()
    pend_med = dsl.Integer()
    points = dsl.GeoShape()
    bbox = dsl.GeoShape()
    bbox_center = dsl.GeoPoint(lat_lon=True)
    uso_sigpac = dsl.String()
    agregado = dsl.Integer()
    cap_auto = dsl.Integer()
    cap_manual = dsl.Integer()
    coef_regadio = dsl.Float()
    c_refpar = dsl.String()
    c_refpol = dsl.String()
    c_refrec = dsl.String()
    dn_oid = dsl.Long()
    elevation = dsl.Float()

    def save(self, **kwargs):
        return super(sigpac_record, self).save(**kwargs)

    class Meta:
        index = 'plots'
        doc_type = 'sigpac'

class DataDocType(es.Document):
    """Elasticsearch test model."""

    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    location = es.GeoPoint()
    description = es.Text()

    class Index:
        name = 'test'

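# A short usage sketch for a Document subclass like the one above. The index
# name comes from the snippet; the host and field values are assumptions.
from elasticsearch_dsl import connections

connections.create_connection(hosts=['localhost'])  # assumed host

DataDocType.init()  # create the index and mapping in Elasticsearch
doc = DataDocType(first_name='Ada', city='London', is_active=True)
doc.save()

hits = DataDocType.search().query('match', city='London').execute()
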
class EmbeddingIndex(es.Document):
    corpus = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()
    by_unit = es.Keyword()  # Token/Word/Sentence/Text
    algorithm = es.Keyword()
    pooling = es.Keyword()
    meta_parameters = es.Object()

    class Index:
        name = ES_INDEX_EMBEDDING
        using = ES_CLIENT

class ContainerDoc(DocType):
    id = edsl.Integer(required=True)
    name = edsl.Text(copy_to=ALL_DATA_FIELD,
                     analyzer=autocomplete_analyzer,
                     search_analyzer='standard')
    issn = edsl.Keyword()

    @classmethod
    def from_instance(cls, container):
        doc = cls(meta={'id': container.id},
                  id=container.id,
                  name=container.name,
                  issn=container.issn)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'container'

class KadastraalSubject(es.DocType):
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword),
        })

    natuurlijk_persoon = es.Boolean()
    geslachtsnaam = es.Text(analyzer=analyzers.naam)

    order = es.Integer()
    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']

class KadastraalObject(es.DocType):
    aanduiding = es.Text(
        fielddata=True,
        analyzer=analyzers.postcode,
        fields=kad_text_fields)

    # The search aanduiding is the aanduiding without the "acd00 " prefix;
    # remove this in the future.
    short_aanduiding = es.Text(
        analyzer=analyzers.kad_obj_aanduiding,
        search_analyzer='standard',
        fields=kad_text_fields)

    sectie = es.Text(fields=kad_text_fields)

    objectnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields)

    indexletter = es.Keyword(fields=kad_text_fields)

    indexnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields)

    order = es.Integer()
    centroid = es.GeoPoint()

    gemeente = es.Text(analyzer=analyzers.naam)
    gemeente_code = es.Keyword(normalizer=analyzers.lowercase)

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_OBJECT']

class InfoRiegoRecord(dsl.DocType):
    code = dsl.String()
    location = dsl.String()
    date = dsl.Date()
    rain = dsl.Float()
    temperature = dsl.Float()
    rel_humidity = dsl.Float()
    radiation = dsl.Float()
    wind_speed = dsl.Float()
    wind_direction = dsl.Float()
    lat_lon = dsl.GeoPoint(lat_lon=True)
    station_height = dsl.Integer()

    def save(self, **kwargs):
        return super(InfoRiegoRecord, self).save(**kwargs)

    class Meta:
        index = 'inforiego'

class AuthorDoc(DocType):
    id = edsl.Integer(required=True)
    orcid = edsl.Keyword()
    researcherid = edsl.Keyword()
    email = edsl.Keyword()
    name = edsl.Text(copy_to=ALL_DATA_FIELD,
                     analyzer=autocomplete_analyzer,
                     search_analyzer='standard')

    @classmethod
    def from_instance(cls, author):
        doc = cls(meta={'id': author.id},
                  id=author.id,
                  orcid=author.orcid,
                  researcherid=author.researcherid,
                  email=author.email,
                  name=author.name)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'author'

class Term(es.DocType):
    term = es.Text()
    gewicht = es.Integer()