class TrainingJob(elasticsearch_dsl.Document):
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()
    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()
    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()
    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()
    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range',
        'zoom_range', 'horizontal_flip', 'vertical_flip',
        'dropout_rate1', 'dropout_rate2', 'data_dir', 'gcs_url',
        'mip_thickness', 'height_offset', 'pixel_value_range',
    ]

    class Index:
        name = TRAINING_JOBS

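# A minimal usage sketch (not from the original source): it assumes a
# reachable local cluster and that TRAINING_JOBS is the index name constant
# defined elsewhere in the module.
from elasticsearch_dsl import connections

connections.create_connection(hosts=['localhost'])  # hypothetical host
TrainingJob.init()  # create the index and mapping if they do not exist yet

job = TrainingJob(job_name='baseline', author='alice',
                  batch_size=32, val_split=0.2)  # illustrative values
job.save()
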
class GameSummary(elasticsearch_dsl.Document):
    """Game search model."""

    id = elasticsearch_dsl.Text()
    name = elasticsearch_dsl.Text()
    isPublic = elasticsearch_dsl.Boolean()
    players = elasticsearch_dsl.Object(PlayersInGame)

    class Index:  # pylint: disable=missing-class-docstring
        name = "games"

class Pand(es.DocType):
    id = es.Keyword()
    landelijk_id = es.Text(
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero),
        })
    pandnaam = es.Text(analyzer=analyzers.adres, fields=naam_fields)
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_PAND']

class CollectionDocument(BaseDocument):
    """Document for collection search."""

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)
    description = dsl.Text(fielddata=True)

    class Index:
        """Meta class for collection search document."""

        name = "collection"

class FeatureSearchDocument(BaseDocument):
    """Index for feature search."""

    # pylint: disable=no-member
    source = dsl.Keyword()
    feature_id = dsl.Keyword(
        # Additional subfield used for boosting during autocomplete.
        fields={
            'lower': {'type': 'text', 'analyzer': identifier_analyzer},
            'ngrams': {
                'type': 'text',
                'analyzer': autocomplete_analyzer,
                'search_analyzer': autocomplete_search_analyzer,
            },
        },
    )
    species = dsl.Keyword()
    type = dsl.Keyword()  # pylint: disable=invalid-name
    sub_type = dsl.Keyword(index=False)
    name = dsl.Keyword(
        # Additional subfield used for boosting during autocomplete.
        fields={
            'lower': {'type': 'text', 'analyzer': identifier_analyzer},
            'ngrams': {
                'type': 'text',
                'analyzer': autocomplete_analyzer,
                'search_analyzer': autocomplete_search_analyzer,
            },
        },
    )
    full_name = dsl.Text(index=False)
    description = dsl.Text(index=False)
    aliases = dsl.Keyword(
        multi=True,
        # Additional subfield used for boosting during autocomplete.
        fields={
            'ngrams': {
                'type': 'text',
                'analyzer': autocomplete_analyzer,
                'search_analyzer': autocomplete_search_analyzer,
            },
        },
    )

    class Meta:
        """Meta class for feature search document."""

        index = 'feature_search'

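# A hedged sketch of the autocomplete query these ngram subfields enable;
# the field paths come from the mapping above, but the query shape itself
# is an assumption, not code from the original project.
from elasticsearch_dsl import Q

def autocomplete_features(text):
    """Match typed text against the ngram subfields, boosting name hits."""
    return FeatureSearchDocument.search().query(Q('bool', should=[
        Q('match', **{'name.ngrams': {'query': text, 'boost': 2.0}}),
        Q('match', **{'aliases.ngrams': text}),
        Q('match', **{'feature_id.ngrams': text}),
    ]))
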
class DataDocType(es.DocType):
    """Elasticsearch test model."""

    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'

class Text(es.InnerDoc):
    """Simple Elasticsearch DSL mapping of the text data this plugin will return."""

    full_text = es.Text()
    pattern_polarity = es.Float()
    pattern_subjectivity = es.Float()
    short_text = es.Text()
    translated = es.Text()
    truncated = es.Boolean()
    tweet_length = es.Integer()
    vader_compound = es.Float()
    vader_compound_inverted = es.Float()
    vader_negative = es.Float()
    vader_neutral = es.Float()
    vader_positive = es.Float()

class Location(es.InnerDoc):  # pylint: disable=too-few-public-methods
    """
    InnerDoc mapping of location information embedded within a tweet.

    This data is created by us during the processing pipeline.
    """

    city = es.Keyword(doc_values=True)
    country = es.Keyword(doc_values=True)
    county = es.Keyword(doc_values=True)
    id = es.Text()
    latitude = es.Text()
    longitude = es.Text()
    resolution_method = es.Text()
    state = es.Keyword(doc_values=True)

class DataDocType(es.Document):
    """Elasticsearch test model."""

    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    location = es.GeoPoint()
    description = es.Text()

    class Index:
        name = 'test'

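# Illustrative geo query against the location field above (an assumption,
# not part of the original test model); requires a configured default
# connection.
nearby = DataDocType.search().filter(
    'geo_distance',
    distance='10km',
    location={'lat': 52.37, 'lon': 4.90},  # hypothetical point
)
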
def document_field(field):
    """
    The default ``field_factory`` method for converting Django field
    instances to ``elasticsearch_dsl.Field`` instances.

    Auto-created fields (primary keys, for example) and one-to-many fields
    (reverse FK relationships) are skipped.
    """
    if field.auto_created or field.one_to_many:
        return None
    if field.many_to_many:
        return RawMultiString
    defaults = {
        models.DateField: dsl.Date(),
        models.DateTimeField: dsl.Date(),
        models.IntegerField: dsl.Long(),
        models.PositiveIntegerField: dsl.Long(),
        models.BooleanField: dsl.Boolean(),
        models.NullBooleanField: dsl.Boolean(),
        # ES 5+ no longer accepts index='not_analyzed'; Keyword is the
        # modern equivalent of a not-analyzed string.
        models.SlugField: dsl.Keyword(),
        models.DecimalField: dsl.Double(),
        models.FloatField: dsl.Float(),
    }
    return defaults.get(field.__class__, RawString)

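# A usage sketch with a hypothetical model (assumes Django is configured;
# RawString / RawMultiString are the fallback field types defined in the
# surrounding module):
class Article(models.Model):
    slug = models.SlugField()
    published = models.DateTimeField()

es_fields = {}
for f in Article._meta.get_fields():
    mapped = document_field(f)
    if mapped is not None:  # auto pk and reverse relations are skipped
        es_fields[f.name] = mapped
# es_fields now maps 'slug' -> Keyword(), 'published' -> Date(), etc.
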
class ExtendedDataDocument(object):
    """Data ES document extensions."""

    source = dsl.Keyword()
    species = dsl.Text()
    build = dsl.Keyword()
    feature_type = dsl.Keyword()

class Node(es.DocType):
    """Elastic document describing a node."""

    node_type = es.Keyword()
    objectID = es.Keyword()
    name = es.Text(fielddata=True, analyzer=autocomplete)
    user = es.Object(fields={
        'id': es.Keyword(),
        'name': es.Text(fielddata=True, analyzer=autocomplete),
    })
    description = es.Text()
    is_free = es.Boolean()
    project = es.Object(fields={
        'id': es.Keyword(),
        'name': es.Keyword(),
        'url': es.Keyword(),
    })
    media = es.Keyword()
    picture = es.Keyword()
    tags = es.Keyword(multi=True)
    license_notes = es.Text()
    created_at = es.Date()
    updated_at = es.Date()

    class Meta:
        index = 'nodes'

class Monument(es.Document):
    """Elastic data for Monument."""

    id = es.Keyword()
    type = es.Keyword()
    naam = es.Text(
        fielddata=True,
        analyzer=analyzers.monument_naam,
        fields={
            'raw': es.Text(fielddata=True),
            'keyword': es.Text(fielddata=True, analyzer=analyzers.subtype),
        })

    class Index:
        name = settings.ELASTIC_INDICES['MONUMENTEN']

class User(es.DocType):
    """Elastic document describing user."""

    objectID = es.Keyword()
    username = es.Text(fielddata=True, analyzer=autocomplete)
    username_exact = es.Keyword()
    full_name = es.Text(fielddata=True, analyzer=autocomplete)
    roles = es.Keyword(multi=True)
    groups = es.Keyword(multi=True)
    email = es.Text(fielddata=True, analyzer=autocomplete)
    email_exact = es.Keyword()

    class Meta:
        index = 'users'

class LawSuitModel(es.Document):
    class Index:
        name = 'law_go_kr'

    case_id = es.Keyword()
    index_data = es.Keyword()
    detail_data_html = es.Text()
    detail_data_searchable = es.Keyword()

class KadastraalSubject(es.DocType):
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword),
        })
    natuurlijk_persoon = es.Boolean()
    geslachtsnaam = es.Text(analyzer=analyzers.naam)
    order = es.Integer()
    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']

class TagDoc(DocType):
    id = edsl.Integer(required=True)
    name = edsl.Text(copy_to=ALL_DATA_FIELD)

    @classmethod
    def from_instance(cls, instance):
        doc = cls(meta={'id': instance.id},
                  id=instance.id,
                  name=instance.name)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'tag'

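# from_instance returns an action dict (to_dict(include_meta=True)), which
# is the shape the bulk helper expects; a sketch, assuming an Elasticsearch
# client `es` and an iterable of tag model instances.
from elasticsearch.helpers import bulk

def index_tags(es, tags):
    """Bulk-index tags; returns (success_count, errors)."""
    return bulk(es, (TagDoc.from_instance(tag) for tag in tags))
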
class CollectionDocument(BaseDocument):
    """Document for collection search."""

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)

    class Meta:
        """Meta class for collection search document."""

        index = 'collection'

class Gebied(es.DocType):
    """
    A searchable area:

    Unesco
    Buurt
    Buurtcombinatie
    Stadsdeel
    Grootstedelijk
    Gemeente
    Woonplaats
    """

    id = es.Keyword()
    _display = es.Keyword()

    naam = es.Text(analyzer=analyzers.adres, fields=text_fields)
    naam_nen = es.Text(analyzer=analyzers.adres, fields=text_fields)
    naam_ptt = es.Text(analyzer=analyzers.adres, fields=text_fields)
    postcode = es.Text(analyzer=analyzers.postcode, fields=postcode_fields)

    g_code = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields={
            'keyword': es.Keyword(),
            'ngram': es.Text(analyzer=analyzers.autocomplete),
        })

    # gebied order
    order = es.Integer()
    subtype = es.Keyword()
    type = es.Keyword()
    centroid = es.GeoPoint()

    landelijk_id = es.Text(
        # Only for openbare_ruimte.
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero),
        })

    gsg_type = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_GEBIED']

class TestAnalyzerSearchDocument(BaseDocument):
    name = dsl.Text(
        analyzer=dsl.analyzer(
            "test_analyzer",
            tokenizer="keyword",
            filter=["lowercase"],
        ),
        # fielddata belongs on the Text field, not inside the analyzer
        # definition, where ES would reject it.
        fielddata=True,
    )

    class Index:
        name = "test_analyzer_search"

class Dictionary(es.Document):
    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()

    class Index:
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT

class SponsorDoc(DocType):
    id = edsl.Integer(required=True)
    name = edsl.Text(copy_to=ALL_DATA_FIELD,
                     analyzer=autocomplete_analyzer,
                     search_analyzer='standard')

    @classmethod
    def from_instance(cls, instance):
        doc = cls(meta={'id': instance.id},
                  id=instance.id,
                  name=instance.name)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'sponsor'

class TestAnalyzerSearchDocument(BaseDocument):
    # pylint: disable=no-member
    name = dsl.Text(
        analyzer=dsl.analyzer(
            'test_analyzer',
            tokenizer='keyword',
            filter=['lowercase'],
        ),
        # fielddata belongs on the Text field, not inside the analyzer
        # definition, where ES would reject it.
        fielddata=True,
    )

    class Meta:
        index = 'test_analyzer_search'

class TestSearchDocument(BaseDocument):
    id = dsl.Integer()
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    json = dsl.Object()
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Index:
        name = "test_search"

def decorator(cls):
    """
    Create an Elastic model from the cerberus schema.

    Two special keys are supported in addition to the standard cerberus
    syntax:

    "elastic":     any Elastic DSL field __init__ kwargs; they are handed
                   raw to the field's __init__
    "elastictype": a more specific elasticsearch_dsl type definition
                   (e.g. Text instead of string)

    Both special keys are removed from the schema at the end of this
    decorator.
    """
    print("setup_schema:" + cls.__name__.lower())

    import elasticsearch_dsl

    # Map cerberus types to elasticsearch_dsl field classes.
    field_types = {
        "integer": elasticsearch_dsl.Integer,
        "float": elasticsearch_dsl.Float,
        "string": elasticsearch_dsl.Text,
        "bool": elasticsearch_dsl.Boolean,
        "date": elasticsearch_dsl.Date,
        "datetime": elasticsearch_dsl.Date,
        "number": elasticsearch_dsl.Integer,
        "binary": elasticsearch_dsl.Byte,
        "list": elasticsearch_dsl.Keyword,
    }

    for elem in cls.schema.keys():
        # The raw field __init__ parameters dict.
        elastic = cls.schema[elem].get("elastic", {})

        field_type = field_types.get(cls.schema[elem]["type"])
        if field_type is None:
            raise Exception("Wrong Datatype in schema")
        setattr(cls, elem, field_type(**elastic))

        # Remove the special (raw) elastic key(s) from the schema.
        cls.schema[elem].pop("elastic", None)
        cls.schema[elem].pop("elastictype", None)

    return cls

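# Illustrative only: a plain class with a cerberus-style schema that the
# decorator above could process; the field names and "elastic" kwargs are
# made up for the example.
class Tweet:
    schema = {
        'text': {'type': 'string', 'elastic': {'analyzer': 'english'}},
        'likes': {'type': 'integer'},
        'posted': {'type': 'datetime'},
    }

Tweet = decorator(Tweet)
# Tweet.text is now elasticsearch_dsl.Text(analyzer='english'), and the
# "elastic" keys have been stripped from Tweet.schema.
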
class Mappings(es.Document):
    threshold = es.Keyword()
    meta_dtm_name = es.Keyword()
    topic_modelling_first = es.Keyword()
    topic_modelling_second = es.Keyword()
    topic_modelling_first_from = es.Date()
    topic_modelling_second_to = es.Date()
    mappings_dict = es.Text()
    scores_list = es.Keyword()
    delta_words_dict = es.Text()
    delta_count_dict = es.Text()

    class Index:
        name = ES_INDEX_MAPPINGS
        using = ES_CLIENT
        settings = {
            "index.mapping.total_fields.limit": 5000,
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }
        mappings = {
            "properties": {
                "threshold": {"type": "keyword"},
                "meta_dtm_name": {"type": "keyword"},
                "topic_modelling_first_from": {"type": "date"},
                "topic_modelling_second_to": {"type": "date"},
            },
        }

class TestSearchDocument(BaseDocument):
    # pylint: disable=no-member
    id = dsl.Integer()  # pylint: disable=invalid-name
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    json = dsl.Object()
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Meta:
        index = 'test_search'

class Bouwblok(es.DocType):
    """Bouwblok searchable fields."""

    code = es.Text(
        analyzer=analyzers.bouwblokid,
        fields={'keyword': es.Keyword()},
    )
    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_BOUWBLOK']

class ContainerDoc(DocType):
    id = edsl.Integer(required=True)
    name = edsl.Text(copy_to=ALL_DATA_FIELD,
                     analyzer=autocomplete_analyzer,
                     search_analyzer='standard')
    issn = edsl.Keyword()

    @classmethod
    def from_instance(cls, container):
        doc = cls(meta={'id': container.id},
                  id=container.id,
                  name=container.name,
                  issn=container.issn)
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'container'