Exemple #1
0
class TrainingJob(elasticsearch_dsl.Document):
    """Elasticsearch document for a single training job.

    Holds the job's bookkeeping fields, its evaluation metrics and the
    parameters it was run with. Documents are stored in the index named by
    ``TRAINING_JOBS``.
    """

    # Bookkeeping
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()

    # NOTE(review): the field names below look like image-augmentation
    # settings; zoom_range is a keyword rather than a number, presumably
    # because it holds a range -- confirm against the producer.
    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()

    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()

    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()

    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep an explicit list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    # Every name listed here matches a field declared in the "Params"
    # section above; keep the two in sync when adding a parameter.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range', 'zoom_range',
        'horizontal_flip', 'vertical_flip', 'dropout_rate1', 'dropout_rate2',
        'data_dir', 'gcs_url', 'mip_thickness', 'height_offset',
        'pixel_value_range'
    ]

    class Index:
        # Index name comes from the TRAINING_JOBS constant defined elsewhere.
        name = TRAINING_JOBS
Exemple #2
0
class GameSummary(elasticsearch_dsl.Document):
    """Game search model.

    Stored in the ``games`` index. ``players`` is an object field whose
    inner document class is ``PlayersInGame`` (defined elsewhere).
    """

    id = elasticsearch_dsl.Text()
    name = elasticsearch_dsl.Text()
    isPublic = elasticsearch_dsl.Boolean()
    players = elasticsearch_dsl.Object(PlayersInGame)

    class Index:  # pylint: disable=missing-class-docstring
        name = "games"
Exemple #3
0
class Pand(es.DocType):
    """Search document for a "pand".

    The index name is read from ``settings.ELASTIC_INDICES['BAG_PAND']``.
    """

    id = es.Keyword()
    # Analyzed with the autocomplete analyzer; two sub-fields are added:
    # ``raw`` (exact keyword) and ``nozero`` (text using the nozero analyzer).
    landelijk_id = es.Text(analyzer=analyzers.autocomplete,
                           fields={
                               'raw': es.Keyword(),
                               'nozero': es.Text(analyzer=analyzers.nozero)
                           })
    # Sub-fields come from the shared ``naam_fields`` mapping defined elsewhere.
    pandnaam = es.Text(analyzer=analyzers.adres, fields=naam_fields)
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_PAND']
Exemple #4
0
class CollectionDocument(BaseDocument):
    """Document for collection search.

    Extends ``BaseDocument`` with descriptor data, tags and a description;
    stored in the ``collection`` index.
    """

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)
    # fielddata is enabled on this text field (Elasticsearch ``fielddata``
    # mapping parameter, needed to sort or aggregate on text).
    description = dsl.Text(fielddata=True)

    class Index:
        """Meta class for collection search document."""

        name = "collection"
class FeatureSearchDocument(BaseDocument):
    """Index for feature search.

    ``feature_id`` and ``name`` are keywords with two text sub-fields each:
    ``lower`` (identifier analyzer) and ``ngrams`` (autocomplete analyzer at
    index time, autocomplete search analyzer at query time). ``aliases`` only
    has the ``ngrams`` sub-field.
    """

    # pylint: disable=no-member
    source = dsl.Keyword()
    feature_id = dsl.Keyword(
        # Additional subfield used for boosting during autocomplete.
        fields={
            'lower': {
                'type': 'text',
                'analyzer': identifier_analyzer
            },
            'ngrams': {
                'type': 'text',
                'analyzer': autocomplete_analyzer,
                'search_analyzer': autocomplete_search_analyzer,
            },
        }, )
    species = dsl.Keyword()
    type = dsl.Keyword()  # pylint: disable=invalid-name
    # index=False: the value is kept in the document but is not searchable.
    sub_type = dsl.Keyword(index=False)
    name = dsl.Keyword(
        # Additional subfield used for boosting during autocomplete.
        fields={
            'lower': {
                'type': 'text',
                'analyzer': identifier_analyzer
            },
            'ngrams': {
                'type': 'text',
                'analyzer': autocomplete_analyzer,
                'search_analyzer': autocomplete_search_analyzer,
            },
        }, )
    # Not indexed (index=False), like sub_type above.
    full_name = dsl.Text(index=False)
    description = dsl.Text(index=False)
    aliases = dsl.Keyword(
        multi=True,
        # Additional subfield used for boosting during autocomplete.
        fields={
            'ngrams': {
                'type': 'text',
                'analyzer': autocomplete_analyzer,
                'search_analyzer': autocomplete_search_analyzer,
            },
        },
    )

    class Meta:
        """Meta class for feature search document."""

        index = 'feature_search'
class DataDocType(es.DocType):
    """Elasticsearch test model.

    Declares one field of each commonly used type (keyword, text, date,
    boolean, integer) and is bound to the ``test`` index via ``Meta``.
    """
    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'
Exemple #7
0
class Text(es.InnerDoc):
    """Simple Elasticsearch DSL mapping of the text data this plugin will return.

    This is an ``InnerDoc``, i.e. it is meant to be embedded in another
    document rather than indexed on its own.
    """

    # NOTE(review): the pattern_* and vader_* floats are presumably sentiment
    # scores produced by the Pattern and VADER analyzers -- confirm against
    # the code that fills this document.
    full_text = es.Text()
    pattern_polarity = es.Float()
    pattern_subjectivity = es.Float()
    short_text = es.Text()
    translated = es.Text()
    truncated = es.Boolean()
    tweet_length = es.Integer()
    vader_compound = es.Float()
    vader_compound_inverted = es.Float()
    vader_negative = es.Float()
    vader_neutral = es.Float()
    vader_positive = es.Float()
Exemple #8
0
class Location(es.InnerDoc):  # pylint: disable=too-few-public-methods
    """
    InnerDoc mapping of location information embedded within a tweet.

    This data is created by us during the processing pipeline.
    """

    # doc_values is explicitly enabled on the keyword fields.
    city = es.Keyword(doc_values=True)
    country = es.Keyword(doc_values=True)
    county = es.Keyword(doc_values=True)
    id = es.Text()
    # NOTE(review): latitude/longitude are mapped as plain text, not as a
    # geo point -- confirm that no geo queries are expected on them.
    latitude = es.Text()
    longitude = es.Text()
    resolution_method = es.Text()
    state = es.Keyword(doc_values=True)
Exemple #9
0
class DataDocType(es.Document):
    """Elasticsearch test model.

    Declares one field of each commonly used type (keyword, text, date,
    boolean, integer, geo point) and is bound to the ``test`` index.
    """
    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    location = es.GeoPoint()
    description = es.Text()


    class Index:
        name = 'test'
Exemple #10
0
def document_field(field):
    """
    The default ``field_factory`` method for converting Django field instances to ``elasticsearch_dsl.Field`` instances.
    Auto-created fields (primary keys, for example) and one-to-many fields (reverse FK relationships) are skipped.

    :param field: Django model field instance to convert.
    :return: ``None`` for skipped fields, ``RawMultiString`` for many-to-many
        fields, a fresh ``dsl`` field instance when the exact field class is
        listed below, and ``RawString`` for everything else.
    """
    if field.auto_created or field.one_to_many:
        return None
    if field.many_to_many:
        return RawMultiString
    # Map of exact Django field class -> dsl field class. Classes (not
    # instances) are stored so that only the field actually needed is built.
    # The lookup is by exact class, so subclasses of these Django fields fall
    # through to RawString.
    defaults = {
        models.DateField: dsl.Date,
        models.DateTimeField: dsl.Date,
        models.IntegerField: dsl.Long,
        models.PositiveIntegerField: dsl.Long,
        models.BooleanField: dsl.Boolean,
        models.NullBooleanField: dsl.Boolean,
        # Slugs must be matched exactly. The old mapping was
        # ``String(index='not_analyzed')``; its text/keyword-era equivalent is
        # ``Keyword`` (``index`` on a text field is a boolean and does not
        # disable analysis).
        models.SlugField: dsl.Keyword,
        models.DecimalField: dsl.Double,
        models.FloatField: dsl.Float,
    }
    field_class = defaults.get(field.__class__)
    if field_class is None:
        return RawString
    return field_class()
Exemple #11
0
class ExtendedDataDocument(object):
    """Data ES document extensions.

    Plain class (not a document subclass) that only declares extra fields.
    NOTE(review): presumably mixed into a data document elsewhere -- confirm.
    """

    source = dsl.Keyword()
    species = dsl.Text()
    build = dsl.Keyword()
    feature_type = dsl.Keyword()
Exemple #12
0
class Node(es.DocType):
    """
    Elastic document describing a node.

    Bound to the ``nodes`` index via ``Meta``.
    """

    node_type = es.Keyword()

    objectID = es.Keyword()

    # Text analyzed with the ``autocomplete`` analyzer; fielddata enabled.
    name = es.Text(
        fielddata=True,
        analyzer=autocomplete
    )

    # NOTE(review): the sub-fields are passed via ``fields=``;
    # elasticsearch_dsl documents ``properties=`` for the inner fields of an
    # Object -- confirm this yields the intended mapping.
    user = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Text(
                fielddata=True,
                analyzer=autocomplete)
        }
    )

    description = es.Text()

    is_free = es.Boolean()

    # Same ``fields=`` construction as ``user`` above.
    project = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Keyword(),
            'url': es.Keyword(),
        }
    )

    media = es.Keyword()

    picture = es.Keyword()

    tags = es.Keyword(multi=True)
    license_notes = es.Text()

    created_at = es.Date()
    updated_at = es.Date()

    class Meta:
        index = 'nodes'
Exemple #13
0
class Monument(es.Document):
    """
    Elastic data for Monument.

    The index name is read from ``settings.ELASTIC_INDICES['MONUMENTEN']``.
    """
    id = es.Keyword()
    type = es.Keyword()
    # Main field uses the monument_naam analyzer. Sub-fields: ``raw`` (text
    # with no analyzer specified) and ``keyword`` (text using the subtype
    # analyzer). Fielddata is enabled on all three.
    naam = es.Text(fielddata=True,
                   analyzer=analyzers.monument_naam,
                   fields={
                       'raw':
                       es.Text(fielddata=True),
                       'keyword':
                       es.Text(fielddata=True, analyzer=analyzers.subtype)
                   })

    class Index:
        name = settings.ELASTIC_INDICES['MONUMENTEN']
Exemple #14
0
class User(es.DocType):
    """Elastic document describing user.

    ``username`` and ``email`` each come in two flavours: a text field
    analyzed with the ``autocomplete`` analyzer and an ``*_exact`` keyword
    field. Bound to the ``users`` index via ``Meta``.
    """

    objectID = es.Keyword()

    username = es.Text(fielddata=True, analyzer=autocomplete)
    username_exact = es.Keyword()
    full_name = es.Text(fielddata=True, analyzer=autocomplete)

    # multi=True: these fields hold lists of values.
    roles = es.Keyword(multi=True)
    groups = es.Keyword(multi=True)

    email = es.Text(fielddata=True, analyzer=autocomplete)
    email_exact = es.Keyword()

    class Meta:
        index = 'users'
Exemple #15
0
class LawSuitModel(es.Document):
    """Elasticsearch document stored in the ``law_go_kr`` index.

    NOTE(review): judging by the names this holds a law-suit record with its
    raw HTML detail page and a searchable variant -- confirm with the writer.
    """

    class Index:
        name = 'law_go_kr'

    case_id = es.Keyword()
    index_data = es.Keyword()
    detail_data_html = es.Text()
    # Mapped as keyword (exact value), unlike detail_data_html which is text.
    detail_data_searchable = es.Keyword()
Exemple #16
0
class KadastraalSubject(es.DocType):
    """Search document for a "kadastraal subject".

    The index name is read from ``settings.ELASTIC_INDICES['BRK_SUBJECT']``.
    """

    # Main field uses the naam analyzer. Sub-fields: ``raw`` (exact keyword)
    # and ``ngram`` (kad_sbj_naam analyzer at index time,
    # kad_obj_aanduiding_keyword analyzer at search time).
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword)})

    natuurlijk_persoon = es.Boolean()
    geslachtsnaam = es.Text(analyzer=analyzers.naam)
    order = es.Integer()

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']
Exemple #17
0
class TagDoc(DocType):
    """Search document for a tag, stored in the ``tag`` index."""

    id = edsl.Integer(required=True)
    # The name is also copied into the field named by ALL_DATA_FIELD.
    name = edsl.Text(copy_to=ALL_DATA_FIELD)

    @classmethod
    def from_instance(cls, instance):
        """Serialize ``instance`` to a dict including the document meta.

        The document id (in ``meta``) and the ``id`` field are both taken
        from ``instance.id``; ``name`` is taken from ``instance.name``.
        """
        init_kwargs = {
            'meta': {'id': instance.id},
            'id': instance.id,
            'name': instance.name,
        }
        return cls(**init_kwargs).to_dict(include_meta=True)

    class Index:
        name = 'tag'
Exemple #18
0
class CollectionDocument(BaseDocument):
    """Document for collection search.

    Extends ``BaseDocument`` with descriptor data and tags; bound to the
    ``collection`` index via ``Meta``.
    """

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    # multi=True: the field holds a list of values.
    tags = dsl.Keyword(multi=True)

    class Meta:
        """Meta class for collection search document."""

        index = 'collection'
Exemple #19
0
class Gebied(es.DocType):
    """
    A searchable area.

    Covers these area kinds (Dutch names as used in the data):

    Unesco
    Buurt (neighbourhood)
    Buurtcombinatie (neighbourhood combination)
    Stadsdeel (city district)
    Grootstedelijk (metropolitan)
    Gemeente (municipality)
    Woonplaats (place of residence)

    The index name is read from ``settings.ELASTIC_INDICES['BAG_GEBIED']``.
    """

    id = es.Keyword()

    _display = es.Keyword()

    # The three name variants share the adres analyzer and the ``text_fields``
    # sub-field mapping defined elsewhere.
    naam = es.Text(analyzer=analyzers.adres, fields=text_fields)

    naam_nen = es.Text(analyzer=analyzers.adres, fields=text_fields)

    naam_ptt = es.Text(analyzer=analyzers.adres, fields=text_fields)

    postcode = es.Text(analyzer=analyzers.postcode, fields=postcode_fields)

    # Indexed with the autocomplete analyzer but searched with the standard
    # analyzer; sub-fields: ``keyword`` (exact) and ``ngram`` (autocomplete).
    g_code = es.Text(analyzer=analyzers.autocomplete,
                     search_analyzer='standard',
                     fields={
                         'keyword': es.Keyword(),
                         'ngram': es.Text(analyzer=analyzers.autocomplete),
                     })

    # area order
    order = es.Integer()

    subtype = es.Keyword()
    type = es.Keyword()

    centroid = es.GeoPoint()

    landelijk_id = es.Text(  # Only for openbare_ruimte (public space)
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero)
        })

    gsg_type = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_GEBIED']
Exemple #20
0
class TestAnalyzerSearchDocument(BaseDocument):
    """Test document whose ``name`` field uses a custom analyzer.

    The analyzer ``test_analyzer`` uses the ``keyword`` tokenizer followed by
    the ``lowercase`` filter. Stored in the ``test_analyzer_search`` index.
    """

    # NOTE(review): ``fielddata=True`` is passed to ``dsl.analyzer()``, not to
    # ``dsl.Text()`` -- confirm whether it was meant as a field option.
    name = dsl.Text(analyzer=dsl.analyzer(
        "test_analyzer",
        tokenizer="keyword",
        filter=[
            "lowercase",
        ],
        fielddata=True,
    ))

    class Index:
        name = "test_analyzer_search"
class TestAnalyzerSearchDocument(BaseDocument):
    """Test document whose ``name`` field uses a custom analyzer.

    The analyzer ``test_analyzer`` uses the ``keyword`` tokenizer followed by
    the ``lowercase`` filter. Stored in the ``test_analyzer_search`` index.
    """

    # NOTE(review): ``fielddata=True`` is passed to ``dsl.analyzer()``, not to
    # ``dsl.Text()`` -- confirm whether it was meant as a field option.
    name = dsl.Text(analyzer=dsl.analyzer(
        'test_analyzer',
        tokenizer='keyword',
        filter=[
            'lowercase',
        ],
        fielddata=True,
    ))

    class Index:
        name = 'test_analyzer_search'
Exemple #22
0
class Dictionary(es.Document):
    """Elasticsearch document describing a dictionary.

    The index name comes from ``ES_INDEX_DICTIONARY_INDEX`` and the
    connection from ``ES_CLIENT`` (both defined elsewhere).
    """

    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()

    is_ready = es.Boolean()

    class Index:
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT
Exemple #23
0
class SponsorDoc(DocType):
    """Search document for a sponsor, stored in the ``sponsor`` index."""

    id = edsl.Integer(required=True)
    # Indexed with the autocomplete analyzer, searched with the standard one;
    # the value is also copied into the field named by ALL_DATA_FIELD.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )

    @classmethod
    def from_instance(cls, instance):
        """Serialize ``instance`` to a dict including the document meta.

        The document id (in ``meta``) and the ``id`` field are both taken
        from ``instance.id``; ``name`` is taken from ``instance.name``.
        """
        init_kwargs = {
            'meta': {'id': instance.id},
            'id': instance.id,
            'name': instance.name,
        }
        return cls(**init_kwargs).to_dict(include_meta=True)

    class Index:
        name = 'sponsor'
Exemple #24
0
class TestAnalyzerSearchDocument(BaseDocument):
    """Test document whose ``name`` field uses a custom analyzer.

    The analyzer ``test_analyzer`` uses the ``keyword`` tokenizer followed by
    the ``lowercase`` filter. Bound to the ``test_analyzer_search`` index via
    ``Meta``.
    """

    # pylint: disable=no-member
    # NOTE(review): ``fielddata=True`` is passed to ``dsl.analyzer()``, not to
    # ``dsl.Text()`` -- confirm whether it was meant as a field option.
    name = dsl.Text(analyzer=dsl.analyzer(
        'test_analyzer',
        tokenizer='keyword',
        filter=[
            'lowercase',
        ],
        fielddata=True,
    ))

    class Meta:
        index = 'test_analyzer_search'
Exemple #25
0
class TestSearchDocument(BaseDocument):
    """Test document stored in the ``test_search`` index.

    Mixes built-in ``dsl`` field types with the custom ``Name`` and
    ``ProcessType`` fields defined elsewhere.
    """

    id = dsl.Integer()
    # fielddata enabled on this text field.
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    json = dsl.Object()

    # Custom field types (defined outside this block).
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Index:
        name = "test_search"
Exemple #26
0
        def decorator(cls):
            """Attach elasticsearch_dsl fields to ``cls`` based on ``cls.schema``.

            For every key in ``cls.schema`` the entry's ``"type"`` is mapped to
            an elasticsearch_dsl field class, and an instance of it is set on
            ``cls`` under the same name.

            Two special keys may be used in a schema entry in addition to the
            standard cerberus syntax:

            * ``"elastic"``: dict of keyword arguments handed unchanged to the
              field constructor.
            * ``"elastictype"``: meant for a more specific elasticsearch_dsl
              type definition; it is not consulted here, only removed.

            Both special keys are removed from the schema entry once it has
            been processed.

            :param cls: class exposing a ``schema`` dict of field definitions.
            :return: the same class, with the field attributes added.
            :raises ValueError: if an entry's ``"type"`` has no known mapping.
                Attributes set for earlier entries are left in place.
            """
            print("setup_schema:" + cls.__name__.lower())

            import elasticsearch_dsl

            # Schema type name -> elasticsearch_dsl field class.
            field_classes = {
                "integer": elasticsearch_dsl.Integer,
                "float": elasticsearch_dsl.Float,
                "string": elasticsearch_dsl.Text,
                "bool": elasticsearch_dsl.Boolean,
                "date": elasticsearch_dsl.Date,
                "datetime": elasticsearch_dsl.Date,
                "number": elasticsearch_dsl.Integer,
                "binary": elasticsearch_dsl.Byte,
                "list": elasticsearch_dsl.Keyword,
            }

            for elem, definition in cls.schema.items():
                # The raw field __init__ keyword arguments.
                elastic = definition.get("elastic", {})
                type_name = definition["type"]
                # Non-string types (e.g. a list of types) are not hashable or
                # not supported; treat them like any other unknown type.
                field_class = (
                    field_classes.get(type_name)
                    if isinstance(type_name, str) else None
                )
                if field_class is None:
                    raise ValueError(
                        "Wrong Datatype in schema: {!r} (field {!r})".format(
                            type_name, elem))
                setattr(cls, elem, field_class(**elastic))
                # Remove the elastic-only keys from the schema entry.
                definition.pop("elastic", None)
                definition.pop("elastictype", None)

            return cls
Exemple #27
0
class Mappings(es.Document):
    """Elasticsearch document holding a mapping between two topic modellings.

    The index name comes from ``ES_INDEX_MAPPINGS`` and the connection from
    ``ES_CLIENT`` (both defined elsewhere).
    """

    threshold = es.Keyword()
    meta_dtm_name = es.Keyword()
    topic_modelling_first = es.Keyword()
    topic_modelling_second = es.Keyword()
    # These two previously ended with a trailing comma, which made each
    # attribute a 1-tuple instead of a field, so the document mapping never
    # included them as dates.
    topic_modelling_first_from = es.Date()
    topic_modelling_second_to = es.Date()
    mappings_dict = es.Text()
    scores_list = es.Keyword()
    delta_words_dict = es.Text()
    delta_count_dict = es.Text()

    class Index:
        name = ES_INDEX_MAPPINGS
        using = ES_CLIENT

        settings = {
            "index.mapping.total_fields.limit": 5000,
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }

        # NOTE(review): elasticsearch_dsl documents ``name``, ``using``,
        # ``settings``, ``aliases`` and ``analyzers`` as Index options;
        # ``mappings`` is presumably not read by the library -- confirm
        # whether other code consumes this dict before relying on it.
        mappings = {
            "properties": {
                "threshold": {
                    "type": "keyword",
                },
                "meta_dtm_name": {
                    "type": "keyword",
                },
                "topic_modelling_first_from": {
                    "type": "date"
                },
                "topic_modelling_second_to": {
                    "type": "date"
                }
            },
        }
Exemple #28
0
class TestSearchDocument(BaseDocument):
    """Test document bound to the ``test_search`` index via ``Meta``.

    Mixes built-in ``dsl`` field types with the custom ``Name`` and
    ``ProcessType`` fields defined elsewhere.
    """

    # pylint: disable=no-member
    id = dsl.Integer()  # pylint: disable=invalid-name
    # fielddata enabled on this text field.
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    json = dsl.Object()

    # Custom field types (defined outside this block).
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Meta:
        index = 'test_search'
Exemple #29
0
class Bouwblok(es.DocType):
    """
    Bouwblok searchable fields.

    The index name is read from ``settings.ELASTIC_INDICES['BAG_BOUWBLOK']``.
    """
    # Analyzed with the bouwblokid analyzer; the ``keyword`` sub-field keeps
    # the exact value.
    code = es.Text(
        analyzer=analyzers.bouwblokid,
        fields={'keyword': es.Keyword()},
    )

    subtype = es.Keyword()

    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_BOUWBLOK']
Exemple #30
0
class ContainerDoc(DocType):
    """Search document for a container, stored in the ``container`` index."""

    id = edsl.Integer(required=True)
    # Indexed with the autocomplete analyzer, searched with the standard one;
    # the value is also copied into the field named by ALL_DATA_FIELD.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )
    issn = edsl.Keyword()

    @classmethod
    def from_instance(cls, container):
        """Serialize ``container`` to a dict including the document meta.

        The document id (in ``meta``) and the ``id`` field are both taken
        from ``container.id``; ``name`` and ``issn`` are copied from the
        attributes of the same name.
        """
        init_kwargs = {
            'meta': {'id': container.id},
            'id': container.id,
            'name': container.name,
            'issn': container.issn,
        }
        return cls(**init_kwargs).to_dict(include_meta=True)

    class Index:
        name = 'container'