예제 #1
0
class TopicDocument(es.Document):
    """Elasticsearch document holding a topic id/weight next to ``document_*`` fields.

    Presumably one record per (topic, document) pair -- confirm against the
    code that writes these records.
    """

    topic_id = es.Keyword()
    topic_weight = es.Float()
    document_es_id = es.Keyword()
    datetime = es.Date()
    document_source = es.Keyword()
    document_corpus = es.Keyword()
    # NOTE(review): declared as Integer here, but ``Index.mappings`` below
    # lists both counters as "long" -- confirm which one is authoritative.
    document_num_views = es.Integer()
    document_num_comments = es.Integer()

    class Index:
        # Index name and client come from module-level constants.
        name = ES_INDEX_TOPIC_DOCUMENT  # f"{ES_INDEX_TOPIC_DOCUMENT}_{tm}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Same values as ``settings`` except for two shards instead of three.
        # NOTE(review): presumably read by project code for the per-model
        # ("dynamic") indices hinted at in the ``name`` comment -- confirm.
        settings_dynamic = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Explicit mapping that mirrors the fields declared on the class.
        # NOTE(review): presumably consumed by project code when the index is
        # created by hand -- confirm.
        mappings = {
            "properties": {
                "datetime": {
                    "type": "date"
                },
                "document_es_id": {
                    "type": "keyword",
                },
                "document_source": {
                    "type": "keyword",
                },
                "document_corpus": {
                    "type": "keyword",
                },
                "document_num_views": {
                    "type": "long",
                },
                "document_num_comments": {
                    "type": "long",
                },
                "topic_id": {
                    "type": "keyword",
                },
                "topic_weight": {
                    "type": "float"
                }
            }
        }
예제 #2
0
class TestSearchDocument(BaseDocument):
    """Search document declared for the "test_search" index."""

    id = dsl.Integer()
    # fielddata=True is passed through to the text field definition.
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    json = dsl.Object()

    # Fields built from the project-defined Name / ProcessType field types.
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Index:
        name = "test_search"
예제 #3
0
class TestSearchDocument(BaseDocument):
    """Search document declared for the 'test_search' index (``Meta``-style config)."""

    # pylint: disable=no-member
    id = dsl.Integer()  # pylint: disable=invalid-name
    name = dsl.String()
    num = dsl.Integer()
    json = dsl.Object()

    # Fields built from the project-defined Name / ProcessType field types.
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Meta:
        index = 'test_search'
예제 #4
0
        def decorator(cls):
            """Attach an elasticsearch_dsl field to ``cls`` for every ``cls.schema`` entry.

            ``cls.schema`` maps field names to per-field dicts written in
            cerberus syntax. Two extra keys may appear in a per-field dict:

            * ``"elastic"``: kwargs handed unchanged to the field constructor.
            * ``"elastictype"``: intended for a more specific elasticsearch_dsl
              type; this function does not read it, it only removes it.

            Both extra keys are removed from the schema once the field is set.

            Returns:
                ``cls`` itself, with one class attribute set per schema entry.

            Raises:
                Exception: if an entry's ``"type"`` has no mapping below.
                KeyError: if an entry has no ``"type"`` key.
            """
            print("setup_schema:" + cls.__name__.lower())
            import elasticsearch_dsl

            # Schema "type" value -> elasticsearch_dsl field class.
            field_types = {
                "integer": elasticsearch_dsl.Integer,
                "float": elasticsearch_dsl.Float,
                "string": elasticsearch_dsl.Text,
                "bool": elasticsearch_dsl.Boolean,
                # cerberus itself spells this type "boolean"; accept both.
                "boolean": elasticsearch_dsl.Boolean,
                "date": elasticsearch_dsl.Date,
                "datetime": elasticsearch_dsl.Date,
                "number": elasticsearch_dsl.Integer,
                "binary": elasticsearch_dsl.Byte,
                "list": elasticsearch_dsl.Keyword,
            }

            for elem, spec in cls.schema.items():
                # The raw field __init__ parameters dict.
                elastic = spec.get("elastic", {})
                type_name = spec["type"]
                try:
                    field_class = field_types[type_name]
                except (KeyError, TypeError):
                    # TypeError covers unhashable values such as a list of types.
                    raise Exception(
                        "Wrong Datatype in schema: field %r has type %r"
                        % (elem, type_name)
                    )
                setattr(cls, elem, field_class(**elastic))
                # Strip the two elastic-only keys so plain cerberus syntax remains.
                spec.pop("elastic", None)
                spec.pop("elastictype", None)

            return cls
예제 #5
0
class TopicCombo(es.Document):
    """Elasticsearch document with a ``topics`` object and ``common_docs_*`` fields.

    Presumably describes a combination of topics and the documents they
    share -- confirm against the code that writes these records.
    """

    topics = es.Object()
    common_docs_ids = es.Keyword()
    common_docs_num = es.Integer()

    class Index:
        # Index name and client come from module-level constants.
        name = ES_INDEX_TOPIC_COMBOS  # f"{ES_INDEX_TOPIC_COMBOS}_{tm}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Explicit mapping that mirrors the three fields declared on the class.
        # NOTE(review): presumably consumed by project code when the index is
        # created by hand -- confirm.
        mappings = {
            "properties": {
                "topics": {
                    "type": "object"
                },
                "common_docs_ids": {
                    "type": "keyword",
                },
                "common_docs_num": {
                    "type": "integer",
                },
            }
        }
예제 #6
0
파일: base.py 프로젝트: tjanez/resolwe
class BaseDocument(indices.BaseDocument):
    """Base search document.

    Declares the id, slug, version, name, timestamp, contributor and owner
    fields on top of ``indices.BaseDocument``.
    """

    id = dsl.Integer()  # pylint: disable=invalid-name
    # Slug, Name and User are project-defined field types.
    slug = Slug()
    version = dsl.Keyword()
    name = Name()
    created = dsl.Date()
    modified = dsl.Date()
    contributor_id = dsl.Integer()
    contributor_name = User()
    # We use a separate field for contributor sorting because we use an entirely
    # different value for it (the display name).
    contributor_sort = dsl.Keyword()
    # multi=True declares the owner fields as multi-valued.
    owner_ids = dsl.Integer(multi=True)
    owner_names = User(multi=True)
예제 #7
0
class TestSearchDocument(BaseDocument):
    """Minimal search document declared for the 'test_search' index."""

    # pylint: disable=no-member
    name = dsl.String()
    num = dsl.Integer()
    json = dsl.Object()

    class Meta:
        index = 'test_search'
예제 #8
0
class TrainingJob(elasticsearch_dsl.Document):
    """Elasticsearch document describing one training job.

    Fields fall into three groups: job bookkeeping, metrics and params.
    The index name comes from the ``TRAINING_JOBS`` constant.
    """

    # Job bookkeeping
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()

    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    # zoom_range and pixel_value_range are Keyword, unlike the other ranges.
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()

    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()

    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()

    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    # It names exactly the fields of the "Params" section above, in the same
    # order, and has to be kept in sync with them by hand.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range', 'zoom_range',
        'horizontal_flip', 'vertical_flip', 'dropout_rate1', 'dropout_rate2',
        'data_dir', 'gcs_url', 'mip_thickness', 'height_offset',
        'pixel_value_range'
    ]

    class Index:
        name = TRAINING_JOBS
예제 #9
0
파일: entity.py 프로젝트: fagan2888/resolwe
class EntityDocument(CollectionDocument):
    """Document for entity search.

    Adds ``collection`` and ``type`` to the fields of ``CollectionDocument``.
    """

    # Single integer; presumably the id of the owning collection -- confirm.
    collection = dsl.Integer()
    type = dsl.Keyword()

    class Index:
        """Meta class for entity search document."""

        name = "entity"
예제 #10
0
파일: entity.py 프로젝트: tjanez/resolwe
class EntityDocument(CollectionDocument):
    """Document for entity search.

    Adds ``descriptor_completed`` and ``collections`` to the fields of
    ``CollectionDocument``.
    """

    descriptor_completed = dsl.Boolean()
    # multi=True declares the field as multi-valued.
    collections = dsl.Integer(multi=True)

    class Meta:
        """Meta class for entity search document."""

        index = 'entity'
예제 #11
0
class PhotoDocument(esd.DocType):
    """Document with a photo's capture settings and file location fields.

    ``model`` and ``lens`` are stored not_analyzed. Their ``*_ci`` twins use
    a keyword tokenizer with a lowercase filter instead.
    """

    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    size = esd.Integer()
    model = esd.String(index='not_analyzed')
    model_ci = esd.String(
        analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase']),
    )
    lens = esd.String(index='not_analyzed')
    lens_ci = esd.String(
        analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase']),
    )
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')

    def extended_dict(self):
        """Return ``to_dict()`` with the document's meta id stored under "id"."""
        return dict(self.to_dict(), id=self.meta.id)
예제 #12
0
class TagDoc(DocType):
    """Document for the 'tag' index: a required integer id and a text name."""

    id = edsl.Integer(required=True)
    # The name is also copied into the field named by ALL_DATA_FIELD.
    name = edsl.Text(copy_to=ALL_DATA_FIELD)

    @classmethod
    def from_instance(cls, instance):
        """Build a document from ``instance`` and return it as a dict with metadata.

        Reads ``instance.id`` (also used as the document's meta id) and
        ``instance.name``.
        """
        meta = {'id': instance.id}
        values = {'id': instance.id, 'name': instance.name}
        return cls(meta=meta, **values).to_dict(include_meta=True)

    class Index:
        name = 'tag'
예제 #13
0
파일: data.py 프로젝트: mzagmajster/resolwe
class DataDocument(BaseDocument):
    """Document for data search.

    Adds run timing, status, process and tag fields, plus single-valued
    ``collection`` and ``entity`` fields, to ``BaseDocument``.
    """

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility: ``type`` uses the same field type as
    # ``process_type``.
    type = ProcessType()
    process_name = Name()
    # multi=True declares the field as multi-valued.
    tags = dsl.Keyword(multi=True)

    collection = dsl.Integer()
    entity = dsl.Integer()

    class Index:
        """Meta class for data search document."""

        name = 'data'
예제 #14
0
class GroupDocument(esd.DocType):
    """Document with capture-setting fields and not_analyzed string fields.

    Presumably describes a group of photos -- confirm against the code that
    fills it.
    """

    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    # All string fields below are stored not_analyzed.
    model = esd.String(index='not_analyzed') # a lowercase keyword analyzer was tried here and left disabled
    lens = esd.String(index='not_analyzed')
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')
예제 #15
0
class Dictionary(es.Document):
    """Elasticsearch document with descriptive fields for a dictionary.

    Holds corpus, name, description, timestamp, document count and a
    readiness flag.
    """

    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()

    # Presumably set once the dictionary has been fully built -- confirm.
    is_ready = es.Boolean()

    class Index:
        # Index name and client come from module-level constants.
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT
class DataDocType(es.DocType):
    """Elasticsearch test model.

    Mixes keyword, text, date, boolean and integer fields; stored in the
    'test' index.
    """
    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'
예제 #17
0
파일: data.py 프로젝트: mstajdohar/resolwe
class DataDocument(BaseDocument):
    """Document for data search.

    Adds run timing, status, process and tag fields, plus multi-valued
    relation fields, to ``BaseDocument``.
    """

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility: ``type`` uses the same field type as
    # ``process_type``.
    type = ProcessType()  # pylint: disable=invalid-name
    process_name = Name()
    tags = dsl.Keyword(multi=True)

    # All four relation fields are declared multi-valued.
    collection = dsl.Integer(multi=True)
    parents = dsl.Integer(multi=True)
    children = dsl.Integer(multi=True)
    entity = dsl.Integer(multi=True)

    class Meta:
        """Meta class for data search document."""

        index = 'data'
예제 #18
0
class SponsorDoc(DocType):
    """Document for the 'sponsor' index: a required integer id and a text name."""

    id = edsl.Integer(required=True)
    # Indexed with autocomplete_analyzer and searched with the 'standard'
    # analyzer; the value is also copied into the ALL_DATA_FIELD field.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )

    @classmethod
    def from_instance(cls, instance):
        """Build a document from ``instance`` and return it as a dict with metadata.

        Reads ``instance.id`` (also used as the document's meta id) and
        ``instance.name``.
        """
        meta = {'id': instance.id}
        values = {'id': instance.id, 'name': instance.name}
        return cls(meta=meta, **values).to_dict(include_meta=True)

    class Index:
        name = 'sponsor'
예제 #19
0
class Gebied(es.DocType):
    """
    A searchable area ("gebied").

    Unesco
    Neighbourhood (buurt)
    Neighbourhood combination (buurtcombinatie)
    City district (stadsdeel)
    Metropolitan area (grootstedelijk)
    Municipality (gemeente)
    Place of residence (woonplaats)
    """

    id = es.Keyword()

    _display = es.Keyword()

    # The three name variants share the ``adres`` analyzer and ``text_fields``.
    naam = es.Text(analyzer=analyzers.adres, fields=text_fields)

    naam_nen = es.Text(analyzer=analyzers.adres, fields=text_fields)

    naam_ptt = es.Text(analyzer=analyzers.adres, fields=text_fields)

    postcode = es.Text(analyzer=analyzers.postcode, fields=postcode_fields)

    # Indexed with the autocomplete analyzer, searched with 'standard';
    # 'keyword' and 'ngram' sub-fields are declared alongside.
    g_code = es.Text(analyzer=analyzers.autocomplete,
                     search_analyzer='standard',
                     fields={
                         'keyword': es.Keyword(),
                         'ngram': es.Text(analyzer=analyzers.autocomplete),
                     })

    # area ordering
    order = es.Integer()

    subtype = es.Keyword()
    type = es.Keyword()

    centroid = es.GeoPoint()

    landelijk_id = es.Text(  # Only for openbare_ruimte
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero)
        })

    gsg_type = es.Keyword()

    class Index:
        # Index name is looked up in the project settings.
        name = settings.ELASTIC_INDICES['BAG_GEBIED']
예제 #20
0
class TopicModellingIndex(es.Document):
    """Elasticsearch document with the fields describing one topic model.

    Covers source corpus, status flags, date range, algorithm settings,
    quality scores and the nested list of topics.
    """

    # Identification and status
    corpus = es.Keyword()
    source = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    has_topic_info = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    # Date range; presumably the range of documents covered -- confirm.
    datetime_from = es.Date()
    datetime_to = es.Date()

    # Algorithm configuration
    algorithm = es.Keyword()
    number_of_topics = es.Integer()
    hierarchical = es.Boolean()
    meta_parameters = es.Object()

    # Quality scores
    perplexity = es.Float()
    purity = es.Float()
    contrast = es.Float()
    coherence = es.Float()

    # tau_* values; presumably regularizer coefficients -- confirm.
    tau_smooth_sparse_theta = es.Float()
    tau_smooth_sparse_phi = es.Float()
    tau_decorrelator_phi = es.Float()
    tau_coherence_phi = es.Float()

    # Nested documents of the ``Topic`` type defined elsewhere.
    topics = es.Nested(Topic)

    is_actualizable = es.Boolean()

    class Index:
        # Index name and client come from module-level constants.
        name = ES_INDEX_TOPIC_MODELLING
        using = ES_CLIENT
예제 #21
0
class Text(es.InnerDoc):
    """Simple Elasticsearch DSL mapping of the text data this plugin will return.

    Declared as an ``InnerDoc``, with text variants, a truncation flag, a
    length and several float scores.
    """

    full_text = es.Text()
    # pattern_* and vader_* are float scores; presumably sentiment values
    # from the Pattern and VADER libraries -- confirm.
    pattern_polarity = es.Float()
    pattern_subjectivity = es.Float()
    short_text = es.Text()
    translated = es.Text()
    truncated = es.Boolean()
    tweet_length = es.Integer()
    vader_compound = es.Float()
    vader_compound_inverted = es.Float()
    vader_negative = es.Float()
    vader_neutral = es.Float()
    vader_positive = es.Float()
예제 #22
0
class sigpac_record(dsl.DocType):
    """Document stored in the 'plots' index under doc type 'sigpac'.

    Presumably one SIGPAC land-parcel record per document -- confirm against
    the importer.
    """

    dn_pk = dsl.Long()

    # Integer codes; presumably the administrative location of the plot --
    # confirm.
    provincia = dsl.Integer()
    municipio = dsl.Integer()
    poligono = dsl.Integer()
    parcela = dsl.Integer()
    recinto = dsl.Integer()
    zona = dsl.Integer()

    # Measurements and geometry
    perimetro = dsl.Long()
    superficie = dsl.Long()
    pend_med = dsl.Integer()
    points = dsl.GeoShape()
    bbox = dsl.GeoShape()
    bbox_center = dsl.GeoPoint(lat_lon=True)

    uso_sigpac = dsl.String()

    agregado = dsl.Integer()
    cap_auto = dsl.Integer()
    cap_manual = dsl.Integer()
    coef_regadio = dsl.Float()
    c_refpar = dsl.String()
    c_refpol = dsl.String()
    c_refrec = dsl.String()
    dn_oid = dsl.Long()

    elevation = dsl.Float()

    def save(self, **kwargs):
        """Forward the keyword arguments unchanged to the parent ``save``."""
        return super(sigpac_record, self).save(**kwargs)

    class Meta:
        index = 'plots'
        doc_type = 'sigpac'
예제 #23
0
class DataDocType(es.Document):
    """Elasticsearch test model.

    Mixes keyword, text, date, boolean, integer and geo-point fields; stored
    in the 'test' index.
    """
    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    location = es.GeoPoint()
    description = es.Text()


    class Index:
        name = 'test'
예제 #24
0
class EmbeddingIndex(es.Document):
    """Elasticsearch document with the fields describing one embedding.

    Covers source corpus, status, timestamps and algorithm settings.
    """

    # Identification and status
    corpus = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    # Algorithm configuration
    by_unit = es.Keyword()  # Token/Word/Sentence/Text
    algorithm = es.Keyword()
    pooling = es.Keyword()
    meta_parameters = es.Object()

    class Index:
        # Index name and client come from module-level constants.
        name = ES_INDEX_EMBEDDING
        using = ES_CLIENT
예제 #25
0
class ContainerDoc(DocType):
    """Document for the 'container' index: required integer id, text name, ISSN keyword."""

    id = edsl.Integer(required=True)
    # Indexed with autocomplete_analyzer and searched with the 'standard'
    # analyzer; the value is also copied into the ALL_DATA_FIELD field.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )
    issn = edsl.Keyword()

    @classmethod
    def from_instance(cls, container):
        """Build a document from ``container`` and return it as a dict with metadata.

        Reads ``container.id`` (also used as the document's meta id),
        ``container.name`` and ``container.issn``.
        """
        meta = {'id': container.id}
        values = {
            'id': container.id,
            'name': container.name,
            'issn': container.issn,
        }
        return cls(meta=meta, **values).to_dict(include_meta=True)

    class Index:
        name = 'container'
예제 #26
0
class KadastraalSubject(es.DocType):
    """Search document for a "kadastraal subject" (presumably a cadastral subject).

    The index name is looked up under 'BRK_SUBJECT' in the project settings.
    """

    # Name with a 'raw' keyword sub-field and an 'ngram' sub-field that uses
    # different analyzers for indexing and for searching.
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword)})

    natuurlijk_persoon = es.Boolean()
    geslachtsnaam = es.Text(analyzer=analyzers.naam)
    order = es.Integer()

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']
예제 #27
0
class KadastraalObject(es.DocType):
    """Search document for a "kadastraal object" (presumably a cadastral object).

    The index name is looked up under 'BRK_OBJECT' in the project settings.
    """

    aanduiding = es.Text(
        fielddata=True,
        analyzer=analyzers.postcode,
        fields=kad_text_fields)

    # The search aanduiding is the aanduiding without the "acd00 " prefix
    # remove this in future
    short_aanduiding = es.Text(
        analyzer=analyzers.kad_obj_aanduiding,
        search_analyzer='standard',
        fields=kad_text_fields)

    sectie = es.Text(
        fields=kad_text_fields,
    )

    # objectnummer and indexnummer are indexed with the autocomplete analyzer,
    # searched with 'standard', and share ``kad_int_fields``.
    objectnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields,
    )

    indexletter = es.Keyword(
        fields=kad_text_fields,
    )

    indexnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields
    )

    order = es.Integer()
    centroid = es.GeoPoint()

    gemeente = es.Text(analyzer=analyzers.naam)
    # Keyword passed through the ``lowercase`` normalizer.
    gemeente_code = es.Keyword(normalizer=analyzers.lowercase)

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_OBJECT']
예제 #28
0
class InfoRiegoRecord(dsl.DocType):
    """Document stored in the 'inforiego' index.

    Presumably one weather-station reading per document -- confirm against
    the importer.
    """

    code = dsl.String()
    location = dsl.String()
    date = dsl.Date()
    # Float measurements
    rain = dsl.Float()
    temperature = dsl.Float()
    rel_humidity = dsl.Float()
    radiation = dsl.Float()
    wind_speed = dsl.Float()
    wind_direction = dsl.Float()

    lat_lon = dsl.GeoPoint(lat_lon=True)
    station_height = dsl.Integer()

    def save(self, **kwargs):
        """Forward the keyword arguments unchanged to the parent ``save``."""
        return super(InfoRiegoRecord, self).save(**kwargs)

    class Meta:
        index = 'inforiego'
예제 #29
0
class AuthorDoc(DocType):
    """Document for the 'author' index: required integer id, identifier keywords, text name."""

    id = edsl.Integer(required=True)
    orcid = edsl.Keyword()
    researcherid = edsl.Keyword()
    email = edsl.Keyword()
    # Indexed with autocomplete_analyzer and searched with the 'standard'
    # analyzer; the value is also copied into the ALL_DATA_FIELD field.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )

    @classmethod
    def from_instance(cls, author):
        """Build a document from ``author`` and return it as a dict with metadata.

        Reads ``author.id`` (also used as the document's meta id), ``orcid``,
        ``researcherid``, ``email`` and ``name``.
        """
        meta = {'id': author.id}
        values = {
            'id': author.id,
            'orcid': author.orcid,
            'researcherid': author.researcherid,
            'email': author.email,
            'name': author.name,
        }
        return cls(meta=meta, **values).to_dict(include_meta=True)

    class Index:
        name = 'author'
예제 #30
0
class Term(es.DocType):
    term = es.Text()
    gewicht = es.Integer()