Python Boolean Examples, elasticsearch_dsl.Boolean Python Examples

Example #1

0

Show file

class TrainingJob(elasticsearch_dsl.Document):
    """Elasticsearch document describing a single training job.

    The fields fall into three groups: general job fields, the fields
    under the ``# Metrics`` marker and the fields under the ``# Params``
    marker.  The index name is taken from ``TRAINING_JOBS``, which is
    defined outside this block.
    """
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    # Free-text fields (analysed ``Text`` rather than exact-match ``Keyword``).
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()

    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    # Stored as a keyword, unlike the other ``*_range`` params which are floats.
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()

    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()

    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()

    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    # The names below mirror the fields declared under "# Params" above,
    # in the same order; keep both in sync when adding a parameter.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range', 'zoom_range',
        'horizontal_flip', 'vertical_flip', 'dropout_rate1', 'dropout_rate2',
        'data_dir', 'gcs_url', 'mip_thickness', 'height_offset',
        'pixel_value_range'
    ]

    class Index:
        # Index name constant defined outside this block.
        name = TRAINING_JOBS

Example #2

0

Show file

def document_field(field):
    """
    The default ``field_factory`` method for converting Django field instances to ``elasticsearch_dsl.Field`` instances.
    Auto-created fields (primary keys, for example) and one-to-many fields (reverse FK relationships) are skipped.

    :param field: a Django model field; ``auto_created``, ``one_to_many`` and ``many_to_many`` are read from it.
    :returns: ``None`` for skipped fields, ``RawMultiString`` for many-to-many fields, the mapped
        ``dsl`` field for the Django field classes listed below, and ``RawString`` for anything else.

    The lookup is by exact class (``field.__class__``), so subclasses of the listed Django
    fields fall through to ``RawString``.
    """
    if field.auto_created or field.one_to_many:
        return None
    if field.many_to_many:
        return RawMultiString
    defaults = {
        models.DateField: dsl.Date(),
        models.DateTimeField: dsl.Date(),
        models.IntegerField: dsl.Long(),
        models.PositiveIntegerField: dsl.Long(),
        models.BooleanField: dsl.Boolean(),
        models.NullBooleanField: dsl.Boolean(),
        # Slugs are exact-match values.  ``index='not_analyzed'`` belonged to the
        # legacy ``string`` type; on ``text`` the ``index`` option is a boolean,
        # so the un-analysed equivalent is ``keyword``.
        models.SlugField: dsl.Keyword(),
        models.DecimalField: dsl.Double(),
        models.FloatField: dsl.Float(),
    }
    return defaults.get(field.__class__, RawString)

Example #3

0

Show file

def doc_field(type):
    """Return the ``dsl`` field matching a type name.

    Known names are ``'date'``, ``'integer'``, ``'boolean'``, ``'double'``
    and ``'float'``; every other value yields ``RawString``.
    """
    field_by_name = dict(
        date=dsl.Date(),
        integer=dsl.Long(),
        boolean=dsl.Boolean(),
        double=dsl.Double(),
        float=dsl.Float(),
    )
    if type in field_by_name:
        return field_by_name[type]
    return RawString

Example #4

0

Show file

class GameSummary(elasticsearch_dsl.Document):
    """Game search model.

    Documents of this type are stored in the ``games`` index.
    """

    # Both ``id`` and ``name`` use the full-text ``Text`` field type.
    id = elasticsearch_dsl.Text()
    name = elasticsearch_dsl.Text()
    # Boolean flag; presumably marks publicly visible games - confirm with callers.
    isPublic = elasticsearch_dsl.Boolean()
    # Object field whose inner mapping comes from ``PlayersInGame``
    # (defined outside this block).
    players = elasticsearch_dsl.Object(PlayersInGame)

    class Index:  # pylint: disable=missing-class-docstring
        name = "games"

Example #5

0

Show file

File: entity.py Project: tjanez/resolwe

class EntityDocument(CollectionDocument):
    """Document for entity search.

    Extends ``CollectionDocument`` (defined outside this block) with two
    additional fields.
    """

    # Boolean flag; presumably whether the entity's descriptor is complete - confirm.
    descriptor_completed = dsl.Boolean()
    # ``multi=True``: the field holds a list of integers rather than a single one.
    collections = dsl.Integer(multi=True)

    class Meta:
        """Meta class for entity search document."""

        # Index name used for this document type.
        index = 'entity'

Example #6

0

Show file

class Dictionary(es.Document):
    """Elasticsearch document holding the metadata of a dictionary.

    Index name and client connection come from ``ES_INDEX_DICTIONARY_INDEX``
    and ``ES_CLIENT``, both defined outside this block.
    """
    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()

    # Boolean flag; presumably set once the dictionary is fully built - confirm.
    is_ready = es.Boolean()

    class Index:
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT

Example #7

0

Show file

File: test_data.py Project: zhimingzhang123/django-rest-elasticsearch

class DataDocType(es.DocType):
    """Elasticsearch test model.

    Stored in the ``test`` index (configured through the inner ``Meta``).
    """
    # Exact-match fields.
    first_name = es.Keyword()
    last_name = es.Keyword()
    # Full-text field.
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        # Index name for this document type.
        index = 'test'

Example #8

0

Show file

        def decorator(cls):
            """Attach ``elasticsearch_dsl`` fields to ``cls`` based on ``cls.schema``.

            ``cls.schema`` maps field names to definition dicts in cerberus
            syntax.  Two extra keys are understood in each definition:

            * ``"elastic"``: kwargs handed unchanged to the field's ``__init__``.
            * ``"elastictype"``: meant for a more specific elasticsearch_dsl
              type (Text instead of string); this block only strips it.

            Both keys are removed from the schema once a field is processed.

            :param cls: class carrying a ``schema`` dict; it is modified in place.
            :returns: the same ``cls``.
            :raises Exception: if a definition's ``"type"`` is not supported.
            :raises KeyError: if a definition has no ``"type"`` key.
            """
            print("setup_schema:" + cls.__name__.lower())

            import elasticsearch_dsl

            # Schema type name -> elasticsearch_dsl field class.
            # "boolean" is the cerberus name for the type; "bool" is kept so
            # existing schemas continue to work.
            # NOTE(review): "number" maps to Integer as before, although cerberus
            # "number" also admits floats - confirm this is intended.
            field_classes = {
                "integer": elasticsearch_dsl.Integer,
                "float": elasticsearch_dsl.Float,
                "string": elasticsearch_dsl.Text,
                "bool": elasticsearch_dsl.Boolean,
                "boolean": elasticsearch_dsl.Boolean,
                "date": elasticsearch_dsl.Date,
                "datetime": elasticsearch_dsl.Date,
                "number": elasticsearch_dsl.Integer,
                "binary": elasticsearch_dsl.Byte,
                "list": elasticsearch_dsl.Keyword,
            }

            for elem, definition in cls.schema.items():
                # the raw field __init__ parameters dict
                elastic = definition.get("elastic", {})
                schema_type = definition["type"]
                # Non-string types (e.g. a list of types) are unhashable and
                # were never supported; treat them as unknown.
                field_class = (
                    field_classes.get(schema_type)
                    if isinstance(schema_type, str) else None
                )
                if field_class is None:
                    raise Exception("Wrong Datatype in schema")
                setattr(cls, elem, field_class(**elastic))
                # remove the elastic-only keys so the schema is plain cerberus again
                definition.pop("elastic", None)
                definition.pop("elastictype", None)

            return cls

Example #9

0

Show file

File: geocode.py Project: ABitMoreDepth/twitterELK

class GeoCoding(PluginBase):
    """Plugin that tries to attach location data to a tweet."""

    data_schema = dict(
        geotagged=es.Boolean(),
        location=es.Object(Location),
        coordinates=es.GeoPoint(),
    )

    def __init__(self, *args, **kwargs) -> None:
        """Build the Carmen resolver and location encoder, then init super."""
        # Carmen's default setup emits several warnings; silence them while
        # the resolver is being created and its locations are loaded.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            place_options = {'allow_unknown_locations': True}
            self.geotagger = get_resolver(options={'place': place_options})
            self.geotagger.load_locations()
            self.location_resolver = LocationEncoder()

        super().__init__(*args, **kwargs)  # type: ignore

    def process_tweet(self, tweet_json: Dict[str, Any]) -> Dict[str, Any]:
        """
        Try to geotag the given tweet.

        The tweet is returned with ``geotagged`` set to ``True`` only when
        both a latitude and a longitude were resolved; ``location`` and
        ``coordinates`` are added when available.
        """
        LOG.debug('Attempting to geotag tweet')
        resolved = self.geotagger.resolve_tweet(tweet_json['_raw'])

        tweet_json['geotagged'] = False

        # Nothing resolved: hand the tweet back marked as not geotagged.
        if not resolved:
            return tweet_json

        LOG.debug('  This tweet includes location information')
        tweet_json['location'] = self.location_resolver.default(resolved[1])
        location = tweet_json['location']

        has_position = 'latitude' in location and 'longitude' in location
        if has_position:
            tweet_json['coordinates'] = {
                'lat': location['latitude'],
                'lon': location['longitude'],
            }

            tweet_json['geotagged'] = True
            LOG.debug('Geotagging completed!')

        return tweet_json

Example #10

0

Show file

class DataDocType(es.Document):
    """Elasticsearch test model.

    Stored in the ``test`` index (configured through the inner ``Index``).
    """
    # Exact-match fields.
    first_name = es.Keyword()
    last_name = es.Keyword()
    # Full-text field.
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    # Geographic point field.
    location = es.GeoPoint()
    description = es.Text()


    class Index:
        # Index name for this document type.
        name = 'test'

Example #11

0

Show file

class TopicModellingIndex(es.Document):
    """Elasticsearch document holding the metadata of a topic-modelling run.

    Index name and client connection come from ``ES_INDEX_TOPIC_MODELLING``
    and ``ES_CLIENT``, both defined outside this block.
    """
    corpus = es.Keyword()
    source = es.Keyword()
    number_of_documents = es.Integer()
    # Boolean status flags.
    is_ready = es.Boolean()
    has_topic_info = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    # Date range; presumably the period of documents covered - confirm.
    datetime_from = es.Date()
    datetime_to = es.Date()

    algorithm = es.Keyword()
    number_of_topics = es.Integer()
    hierarchical = es.Boolean()
    # Object field declared without an inner document class.
    meta_parameters = es.Object()

    # Float-valued measures of the run.
    perplexity = es.Float()
    purity = es.Float()
    contrast = es.Float()
    coherence = es.Float()

    # Float-valued ``tau_*`` settings.
    tau_smooth_sparse_theta = es.Float()
    tau_smooth_sparse_phi = es.Float()
    tau_decorrelator_phi = es.Float()
    tau_coherence_phi = es.Float()

    # Nested list of ``Topic`` documents (``Topic`` is defined outside this block).
    topics = es.Nested(Topic)

    is_actualizable = es.Boolean()

    class Index:
        name = ES_INDEX_TOPIC_MODELLING
        using = ES_CLIENT

Example #12

0

Show file

class Text(es.InnerDoc):
    """Simple Elasticsearch DSL mapping of the text data this plugin will return.

    This is an ``InnerDoc``, i.e. it is meant to be embedded in another
    document rather than indexed on its own.
    """

    # Full-text and numeric fields, listed alphabetically.
    full_text = es.Text()
    pattern_polarity = es.Float()
    pattern_subjectivity = es.Float()
    short_text = es.Text()
    translated = es.Text()
    truncated = es.Boolean()
    tweet_length = es.Integer()
    # ``vader_*`` scores, all stored as floats.
    vader_compound = es.Float()
    vader_compound_inverted = es.Float()
    vader_negative = es.Float()
    vader_neutral = es.Float()
    vader_positive = es.Float()

Example #13

0

Show file

class BaseDocument(dsl.DocType):
    """Base document class to build ElasticSearch documents.

    This is the standard ``elasticsearch-dsl`` ``DocType`` class with
    three fields for handling permissions already added: two multi-valued
    keyword fields and one boolean field.

    """

    #: list of user ids with view permission on the object
    users_with_permissions = dsl.Keyword(multi=True)

    #: list of group ids with view permission on the object
    groups_with_permissions = dsl.Keyword(multi=True)

    #: identifies if object has public view permission assigned
    public_permission = dsl.Boolean()

Example #14

0

Show file

File: documents.py Project: armadillica/pillar

class Node(es.DocType):
    """
    Elastic document describing a node.

    Stored in the ``nodes`` index (configured through the inner ``Meta``).
    """

    node_type = es.Keyword()

    objectID = es.Keyword()

    # Full-text name analysed with the ``autocomplete`` analyzer (defined
    # outside this block); ``fielddata=True`` is enabled on the text field.
    name = es.Text(
        fielddata=True,
        analyzer=autocomplete
    )

    # Embedded user object with an exact-match id and an autocomplete name.
    user = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Text(
                fielddata=True,
                analyzer=autocomplete)
        }
    )

    description = es.Text()

    is_free = es.Boolean()

    # Embedded project object; all three sub-fields are exact-match keywords.
    project = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Keyword(),
            'url': es.Keyword(),
        }
    )

    media = es.Keyword()

    picture = es.Keyword()

    # ``multi=True``: the field holds a list of keywords.
    tags = es.Keyword(multi=True)
    license_notes = es.Text()

    created_at = es.Date()
    updated_at = es.Date()

    class Meta:
        # Index name for this document type.
        index = 'nodes'

Example #15

0

Show file

class EmbeddingIndex(es.Document):
    """Elasticsearch document holding the metadata of an embedding.

    Index name and client connection come from ``ES_INDEX_EMBEDDING`` and
    ``ES_CLIENT``, both defined outside this block.
    """
    corpus = es.Keyword()
    number_of_documents = es.Integer()
    # Boolean flag; presumably set once the embedding is fully built - confirm.
    is_ready = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    by_unit = es.Keyword()  # Token/Word/Sentence/Text
    algorithm = es.Keyword()
    pooling = es.Keyword()
    # Object field declared without an inner document class.
    meta_parameters = es.Object()

    class Index:
        name = ES_INDEX_EMBEDDING
        using = ES_CLIENT

Example #16

0

Show file

File: documents.py Project: spreeker/bag_services

class KadastraalSubject(es.DocType):
    """Elasticsearch document for a cadastral subject ("kadastraal subject").

    The index name is read from ``settings.ELASTIC_INDICES['BRK_SUBJECT']``.
    The analyzers referenced here live in the ``analyzers`` module, which is
    defined outside this block.
    """
    # Name ("naam"): analysed text with two sub-fields, an exact-match ``raw``
    # keyword and an ``ngram`` text field that uses different analyzers at
    # index time and at search time.
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword)})

    # "natuurlijk persoon" = natural person.
    natuurlijk_persoon = es.Boolean()
    # "geslachtsnaam" = surname; analysed with the same analyzer as ``naam``.
    geslachtsnaam = es.Text(analyzer=analyzers.naam)
    order = es.Integer()

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']

Example #17

0

Show file

class META_DTM(es.Document):
    """Elasticsearch document with six metadata fields (see below).

    Index name and client connection come from ``ES_INDEX_META_DTM`` and
    ``ES_CLIENT``, both defined outside this block.
    """
    meta_name = es.Keyword()
    volume_days = es.Float()
    delta_days = es.Float()
    reset_index = es.Boolean()
    from_date = es.Date()
    to_date = es.Date()

    class Index:
        name = ES_INDEX_META_DTM
        using = ES_CLIENT

        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }
        # Raw mapping that repeats the field declarations above, type for type.
        # NOTE(review): confirm that something actually reads ``Index.mappings``;
        # the field declarations on the class already describe the mapping.
        mappings = {
            "properties": {
                "meta_name": {
                    "type": "keyword",
                },
                "volume_days": {
                    "type": "float",
                },
                "delta_days": {
                    "type": "float",
                },
                "reset_index": {
                    "type": "boolean",
                },
                "from_date": {
                    "type": "date"
                },
                "to_date": {
                    "type": "date"
                }
            },
        }

Example #18

0

Show file

File: documents.py Project: eliasdata/dataselectie

class Inschrijving(es.Document):
    """
    Elastic data of 'vestigingen' (establishments) or 'mac'
    (presumably 'maatschappelijke activiteit', see the first field)
    from the handelsregister (trade register).

    The index name is read from ``settings.ELASTIC_INDICES['DS_HR_INDEX']``.
    """
    maatschappelijke_activiteit_id = es.Keyword()
    vestiging_id = es.Keyword()

    dataset = es.Keyword()

    kvk_nummer = es.Keyword()
    handelsnaam = es.Keyword()
    datum_aanvang = es.Date()
    eigenaar_naam = es.Keyword()
    eigenaar_id = es.Keyword()
    non_mailing = es.Boolean()

    aantal_werkzame_personen = es.Integer()
    rechtsvorm = es.Keyword()

    # Address information
    # Visiting address ("bezoekadres") fields.
    bezoekadres_volledig_adres = es.Keyword()
    bezoekadres_correctie = es.Boolean()
    bezoekadres_afgeschermd = es.Boolean()
    bezoekadres_openbare_ruimte = es.Keyword()
    bezoekadres_huisnummer = es.Integer()
    bezoekadres_huisletter = es.Keyword()
    bezoekadres_huisnummertoevoeging = es.Keyword()
    bezoekadres_postcode = es.Keyword()
    bezoekadres_plaats = es.Keyword()

    # Area codes and names for the visiting address.
    bezoekadres_buurt_code = es.Keyword()
    bezoekadres_buurt_naam = es.Keyword()
    bezoekadres_buurtcombinatie_code = es.Keyword()
    bezoekadres_buurtcombinatie_naam = es.Keyword()
    bezoekadres_ggw_code = es.Keyword()
    bezoekadres_ggw_naam = es.Keyword()
    bezoekadres_gsg_naam = es.Keyword()
    bezoekadres_stadsdeel_code = es.Keyword()
    bezoekadres_stadsdeel_naam = es.Keyword()

    # Postal address ("postadres") fields, parallel to the visiting address.
    postadres_volledig_adres = es.Keyword()
    postadres_correctie = es.Boolean()
    postadres_afgeschermd = es.Boolean()
    postadres_openbare_ruimte = es.Keyword()
    postadres_huisnummer = es.Integer()
    postadres_huisletter = es.Keyword()
    postadres_huisnummertoevoeging = es.Keyword()
    postadres_postcode = es.Keyword()
    postadres_plaats = es.Keyword()

    # And the bag numid
    bag_numid = es.Keyword()
    # Chained assignment: both names are bound to the same field object.
    adresseerbaar_object_id = identificatie = es.Keyword()
    centroid = es.GeoPoint()

    # Categories
    hoofdcategorie = es.Keyword(multi=True)
    subcategorie = es.Keyword(multi=True)

    # SBI codes
    sbi_code = es.Text(
        multi=True,
        fielddata=True,
        analyzer=autocomplete,
    )

    sbi_omschrijving = es.Keyword(multi=True)

    # Multi-valued keyword fields ``sbi_l1`` .. ``sbi_l5``.
    sbi_l1 = es.Keyword(multi=True)
    sbi_l2 = es.Keyword(multi=True)
    sbi_l3 = es.Keyword(multi=True)
    sbi_l4 = es.Keyword(multi=True)
    sbi_l5 = es.Keyword(multi=True)

    # special legal status ("bijzondere rechtstoestand")

    # status = es.Keyword()

    bijzondere_rechtstoestand = es.Keyword()

    class Meta:
        # Meta field named ``all`` is declared with ``enabled=False``.
        all = es.MetaField(enabled=False)
        doc_type = 'vestiging'

    class Index:
        # ``doc_type`` is set both here and in ``Meta`` above.
        doc_type = 'vestiging'
        name = settings.ELASTIC_INDICES['DS_HR_INDEX']

Example #19

0

Show file

class Job(es.DocType):
    """Elasticsearch document for a job offer.

    The class body first builds the token filters, tokenizer and analyzers
    used by the text fields, then declares the fields, and finally exposes a
    few read-only helper properties.
    """

    class Meta:
        index = 'jobs'
        doc_type = 'job-offer'

    # Elision filter configured with the listed French articles.
    french_elision = es.token_filter('french_elision',
                                     type='elision',
                                     articles_case=True,
                                     articles=[
                                         'l', 'm', 't', 'qu', 'n', 's', 'j',
                                         'd', 'c', 'jusqu', 'quoiqu', 'lorsqu',
                                         'puisqu'
                                     ])

    french_stopwords = es.token_filter('french_stopwords',
                                       type='stop',
                                       stopwords='_french_')

    # Do not include this filter if keywords is empty
    french_keywords = es.token_filter('french_keywords',
                                      type='keyword_marker',
                                      keywords=[])

    french_stemmer = es.token_filter('french_stemmer',
                                     type='stemmer',
                                     language='light_french')

    # Analyzer for French free text: standard tokenizer, the filters listed
    # below and HTML stripping.  ``french_keywords`` is left out because its
    # keyword list is empty (see the comment on that filter).
    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip'])

    # Pattern tokenizer splitting on a space, a comma, or a comma followed by
    # a space.
    technologies_tokenizer = es.tokenizer('comma_tokenizer',
                                          type='pattern',
                                          pattern=' |,|, ')

    # Synonym rules mapping spelling variants of technology names onto one
    # canonical token each.
    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language', 'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language', 'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language', 'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet', '.net => dotnet'
        ])

    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=['lowercase', 'asciifolding', technologies_synonyms_filter])

    company_name_analyzer = es.analyzer('company_name_analyzer',
                                        tokenizer='standard',
                                        filter=['lowercase', 'asciifolding'])

    id = es.Integer()

    # ``index='no'`` / ``index='not_analyzed'`` are options of the ``String``
    # field type used throughout this class.
    url = es.String(index='no')
    source = es.String(index='not_analyzed')

    # Title and description are analysed as French text and additionally
    # expose a ``technologies`` sub-field using the technologies analyzer.
    title = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})

    description = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})

    company = es.String(analyzer=company_name_analyzer)

    company_url = es.String(index='no')

    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()

    # Nested list of tags; each entry has a ``tag`` string and an integer
    # ``weight``.
    tags = es.Nested(doc_class=Tag,
                     properties=dict(tag=es.String(index='not_analyzed'),
                                     weight=es.Integer()))

    publication_datetime = es.Date()
    publication_datetime_is_fake = es.Boolean()

    crawl_datetime = es.Date()

    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        """Initialise the document and replace the index name.

        The index name stored on ``_doc_type`` is passed through
        ``compute_index_name`` (defined outside this block).
        """
        super(Job, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        """Index name stored on ``_doc_type``."""
        return self._doc_type.index

    @property
    def doc_type(self):
        """Document type name stored on ``_doc_type``."""
        return self._doc_type.name

    @property
    def published(self):
        """Publication datetime formatted with ``format_date``."""
        return format_date(self.publication_datetime, locale='FR_fr')

    @property
    def published_in_days(self):
        """Time since publication, formatted with day granularity."""
        delta = datetime.now() - self.publication_datetime  # TODO: bugfix
        return format_timedelta(delta, granularity='day', locale='en_US')

    @property
    def alltags(self):
        """Return ``Tag2`` objects for every tag that is not a condition tag."""
        tags = []
        if self.tags:
            for tag in self.tags:
                # NOTE(review): inside a method the bare name ``condition_tags``
                # is looked up at module level, not on this class - confirm a
                # module-level ``condition_tags`` collection exists.
                if tag['tag'] not in condition_tags:
                    tags.append(Tag2(tag['tag'], tag['weight']))
        return tags

    @property
    def condition_tags(self):
        """Return ``Tag2`` objects (with CSS info) for the condition tags."""
        tags = []
        if self.tags:
            for tag in self.tags:
                # Same module-level ``condition_tags`` lookup as in ``alltags``.
                if tag['tag'] in condition_tags:
                    tag = Tag2(tag['tag'], tag['weight'],
                               Tag2.get_css(tag['tag']))
                    tags.append(tag)
        return tags

Example #20

0

Show file

class DictionaryWord(es.Document):
    """Elasticsearch document for one word of a dictionary.

    Holds the word itself, a set of boolean flags and integer/float counts
    and ratios.  Index name and client connection come from
    ``ES_INDEX_DICTIONARY_WORD`` and ``ES_CLIENT``, both defined outside this
    block.
    """
    dictionary = es.Keyword()
    word = es.Keyword()
    word_normal = es.Keyword()

    # Boolean flags.
    is_in_pymorphy2_dict = es.Boolean()
    is_multiple_normals_in_pymorphy2 = es.Boolean()
    is_stop_word = es.Boolean()
    is_latin = es.Boolean()
    is_kazakh = es.Boolean()
    n_gram_len = es.Integer()
    pos_tag = es.Keyword()
    word_len = es.Integer()

    # Integer frequency counts.
    word_frequency = es.Integer()
    word_normal_frequency = es.Integer()
    document_frequency = es.Integer()
    document_normal_frequency = es.Integer()

    # Float counterparts of the four counts above (``*_relative``).
    word_frequency_relative = es.Float()
    word_normal_frequency_relative = es.Float()
    document_frequency_relative = es.Float()
    document_normal_frequency_relative = es.Float()

    word_first_capital_ratio = es.Float()
    word_normal_first_capital_ratio = es.Float()

    class Index:
        name = ES_INDEX_DICTIONARY_WORD  # f"{ES_INDEX_DICTIONARY_WORD}_{name}{_temp}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
        }

        # Raw mapping that repeats the field declarations above, type for type.
        # NOTE(review): confirm that something actually reads ``Index.mappings``;
        # the field declarations on the class already describe the mapping.
        mappings = {
            "properties": {
                "dictionary": {
                    "type": "keyword",
                },
                "word": {
                    "type": "keyword",
                },
                "word_normal": {
                    "type": "keyword",
                },
                "is_in_pymorphy2_dict": {
                    "type": "boolean",
                },
                "is_multiple_normals_in_pymorphy2": {
                    "type": "boolean",
                },
                "is_stop_word": {
                    "type": "boolean",
                },
                "is_latin": {
                    "type": "boolean",
                },
                "is_kazakh": {
                    "type": "boolean",
                },
                "n_gram_len": {
                    "type": "integer",
                },
                "pos_tag": {
                    "type": "keyword",
                },
                "word_len": {
                    "type": "integer",
                },
                "word_frequency": {
                    "type": "integer",
                },
                "word_normal_frequency": {
                    "type": "integer",
                },
                "document_frequency": {
                    "type": "integer",
                },
                "document_normal_frequency": {
                    "type": "integer",
                },
                "word_frequency_relative": {
                    "type": "float",
                },
                "word_normal_frequency_relative": {
                    "type": "float",
                },
                "document_frequency_relative": {
                    "type": "float",
                },
                "document_normal_frequency_relative": {
                    "type": "float",
                },
                "word_first_capital_ratio": {
                    "type": "float",
                },
                "word_normal_first_capital_ratio": {
                    "type": "float",
                },
            },
        }

Example #21

0

Show file

class Nummeraanduiding(es.DocType):
    """
    All bag objects should have one or more addresses.

    A 'nummeraanduiding' (number designation), colloquially also called an
    address, is a designation of a 'verblijfsobject', 'standplaats' or
    'ligplaats' that has been assigned as such by the competent municipal
    body.

    [Stelselpedia](http://www.amsterdam.nl/stelselpedia/bag-index/catalogus-bag/objectklasse-2/)
    """
    # Street name: analysed text with an exact-match ``raw`` sub-field and an
    # ``ngram_edge`` sub-field that is indexed with the autocomplete analyzer
    # but searched with the standard analyzer.
    straatnaam = es.Text(analyzer=analyzers.adres,
                         fields={
                             'raw':
                             es.Keyword(),
                             'ngram_edge':
                             es.Text(analyzer=analyzers.autocomplete,
                                     search_analyzer='standard')
                         })

    straatnaam_keyword = es.Keyword()

    # Same mapping as ``straatnaam`` for the ``_nen`` variant of the name.
    straatnaam_nen = es.Text(analyzer=analyzers.adres,
                             fields={
                                 'raw':
                                 es.Keyword(),
                                 'ngram_edge':
                                 es.Text(analyzer=analyzers.autocomplete,
                                         search_analyzer='standard')
                             })

    straatnaam_nen_keyword = es.Keyword()

    # ``_ptt`` variant; additionally exposes a lowercase-normalised
    # ``keyword`` sub-field.
    straatnaam_ptt = es.Text(analyzer=analyzers.adres,
                             fields={
                                 'raw':
                                 es.Keyword(),
                                 'ngram_edge':
                                 es.Text(analyzer=analyzers.autocomplete,
                                         search_analyzer='standard'),
                                 'keyword':
                                 es.Keyword(normalizer=analyzers.lowercase),
                             })

    straatnaam_ptt_keyword = es.Keyword()

    adres = es.Text(analyzer=analyzers.adres,
                    fields={
                        'raw':
                        es.Keyword(),
                        'ngram_edge':
                        es.Text(analyzer=analyzers.autocomplete,
                                search_analyzer='standard'),
                    })

    # ``comp_address*`` fields share one mapping: address analyzer, a ``raw``
    # keyword sub-field and an ``ngram`` autocomplete sub-field.
    comp_address = es.Text(analyzer=analyzers.adres,
                           fields={
                               'raw':
                               es.Keyword(),
                               'ngram':
                               es.Text(analyzer=analyzers.autocomplete,
                                       search_analyzer='standard')
                           })
    comp_address_nen = es.Text(analyzer=analyzers.adres,
                               fields={
                                   'raw':
                                   es.Keyword(),
                                   'ngram':
                                   es.Text(analyzer=analyzers.autocomplete,
                                           search_analyzer='standard')
                               })
    comp_address_ptt = es.Text(analyzer=analyzers.adres,
                               fields={
                                   'raw':
                                   es.Keyword(),
                                   'ngram':
                                   es.Text(analyzer=analyzers.autocomplete,
                                           search_analyzer='standard')
                               })
    comp_address_pcode = es.Text(analyzer=analyzers.adres,
                                 fields={
                                     'raw':
                                     es.Keyword(),
                                     'ngram':
                                     es.Text(analyzer=analyzers.autocomplete,
                                             search_analyzer='standard')
                                 })

    # House number: integer with a text ``variation`` sub-field.
    huisnummer = es.Integer(
        fields={'variation': es.Text(analyzer=analyzers.huisnummer)})

    # House-number addition ("toevoeging").
    toevoeging = es.Text(analyzer=analyzers.toevoeging,
                         fields={'keyword': es.Keyword()})

    # to return official bag fields
    bag_toevoeging = es.Keyword()
    bag_huisletter = es.Keyword()
    woonplaats = es.Keyword()

    # ``postcode_fields`` is defined outside this block.
    postcode = es.Text(
        analyzer=analyzers.postcode,
        fields=postcode_fields,
    )

    order = es.Integer()

    # "hoofdadres" = main address.
    hoofdadres = es.Boolean()
    # Status as a nested object: lowercase-normalised code plus a text
    # description ("omschrijving").
    status = es.Nested(
        properties={
            'code': es.Keyword(normalizer=analyzers.lowercase),
            'omschrijving': es.Text()
        })

    # Same structure as ``status``.
    vbo_status = es.Nested(
        properties={
            'code': es.Keyword(normalizer=analyzers.lowercase),
            'omschrijving': es.Text()
        })

    subtype = es.Keyword()
    _display = es.Keyword()

    # National id ("landelijk id"): autocomplete text with an exact-match
    # ``raw`` sub-field and a ``nozero`` sub-field.
    landelijk_id = es.Text(analyzer=analyzers.autocomplete,
                           fields={
                               'raw': es.Keyword(),
                               'nozero': es.Text(analyzer=analyzers.nozero)
                           })
    adresseerbaar_object_id = es.Text(  # Is landelijk_id for related verblijfsobject, ligplaats or standplaats
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero)
        })

    class Index:
        name = settings.ELASTIC_INDICES['NUMMERAANDUIDING']

Example #22

0

Show file

File: models.py Project: groovecoder/fjord

class ResponseDocType(FjordDocType):
    """Elasticsearch document type for an indexed ``Response``.

    The class attributes declare the mapping of each indexed field;
    ``extract_doc`` builds the dict of values that is indexed for a given
    ``Response`` object.
    """
    id = es_dsl.Integer()
    happy = es_dsl.Boolean()
    api = es_dsl.Integer()
    url = es_dsl.String(index='not_analyzed')
    url_domain = es_dsl.String(index='not_analyzed')
    has_email = es_dsl.Boolean()
    description = es_dsl.String(analyzer='snowball')
    category = es_dsl.String(index='not_analyzed')
    description_bigrams = es_dsl.String(index='not_analyzed')
    description_terms = es_dsl.String(analyzer='standard')
    user_agent = es_dsl.String(index='not_analyzed')
    product = es_dsl.String(index='not_analyzed')
    channel = es_dsl.String(index='not_analyzed')
    version = es_dsl.String(index='not_analyzed')
    browser = es_dsl.String(index='not_analyzed')
    browser_version = es_dsl.String(index='not_analyzed')
    platform = es_dsl.String(index='not_analyzed')
    locale = es_dsl.String(index='not_analyzed')
    country = es_dsl.String(index='not_analyzed')
    device = es_dsl.String(index='not_analyzed')
    manufacturer = es_dsl.String(index='not_analyzed')
    source = es_dsl.String(index='not_analyzed')
    campaign = es_dsl.String(index='not_analyzed')
    # This field used to be declared as ``souce_campaign``. ``extract_doc``
    # emits the key ``source_campaign``, so the ``not_analyzed`` mapping
    # declared here never applied to the indexed value.
    # NOTE(review): an already-existing index presumably needs its mapping
    # recreated (and data reindexed) for this to take effect -- confirm.
    source_campaign = es_dsl.String(index='not_analyzed')
    organic = es_dsl.Boolean()
    created = es_dsl.Date()

    docs = ResponseDocTypeManager()

    class Meta:
        pass

    def mlt(self):
        """Returns a search with a morelikethis query for docs like this"""
        # Short responses tend to not repeat any words, so then MLT
        # returns nothing. This fixes that by setting min_term_freq to
        # 1. Longer responses tend to repeat important words, so we can
        # set min_term_freq to 2.
        num_words = len(self.description.split(' '))
        min_term_freq = 2 if num_words > 40 else 1

        # Only look at documents for the same product/platform when this
        # document has those values set.
        s = self.search()
        if self.product:
            s = s.filter('term', product=self.product)
        if self.platform:
            s = s.filter('term', platform=self.platform)

        s = s.query('more_like_this',
                    fields=['description'],
                    docs=[{
                        '_index': get_index_name(),
                        '_type': self._doc_type.name,
                        '_id': self.id
                    }],
                    min_term_freq=min_term_freq,
                    stop_words=list(ANALYSIS_STOPWORDS))
        return s

    @classmethod
    def get_model(cls):
        """Returns the model class this document type is built from."""
        return Response

    @classmethod
    def public_fields(cls):
        """Fields that can be publicly-visible

        .. Note::

           Do NOT include fields that have PII in them.

        """
        return ('id', 'happy', 'api', 'url_domain', 'has_email', 'description',
                'category', 'description_bigrams', 'user_agent', 'product',
                'version', 'platform', 'locale', 'source', 'campaign',
                'organic', 'created')

    @property
    def truncated_description(self):
        """Shorten feedback for dashboard view."""
        return smart_truncate(self.description, length=500)

    @classmethod
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create a
        ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            'id': resp.id,
            'happy': resp.happy,
            'api': resp.api,
            'url': resp.url,
            'url_domain': resp.url_domain,
            # Only whether an email was given is indexed, not the address.
            'has_email': bool(resp.user_email),
            'description': resp.description,
            'user_agent': resp.user_agent,
            'product': resp.product,
            'channel': resp.channel,
            'version': resp.version,
            'browser': resp.browser,
            'browser_version': resp.browser_version,
            'platform': resp.platform,
            'locale': resp.locale,
            'country': resp.country,
            'device': resp.device,
            'manufacturer': resp.manufacturer,
            'source': resp.source,
            'campaign': resp.campaign,
            # "<source>::<campaign>", with '--' standing in for an empty part.
            'source_campaign':
            '::'.join([(resp.source or '--'), (resp.campaign or '--')]),
            # A response without a campaign counts as organic.
            'organic': (not resp.campaign),
            'created': resp.created,
        }

        # We only compute bigrams for english because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u'en') and resp.description:
            doc['description_bigrams'] = compute_grams(resp.description)
        else:
            doc['description_bigrams'] = []

        if with_id:
            doc['_id'] = doc['id']
        return doc

Example #23

0

Show file

File: awsdetailedlineitem.py Project: reIMAGINE-Labs/trackit

class AWSDetailedLineitem(dsl.DocType):
    """Document type stored in the ``awsdetailedlineitem`` index.

    The attributes below declare the mapping of each field; the class
    methods build searches/aggregations over those fields.
    NOTE(review): the fields presumably mirror the columns of AWS's
    detailed line-item billing report -- confirm against the importer.
    """
    class Meta:
        index = 'awsdetailedlineitem'

    # String fields declared with index='not_analyzed' are the ones used
    # below in term/terms filters and terms aggregations.
    availability_zone = dsl.String(index='not_analyzed')
    cost = dsl.Double()
    un_blended_cost = dsl.Double()
    item_description = dsl.String(index='not_analyzed')
    linked_account_id = dsl.String(index='not_analyzed')
    operation = dsl.String()
    payer_account_id = dsl.String(index='not_analyzed')
    pricing_plan_id = dsl.Long()
    product_name = dsl.String(index='not_analyzed')
    rate = dsl.Double()
    un_blended_rate = dsl.Double()
    rate_id = dsl.Long()
    record_id = dsl.String(index='not_analyzed')
    reserved_instance = dsl.Boolean()
    resource_id = dsl.String(index='not_analyzed')
    subscription_id = dsl.Long()
    # Tag stored as a key/value object; queried as 'tag.key' / 'tag.value'.
    tag = dsl.Object(
        properties={
            'key': dsl.String(index='not_analyzed'),
            'value': dsl.String(index='not_analyzed')
        })
    # Both usage dates accept either of the two listed date formats.
    usage_end_date = dsl.Date(format='strict_date_optional_time||epoch_millis')
    usage_quantity = dsl.Double()
    usage_start_date = dsl.Date(
        format='strict_date_optional_time||epoch_millis')
    usage_type = dsl.String(index='not_analyzed')

    @classmethod
    @with_cache(ttl=3600 * 3, worker_refresh=True)
    def keys_has_data(cls, keys, date_from=None, date_to=None):
        """Tell whether any line item exists for the given account key(s).

        :param keys: one linked account id, or a list of them.
        :param date_from: optional start of a ``usage_start_date`` range;
            the range filter is only applied when this is given.
        :param date_to: end of that range; defaults to the current UTC time.
        :returns: ``True`` when the search reports more than zero hits.
        """
        if not date_to:
            date_to = datetime.utcnow()
        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        if date_from:
            period = {
                'from': date_from.isoformat(),
                'to': date_to.isoformat(),
            }
            query = query.filter('range', usage_start_date=period)
        # size=0: only the hit count is needed, not the documents.
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)
        return response['hits']['total'] > 0

    @classmethod
    @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d"))
    def get_first_date(cls, keys):
        """Find the earliest ``usage_start_date`` for the given account(s).

        :param keys: one linked account id, or a list of them.
        :returns: the part of the oldest hit's ``usage_start_date`` that
            precedes ``'T'``, or ``None`` when the search has no hits.
        """
        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        # Ascending sort + size=1 yields the oldest line item.
        query = query.sort('usage_start_date')
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=1,
                                 request_timeout=60)
        hits = response['hits']
        if hits['total'] == 0:
            return None
        oldest = hits['hits'][0]['_source']['usage_start_date']
        return oldest.split('T')[0]

    @classmethod
    @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d"))
    def get_last_date(cls, keys, limit=None):
        """Find the most recent ``usage_start_date`` for the given account(s).

        :param keys: one linked account id, or a list of them.
        :param limit: optional datetime; when given, a range filter with
            ``to`` set to ``limit.isoformat()`` is applied.
        :returns: the part of the newest hit's ``usage_start_date`` that
            precedes ``'T'``, or ``None`` when the search has no hits.
        """
        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        if limit:
            upper_bound = {'to': limit.isoformat()}
            query = query.filter('range', usage_start_date=upper_bound)
        # Descending sort + size=1 yields the newest line item.
        query = query.sort('-usage_start_date')
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=1,
                                 request_timeout=60)
        hits = response['hits']
        if hits['total'] == 0:
            return None
        newest = hits['hits'][0]['_source']['usage_start_date']
        return newest.split('T')[0]

    @classmethod
    def get_first_to_now_date(cls, keys):
        """List dates, one month apart, from the first usage date until now.

        :param keys: one linked account id, or a list of them; passed on to
            ``get_first_date``.
        :returns: a list starting at the first date and advancing by one
            month per element while the date is earlier than the current
            UTC time. Empty when no first date is available.
        """
        current = cls.get_first_date(keys)
        # get_first_date's body returns nothing when the search has no hits;
        # comparing None with a datetime would raise TypeError, so report
        # "no dates" instead.
        if current is None:
            return []

        now = datetime.utcnow()
        dates = []
        while current < now:
            dates.append(current)
            current += relativedelta(months=1)
        return dates

    @classmethod
    def get_first_to_last_date(cls, keys):
        """List dates, one month apart, from the first to the last usage date.

        :param keys: one linked account id, or a list of them; passed on to
            ``get_first_date`` and ``get_last_date``.
        :returns: a list starting at the first date and advancing by one
            month per element while the date is earlier than the last date.
            Empty when either bound is unavailable.
        """
        current = cls.get_first_date(keys)
        # Both lookups return nothing when the search has no hits; comparing
        # None with a datetime would raise TypeError, so report "no dates".
        if current is None:
            return []
        last = cls.get_last_date(keys)
        if last is None:
            return []

        dates = []
        while current < last:
            dates.append(current)
            current += relativedelta(months=1)
        return dates

    @classmethod
    @with_cache(6 * 3600)
    def get_available_tags(cls, keys, only_with_data=None, product_name=None):
        """List the user tag names found on line items of the given account(s).

        :param keys: one linked account id, or a list of them.
        :param only_with_data: when truthy, a tag is kept only if its name
            appears in the ``'tags'`` of
            ``AWSStat.latest_hourly_cpu_usage_by_tag(only_with_data)`` or
            ``AWSStat.latest_daily_cpu_usage_by_tag(only_with_data)``.
        :param product_name: optional product name to restrict the search to.
        :returns: ``{'tags': [...]}`` with the sorted tag names.
        """
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        if product_name:
            s = s.filter('term', product_name=product_name)
        s.aggs.bucket('tag_key', 'terms', field='tag.key')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        tags = []
        for tag in res['aggregations']['tag_key']['buckets']:
            # Only keys of the form "user:<name>" are considered; the name is
            # the segment right after the first colon.
            if tag['key'].startswith('user:'):
                name = tag['key'].split(':')[1]
                if not only_with_data or name in AWSStat.latest_hourly_cpu_usage_by_tag(
                        only_with_data
                )['tags'] or name in AWSStat.latest_daily_cpu_usage_by_tag(
                        only_with_data)['tags']:
                    tags.append(name)
        tags.sort()
        return dict(tags=tags)

    @classmethod
    @with_cache(ttl=6 * 3600)
    def get_cost_by_tag(cls, keys, tag, date_from=None, date_to=None):
        """Sum cost per value of one user tag over a date range.

        :param keys: one linked account id, or a list of them.
        :param tag: tag name; items are matched on ``tag.key`` equal to
            ``'user:<tag>'``.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :returns: ``{'tags': [{'tag_value', 'cost'}, ...], 'total_cost': ...}``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        query = query.filter('term', **{'tag.key': 'user:{}'.format(tag)})
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)

        # Overall total, plus one cost sum per tag value.
        query.aggs.bucket('total_cost', 'sum', field='cost')
        by_value = query.aggs.bucket('tag_value',
                                     'terms',
                                     field='tag.value',
                                     size=0x7FFFFFFF)
        by_value.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)
        aggregations = response['aggregations']

        costs = []
        for bucket in aggregations['tag_value']['buckets']:
            costs.append({
                'tag_value': bucket['key'],
                'cost': bucket['cost']['value'],
            })
        return dict(tags=costs,
                    total_cost=aggregations['total_cost']['value'])

    @classmethod
    @with_cache(ttl=6 * 3600)
    def get_cost(cls, keys, date_from, date_to=None):
        """Sum the cost of all line items of the account(s) in a date range.

        :param keys: one linked account id, or a list of them.
        :param date_from: range start; a falsy value falls back to the first
            day of the current UTC month at midnight.
        :param date_to: range end; defaults to ``date_from``'s own day at
            23:59:59.999999.
        :returns: ``{'total_cost': <sum of cost>}``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            date_to = date_from.replace(
                hour=23, minute=59, second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)
        query.aggs.bucket('total_cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)
        total = response['aggregations']['total_cost']['value']
        return dict(total_cost=total)

    @classmethod
    @with_cache()
    def get_monthly_cost_by_tag(cls, keys, tag, date_from=None, date_to=None):
        """Break cost down per month and per value of one user tag.

        :param keys: one linked account id, or a list of them.
        :param tag: tag name; items are matched on ``tag.key`` equal to
            ``'user:<tag>'``.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :returns: ``{'months': [...]}``; each entry has ``month``, ``tags``
            (a list of ``tag_value``/``cost`` dicts) and ``total_cost``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        query = query.filter('term', **{'tag.key': 'user:{}'.format(tag)})
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)

        # Month buckets, each with its total and a per-tag-value cost sum.
        by_month = query.aggs.bucket('intervals',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_month.bucket('total_cost', 'sum', field='cost')
        by_value = by_month.bucket('tag_value',
                                   'terms',
                                   field='tag.value',
                                   size=0x7FFFFFFF)
        by_value.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        months = []
        for interval in response['aggregations']['intervals']['buckets']:
            # Date part of the bucket key minus its last three characters
            # (presumably turning YYYY-MM-DD into YYYY-MM).
            label = interval['key_as_string'].split('T')[0][:-3]
            tag_costs = []
            for bucket in interval['tag_value']['buckets']:
                tag_costs.append({
                    'tag_value': bucket['key'],
                    'cost': bucket['cost']['value'],
                })
            months.append({
                'month': label,
                'tags': tag_costs,
                'total_cost': interval['total_cost']['value'],
            })
        return dict(months=months)

    @classmethod
    @with_cache()
    def get_cost_by_product(cls,
                            key,
                            date_from=None,
                            date_to=None,
                            without_discount=False,
                            only_discount=False,
                            size=0x7FFFFFFF):
        """Sum cost per product for a single account over a date range.

        :param key: linked account id (matched with a ``term`` filter).
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :param without_discount: exclude items whose ``item_description`` is
            ``'PAR_APN_ProgramFee_2500'``.
        :param only_discount: keep only items with that description.
        :param size: ``size`` of the per-product terms aggregation.
        :returns: ``{'products': [{'product', 'cost'}, ...]}``; the
            aggregation is ordered by cost, descending, and product keys are
            mapped through ``SHORT_NAMES`` when present there.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        query = cls.search().filter('term', linked_account_id=key)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)
        if without_discount:
            not_discount = ~dsl.Q('term',
                                  item_description='PAR_APN_ProgramFee_2500')
            query = query.query('bool', filter=[not_discount])
        if only_discount:
            query = query.filter('term',
                                 item_description='PAR_APN_ProgramFee_2500')

        by_product = query.aggs.bucket('products',
                                       'terms',
                                       field='product_name',
                                       order={'cost': 'desc'},
                                       size=size)
        by_product.bucket('cost', 'sum', field='cost')
        # Leave out line items whose cost is exactly zero.
        nonzero_cost = ~dsl.Q('term', cost=0)
        query = query.query('bool', filter=[nonzero_cost])

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        products = []
        for bucket in response['aggregations']['products']['buckets']:
            products.append({
                'product': SHORT_NAMES.get(bucket['key'], bucket['key']),
                'cost': bucket['cost']['value'],
            })
        return dict(products=products)

    @classmethod
    @with_cache()
    def get_cost_by_region(cls,
                           keys,
                           tagged=False,
                           byaccount=False,
                           date_from=None,
                           date_to=None,
                           size=0x7FFFFFFF):
        """Aggregate monthly cost per availability zone for the account(s).

        :param keys: one linked account id, or a list of them.
        :param tagged: also sum cost per ``tag.value`` inside each region.
        :param byaccount: put a per-``linked_account_id`` bucket level on
            top of the monthly intervals.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :param size: ``size`` of the per-region terms aggregation.
        :returns: the raw ``'aggregations'`` part of the search response.
        """
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })

        # Nesting: [accounts ->] intervals (month) -> regions [-> tags],
        # with a cost sum under regions and, when requested, under tags.
        agg = s.aggs
        if byaccount:
            agg = agg.bucket('accounts', 'terms', field='linked_account_id')
        agg = agg.bucket('intervals',
                         'date_histogram',
                         field='usage_start_date',
                         interval='month',
                         min_doc_count=1)
        agg = agg.bucket('regions',
                         'terms',
                         field='availability_zone',
                         size=size)
        agg.bucket('cost', 'sum', field='cost')
        if tagged:
            agg = agg.bucket('tags', 'terms', field='tag.value')
            agg.bucket('cost', 'sum', field='cost')
        # Use the same 60s request timeout as every other search in this
        # class; this call previously fell back to the client default.
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        return res['aggregations']

    @classmethod
    @with_cache()
    def get_monthly_cost(cls,
                         keys,
                         date_from=None,
                         date_to=None,
                         size=0x7FFFFFFF):
        """Sum cost per month for the given account(s).

        :param keys: one linked account id, or a list of them.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :param size: accepted but not used by this method.
        :returns: ``{'months': [{'month', 'total_cost'}, ...]}`` where
            ``month`` is the bucket's ``key_as_string`` up to ``'T'``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)

        by_month = query.aggs.bucket('intervals',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_month.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        months = []
        for interval in response['aggregations']['intervals']['buckets']:
            months.append({
                'month': interval['key_as_string'].split('T')[0],
                'total_cost': interval['cost']['value'],
            })
        return dict(months=months)

    @classmethod
    @with_cache()
    def get_monthly_cost_by_product(cls,
                                    keys,
                                    tagged=False,
                                    date_from=None,
                                    date_to=None,
                                    size=0x7FFFFFFF):
        """Sum cost per month and per product for the given account(s).

        :param keys: one linked account id, or a list of them.
        :param tagged: also break each product's cost down per ``tag.value``;
            any remainder not covered by the tag buckets is reported under
            the name ``'untagged'``.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :param size: ``size`` of the per-product terms aggregation.
        :returns: ``{'months': [{'month', 'products'}, ...]}``; each product
            has ``product`` and ``cost``, plus ``tags`` when ``tagged``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)

        level = query.aggs.bucket('intervals',
                                  'date_histogram',
                                  field='usage_start_date',
                                  interval='month',
                                  min_doc_count=1)
        level = level.bucket('products',
                             'terms',
                             field='product_name',
                             size=size)
        level.bucket('cost', 'sum', field='cost')
        if tagged:
            level = level.bucket('tags', 'terms', field='tag.value')
            level.bucket('cost', 'sum', field='cost')
        # Leave out line items whose cost is exactly zero.
        query = query.query('bool', filter=[~dsl.Q('term', cost=0)])

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        def tag_breakdown(tag_buckets, product_total):
            # One entry per tag bucket, plus an 'untagged' entry for the
            # difference between the product total and the tagged sum.
            entries = []
            tagged_sum = 0.0
            for tag_bucket in tag_buckets:
                tagged_sum += tag_bucket['cost']['value']
                entries.append({
                    'name': tag_bucket['key'],
                    'cost': tag_bucket['cost']['value'],
                })
            if product_total != tagged_sum:
                entries.append({
                    'name': 'untagged',
                    'cost': product_total - tagged_sum,
                })
            return entries

        months = []
        for interval in response['aggregations']['intervals']['buckets']:
            label = interval['key_as_string'].split('T')[0]
            products = []
            for product in interval['products']['buckets']:
                entry = {
                    'product': SHORT_NAMES.get(product['key'],
                                               product['key']),
                    'cost': product['cost']['value'],
                }
                if tagged:
                    entry['tags'] = tag_breakdown(product['tags']['buckets'],
                                                  product['cost']['value'])
                products.append(entry)
            months.append({'month': label, 'products': products})
        return dict(months=months)

    @classmethod
    @with_cache(ttl=4 * 3600)
    def get_daily_cost_by_product(cls,
                                  keys,
                                  date_from=None,
                                  date_to=None,
                                  size=0x7FFFFFFF):
        """Sum cost per day and per product for the given account(s).

        :param keys: one linked account id, or a list of them.
        :param date_from: range start; defaults to the current UTC day at
            midnight.
        :param date_to: range end; defaults to ``date_from``'s own day at
            23:59:59.999999.
        :param size: ``size`` of the per-product terms aggregation.
        :returns: ``{'days': [{'day', 'products'}, ...]}``; each product has
            ``product`` (mapped through ``SHORT_NAMES`` when present there)
            and ``cost``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            date_to = date_from.replace(
                hour=23, minute=59, second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)

        by_day = query.aggs.bucket('intervals',
                                   'date_histogram',
                                   field='usage_start_date',
                                   interval='day',
                                   min_doc_count=1)
        by_product = by_day.bucket('products',
                                   'terms',
                                   field='product_name',
                                   size=size)
        by_product.metric('cost', 'sum', field='cost')
        # Leave out line items whose cost is exactly zero.
        query = query.query('bool', filter=[~dsl.Q('term', cost=0)])

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        days = []
        for interval in response['aggregations']['intervals']['buckets']:
            label = interval['key_as_string'].split('T')[0]
            products = []
            for product in interval['products']['buckets']:
                products.append({
                    'product': SHORT_NAMES.get(product['key'],
                                               product['key']),
                    'cost': product['cost']['value'],
                })
            days.append({'day': label, 'products': products})
        return dict(days=days)

    @classmethod
    @with_cache(ttl=24 * 3600)
    def get_yearly_cost_by_product(cls,
                                   keys,
                                   date_from=None,
                                   date_to=None,
                                   size=0x7FFFFFFF):
        """Sum cost per year and per product for the given account(s).

        :param keys: one linked account id, or a list of them.
        :param date_from: range start; defaults to January 1st of the
            current UTC year at midnight.
        :param date_to: range end; defaults to December 31st of
            ``date_from``'s year at 23:59:59.999999.
        :param size: ``size`` of the per-product terms aggregation.
        :returns: ``{'years': [{'year', 'products'}, ...]}`` where ``year``
            is the first four characters of the bucket's ``key_as_string``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            date_to = date_from.replace(month=12,
                                        day=31,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)

        by_year = query.aggs.bucket('intervals',
                                    'date_histogram',
                                    field='usage_start_date',
                                    interval='year',
                                    min_doc_count=1)
        by_product = by_year.bucket('products',
                                    'terms',
                                    field='product_name',
                                    size=size)
        by_product.metric('cost', 'sum', field='cost')
        # Leave out line items whose cost is exactly zero.
        query = query.query('bool', filter=[~dsl.Q('term', cost=0)])

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        years = []
        for interval in response['aggregations']['intervals']['buckets']:
            label = interval['key_as_string'][:4]
            products = []
            for product in interval['products']['buckets']:
                products.append({
                    'product': SHORT_NAMES.get(product['key'],
                                               product['key']),
                    'cost': product['cost']['value'],
                })
            years.append({'year': label, 'products': products})
        return dict(years=years)

    @classmethod
    @with_cache()
    def get_cost_by_resource(cls,
                             keys,
                             date_from=None,
                             date_to=None,
                             search=None):
        """Sum cost per resource id for the given account(s).

        :param keys: one linked account id, or a list of them.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :param search: optional text; when given, only resources matching
            the wildcard ``*<search>*`` on ``resource_id`` are queried.
        :returns: a list of ``{'resource', 'cost'}`` dicts; the aggregation
            is ordered by cost, descending.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = query.filter('range', usage_start_date=period)
        if search:
            pattern = '*{}*'.format(search)
            query = query.query('wildcard', resource_id=pattern)

        by_resource = query.aggs.bucket('resources',
                                        'terms',
                                        field='resource_id',
                                        order={'cost': 'desc'},
                                        size=0x7FFFFFFF)
        by_resource.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        resources = []
        for bucket in response['aggregations']['resources']['buckets']:
            resources.append({
                'resource': bucket['key'],
                'cost': bucket['cost']['value'],
            })
        return resources

    @classmethod
    def get_monthly_cost_by_resource(cls,
                                     resource_ids,
                                     date_from=None,
                                     date_to=None):
        """Sum cost per month over a set of resource ids.

        :param resource_ids: iterable of resource ids; when it is falsy no
            search is made and an empty dict is returned.
        :param date_from: range start; defaults to the first day of the
            current UTC month at midnight.
        :param date_to: range end; defaults to the last day of
            ``date_from``'s month at 23:59:59.999999.
        :returns: a dict mapping each month bucket's ``key_as_string`` to
            its summed cost.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_end = calendar.monthrange(date_from.year,
                                            date_from.month)[1]
            date_to = date_from.replace(day=month_end,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        # Nothing to look up: skip the search entirely.
        if not resource_ids:
            return {}

        period = {
            'from': date_from.isoformat(),
            'to': date_to.isoformat(),
        }
        query = cls.search().filter('range', usage_start_date=period)
        query = query.filter('terms', resource_id=list(resource_ids))
        by_month = query.aggs.bucket('months',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_month.metric('cost', 'sum', field='cost')

        response = client.search('awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        monthly = {}
        for bucket in response['aggregations']['months']['buckets']:
            monthly[bucket['key_as_string']] = bucket['cost']['value']
        return monthly

    @classmethod
    @with_cache()
    def get_lambda_usage(cls, keys, date_from=None, date_to=None):
        """Summarise AWS Lambda usage per resource for some accounts.

        :param keys: one ``linked_account_id`` or a list of them.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the current UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :return: list with one dict per ``resource_id`` bucket holding
            ``rid``, ``name`` (text after the last ``:`` of the id),
            ``requests`` and ``gb_seconds`` (summed ``usage_quantity`` of
            the first usage type ending in ``-Request`` /
            ``-Lambda-GB-Second``, or ``None`` when there is none), ``cost``
            (average of the ``cost`` field) and ``raw_cost`` (as computed by
            ``lambdapricing.get_raw_cost``).
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        search = cls.search()
        account_ids = keys if isinstance(keys, list) else [keys]
        search = search.filter('terms', linked_account_id=account_ids)
        search = search.filter('term', product_name='AWS Lambda')
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        search = search.filter('range', usage_start_date=period)

        # resources -> usage types -> item descriptions, each level with its
        # own metric.
        per_resource = search.aggs.bucket('resources',
                                          'terms',
                                          field='resource_id',
                                          size=0x7FFFFFFF)
        per_resource.metric('cost', 'avg', field='cost')
        per_type = per_resource.bucket('types',
                                       'terms',
                                       field='usage_type',
                                       size=0x7FFFFFFF)
        per_type.metric('quantity', 'sum', field='usage_quantity')
        per_description = per_type.bucket('descriptions',
                                          'terms',
                                          field='item_description',
                                          size=0x7FFFFFFF)
        per_description.metric('quantity', 'sum', field='usage_quantity')

        response = client.search(index='awsdetailedlineitem',
                                 body=search.to_dict(),
                                 size=0,
                                 request_timeout=60)

        def _quantity_for(type_buckets, suffix):
            # Quantity of the first usage type whose key ends with `suffix`,
            # or None when no usage type matches.
            return next((bucket['quantity']['value']
                         for bucket in type_buckets
                         if bucket['key'].endswith(suffix)), None)

        usages = []
        for resource in response['aggregations']['resources']['buckets']:
            usages.append({
                'rid': resource['key'],
                'name': resource['key'].rpartition(':')[-1],
                'requests': _quantity_for(resource['types']['buckets'],
                                          '-Request'),
                'gb_seconds': _quantity_for(resource['types']['buckets'],
                                            '-Lambda-GB-Second'),
                'cost': resource['cost']['value'],
                'raw_cost': lambdapricing.get_raw_cost([
                    usage_type['descriptions']['buckets']
                    for usage_type in resource['types']['buckets']
                ]),
            })
        return usages

    @classmethod
    @with_cache()
    def get_s3_bandwidth_costs(cls, key, date_from=None, date_to=None):
        """Return cost and quantity per S3 usage type for one account.

        :param key: the ``linked_account_id`` to report on.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the current UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :return: list of ``{'type', 'quantity', 'cost'}`` dicts, one per
            ``usage_type`` bucket; ``quantity`` is the summed
            ``usage_quantity`` and ``cost`` the summed ``cost``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        search = cls.search().filter('term', linked_account_id=key)
        search = search.filter('term',
                               product_name='Amazon Simple Storage Service')
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        search = search.filter('range', usage_start_date=period)

        per_type = search.aggs.bucket('types',
                                      'terms',
                                      field='usage_type',
                                      size=0x7FFFFFFF)
        per_type.metric('cost', 'sum', field='cost')
        # The metric is named 'gb'; presumably the quantity is in gigabytes.
        per_type.metric('gb', 'sum', field='usage_quantity')

        response = client.search(index='awsdetailedlineitem',
                                 body=search.to_dict(),
                                 size=0,
                                 request_timeout=60)

        transfers = []
        for usage_type in response['aggregations']['types']['buckets']:
            transfers.append({
                'type': usage_type['key'],
                'quantity': usage_type['gb']['value'],
                'cost': usage_type['cost']['value'],
            })
        return transfers

    @classmethod
    @with_cache()
    def get_ec2_bandwidth_costs(cls, key, date_from=None, date_to=None):
        """Return cost and quantity per EC2 usage type for one account.

        :param key: the ``linked_account_id`` to report on.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the current UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :return: list of ``{'type', 'quantity', 'cost'}`` dicts, one per
            ``usage_type`` bucket; ``quantity`` is the summed
            ``usage_quantity`` and ``cost`` the summed ``cost``.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            month_length = calendar.monthrange(date_from.year,
                                               date_from.month)[1]
            date_to = date_from.replace(day=month_length,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        query = cls.search()
        query = query.filter('term', linked_account_id=key)
        query = query.filter('term',
                             product_name='Amazon Elastic Compute Cloud')
        query = query.filter('range',
                             usage_start_date={
                                 'from': date_from.isoformat(),
                                 'to': date_to.isoformat(),
                             })

        by_usage_type = query.aggs.bucket('types',
                                          'terms',
                                          field='usage_type',
                                          size=0x7FFFFFFF)
        by_usage_type.metric('cost', 'sum', field='cost')
        # The metric is named 'gb'; presumably the quantity is in gigabytes.
        by_usage_type.metric('gb', 'sum', field='usage_quantity')

        # Aggregation only: size=0 suppresses the individual hits.
        result = client.search(index='awsdetailedlineitem',
                               body=query.to_dict(),
                               size=0,
                               request_timeout=60)

        buckets = result['aggregations']['types']['buckets']
        return [
            dict(type=bucket['key'],
                 quantity=bucket['gb']['value'],
                 cost=bucket['cost']['value'])
            for bucket in buckets
        ]

    @classmethod
    def get_ec2_daily_cost(cls, key):
        """Yield ``(day, cost)`` pairs of EC2 cost for one account.

        No date range is applied, so every day that has at least one EC2
        line item for the account is reported.

        :param key: the ``linked_account_id`` to report on.
        :return: generator of tuples; ``day`` is the part of the bucket's
            ``key_as_string`` before the first ``T`` and ``cost`` the summed
            ``cost`` for that day.
        """
        search = (cls.search()
                  .filter('term', linked_account_id=key)
                  .filter('term',
                          product_name='Amazon Elastic Compute Cloud'))

        daily = search.aggs.bucket('intervals',
                                   'date_histogram',
                                   field='usage_start_date',
                                   interval='day',
                                   min_doc_count=1)
        daily.metric('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=search.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['intervals']['buckets']:
            # Keep only the date part of the timestamp string.
            day = bucket['key_as_string'].partition('T')[0]
            yield day, bucket['cost']['value']

    @classmethod
    @with_cache()
    def get_elb_usage_a_day(cls, keys, date_from=None, date_to=None):
        """Return the per-day average cost and usage of each load balancer.

        Line items are selected by account, date range and a ``resource_id``
        starting with ``arn:aws:elasticloadbalancing``; totals per resource
        are then divided by the number of whole days in the range.

        :param keys: one ``linked_account_id`` or a list of them.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the current UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :return: list with one dict per resource holding ``rid``, ``cost``
            (summed cost per day), ``hours`` (summed quantity of usage types
            ending in ``LoadBalancerUsage`` per day) and ``bytes`` (summed
            quantity of usage types ending in ``Bytes``, multiplied by
            2**30, per day).
        """
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        # Presumably converts a quantity reported in GiB to bytes.
        gib = Fraction(2**30)
        # timedelta.days truncates, so a range shorter than 24 hours gives 0;
        # clamp to one day so such a range cannot raise ZeroDivisionError.
        days = max((date_to - date_from).days, 1)

        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter("prefix", resource_id="arn:aws:elasticloadbalancing")
        s = s.sort({"usage_start_date": {"order": "desc"}})
        agg = s.aggs.bucket('rid',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg = agg.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF)
        agg.metric('quantity', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        def _total(type_buckets, suffix):
            # Sum the quantity of every usage type whose key ends in `suffix`.
            return sum(x['quantity']['value'] for x in type_buckets
                       if x['key'].endswith(suffix))

        elbs = [{
            'rid':
            elb['key'],
            'cost':
            elb['cost']['value'] / days,
            'hours':
            float(_total(elb['types']['buckets'], 'LoadBalancerUsage') / days),
            'bytes':
            float((_total(elb['types']['buckets'], 'Bytes') * gib) / days),
        } for elb in res['aggregations']['rid']['buckets']]
        return elbs

    @classmethod
    @with_cache()
    def get_instance_type(cls, keys, date_from=None, date_to=None):
        """Group EC2 ``BoxUsage`` line items by hour, instance type and region.

        At most 10000 line items are fetched, newest ``usage_start_date``
        first.

        :param keys: one ``linked_account_id`` or a list of them.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the current UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :return: list of dicts, in order of first appearance, each holding
            ``hour``, ``instance`` (second ``:``-separated part of the usage
            type), ``region`` (availability zone without a trailing letter,
            or ``'unknown'``), ``ridCount`` and ``rids`` (the resource ids
            sharing that hour/instance/region).
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        search = cls.search()
        # Only these fields are read from each hit below.
        search = search.extra(_source=[
            'usage_start_date', 'usage_type', 'availability_zone',
            'resource_id'
        ])
        account_ids = keys if isinstance(keys, list) else [keys]
        search = search.filter('terms', linked_account_id=account_ids)
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        search = search.filter('range', usage_start_date=period)
        search = search.filter("term",
                               product_name='Amazon Elastic Compute Cloud')
        search = search.query('wildcard', usage_type='*BoxUsage:*')
        search = search.filter('exists', field='resource_id')
        search = search.sort({"usage_start_date": {"order": "desc"}})
        response = client.search(index='awsdetailedlineitem',
                                 body=search.to_dict(),
                                 size=10000,
                                 request_timeout=60)

        types = []
        # (hour, instance, region) -> the matching dict stored in `types`.
        by_key = {}
        for hit in response['hits']['hits']:
            source = hit['_source']
            if 'availability_zone' in source:
                zone = source['availability_zone']
                # Drop a trailing zone letter to get the region name.
                region = zone[:-1] if zone[-1].isalpha() else zone
            else:
                region = 'unknown'
            entry = {
                'hour': source['usage_start_date'],
                'instance': source['usage_type'].split(':')[1],
                'region': region,
                'ridCount': 1,
            }
            resource_id = source['resource_id']
            group_key = (entry['hour'], entry['instance'], entry['region'])
            if group_key in by_key:
                known = by_key[group_key]
                known['rids'].append(resource_id)
                known['ridCount'] += 1
                continue
            entry['rids'] = [resource_id]
            types.append(entry)
            by_key[group_key] = entry
        return types

    @classmethod
    @with_cache()
    def get_instance_hour(cls,
                          keys,
                          date_from=None,
                          date_to=None,
                          min_hour=None):
        """Return the average number of daily line items per EC2 instance.

        For every ``resource_id`` starting with ``i-`` the ``BoxUsage`` line
        items are counted per day, and the counts are averaged over the days
        that have at least one item.

        :param keys: one ``linked_account_id`` or a list of them.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the current UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :param min_hour: when truthy, instances whose average is below this
            value are left out.
        :return: list of ``{'id', 'hours'}`` dicts sorted by ``hours``,
            highest first.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)

        search = cls.search()
        account_ids = keys if isinstance(keys, list) else [keys]
        search = search.filter('terms', linked_account_id=account_ids)
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        search = search.filter('range', usage_start_date=period)
        search = search.filter("term",
                               product_name='Amazon Elastic Compute Cloud')
        search = search.filter('prefix', resource_id='i-')
        search = search.query('wildcard', usage_type='*BoxUsage*')

        per_instance = search.aggs.bucket('resource_id',
                                          'terms',
                                          field='resource_id',
                                          size=0x7FFFFFFF)
        per_instance.bucket('days',
                            'date_histogram',
                            field='usage_start_date',
                            interval='day',
                            min_doc_count=1)
        response = client.search(index='awsdetailedlineitem',
                                 body=search.to_dict(),
                                 size=0,
                                 request_timeout=60)

        instance_list = []
        for instance in response['aggregations']['resource_id']['buckets']:
            daily_counts = [
                day['doc_count'] for day in instance['days']['buckets']
            ]
            avg_hours = sum(daily_counts) / float(len(daily_counts))
            if min_hour and not avg_hours >= min_hour:
                continue
            instance_list.append({'id': instance['key'], 'hours': avg_hours})
        instance_list.sort(key=lambda entry: entry['hours'], reverse=True)
        return instance_list

    @classmethod
    @with_cache()
    def get_s3_buckets_per_tag(cls, keys):
        """Group S3 resources by tag key and tag value.

        Only S3 line items that have a ``tag`` field and an
        ``item_description`` matching ``*storage*`` are considered.

        :param keys: one ``linked_account_id`` or a list of them.
        :return: a list shaped as follows::

            [{
                "tag_key": "KEY",              # unique in list
                "tag_value": [{
                    "tag_value": "VALUE",      # unique in list
                    "s3_buckets": [{
                        "bucket_name": "BUCKET_NAME",
                        "account_id": "ACCOUNT_ID"
                    }, {...}]
                }, {...}]
            }, {...}]
        """

        def _find(entries, field, value):
            # First dict in `entries` whose `field` equals `value`, else None.
            return next((item for item in entries if item[field] == value),
                        None)

        def _get_or_add(entries, field, value, children_field):
            # Reuse the entry already holding `value`; otherwise create and
            # register a new one, so each value appears only once in the list.
            entry = _find(entries, field, value)
            if entry is None:
                entry = {field: value, children_field: []}
                entries.append(entry)
            return entry

        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.query('exists', field="tag")
        s = s.query('wildcard', item_description="*storage*")
        agg = s.aggs.bucket('tag_key', 'terms', field="tag.key")
        agg = agg.bucket('tag_value', 'terms', field='tag.value')
        agg.bucket('ressource_id', 'terms',
                   field='resource_id').bucket('account_id',
                                               'terms',
                                               field='linked_account_id')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        bucket_tagged = []
        for key_bucket in res['aggregations']['tag_key']['buckets']:
            key_entry = _get_or_add(bucket_tagged, 'tag_key',
                                    key_bucket['key'], 'tag_value')
            for value_bucket in key_bucket['tag_value']['buckets']:
                value_entry = _get_or_add(key_entry['tag_value'], 'tag_value',
                                          value_bucket['key'], 's3_buckets')
                for resource in value_bucket['ressource_id']['buckets']:
                    known = _find(value_entry['s3_buckets'], 'bucket_name',
                                  resource['key'])
                    if known is not None:
                        # Already listed: appending again would duplicate it.
                        continue
                    value_entry['s3_buckets'].append({
                        "bucket_name":
                        resource['key'],
                        # The first account bucket is taken as the owner.
                        "account_id":
                        resource['account_id']['buckets'][0]['key']
                    })
        return bucket_tagged

    @classmethod
    @with_cache()
    def get_s3_bandwidth_info_and_cost_per_name(cls,
                                                key,
                                                bucket_resource_ids,
                                                date_from=None,
                                                date_to=None):
        """Return transfer cost and volume per S3 resource for one account.

        Only line items whose ``usage_type`` matches ``*Bytes`` are counted.

        :param key: the ``linked_account_id`` to report on.
        :param bucket_resource_ids: one ``resource_id`` or a list of them.
        :param date_from: start of the ``usage_start_date`` range; defaults
            to the first instant of the previous UTC month.
        :param date_to: end of the range; defaults to the last microsecond
            of the month ``date_from`` falls in.
        :return: list with one dict per resource holding ``bucket_name``,
            ``cost`` (summed cost) and ``transfer_stats``, a list of
            ``{'type', 'data'}`` dicts giving the summed ``usage_quantity``
            per usage type.
        """
        # `months=1` is the relative form (go back one month). The singular
        # `month=1` is absolute and would pin the date to January.
        date_from = date_from or (datetime.utcnow() - relativedelta(
            months=1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.filter('terms',
                     resource_id=bucket_resource_ids if isinstance(
                         bucket_resource_ids, list) else [bucket_resource_ids])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter('wildcard', usage_type="*Bytes")
        agg = s.aggs.bucket('bucket_name',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg = agg.bucket('transfer_type', 'terms', field='usage_type')
        agg.metric('data', 'sum', field='usage_quantity')
        # size=0: only the aggregation is needed, not the hits themselves.
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)
        data = [{
            "bucket_name":
            bucket['key'],
            "cost":
            bucket['cost']['value'],
            "transfer_stats": [{
                "type": transfer_stat['key'],
                "data": transfer_stat['data']['value']
            } for transfer_stat in bucket['transfer_type']['buckets']]
        } for bucket in res['aggregations']['bucket_name']['buckets']]
        return data

Example #24

0

Show file

class Company(es.DocType):
    """Elasticsearch document describing a company.

    Declares the custom analyzers (French text, technology lists, company
    names) used by the fields below, then the fields themselves. The index
    name declared in ``Meta`` is rewritten through ``compute_index_name``
    when an instance is created (see ``__init__``).
    """

    class Meta:
        index = 'companies'
        doc_type = 'company'

    # Elision filter configured with a list of French elided articles.
    french_elision = es.token_filter(
        'french_elision',
        type='elision',
        articles_case=True,
        articles=[
            'l', 'm', 't', 'qu', 'n', 's',
            'j', 'd', 'c', 'jusqu', 'quoiqu',
            'lorsqu', 'puisqu'
        ]
    )

    # Stop-word filter using the built-in '_french_' stop-word list.
    french_stopwords = es.token_filter(
        'french_stopwords',
        type='stop',
        stopwords='_french_'
    )

    # Do not include this filter if keywords is empty
    # (it is currently left out of french_analyzer below).
    french_keywords = es.token_filter(
        'french_keywords',
        type='keyword_marker',
        keywords=[]
    )

    french_stemmer = es.token_filter(
        'french_stemmer',
        type='stemmer',
        language='light_french'
    )

    # Analyzer for French free text: strips HTML, tokenizes with the
    # standard tokenizer, then applies the filters in the order listed.
    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip']
    )

    # Pattern tokenizer splitting on a space, a comma, or a comma followed
    # by a space.
    technologies_tokenizer = es.tokenizer(
        'comma_tokenizer',
        type='pattern',
        pattern=' |,|, '
    )

    # Synonym rules mapping spellings of language names onto canonical
    # tokens (e.g. 'c#' and 'csharp' both map to 'csharp_language' and
    # 'dotnet').
    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language',
            'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language',
            'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language',
            'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet',
            '.net => dotnet'
        ]
    )

    # Analyzer for technology lists: custom tokenizer plus synonym expansion.
    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=[
            'lowercase',
            'asciifolding',
            technologies_synonyms_filter
        ]
    )

    # NOTE(review): not referenced by any field in this class; presumably
    # used elsewhere (e.g. at query time) — confirm before removing.
    company_name_analyzer = es.analyzer(
        'company_name_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding'
        ]
    )

    # Fields declared with index='no' are presumably stored without being
    # searchable (legacy string-mapping option) — confirm against the
    # Elasticsearch version in use.
    id = es.String(index='no')

    name = es.String(analyzer=french_analyzer)

    # The description is analyzed as French text and additionally exposed
    # as the sub-field 'description.technologies' using the technologies
    # analyzer.
    description = es.String(
        analyzer=french_analyzer,
        fields={
            'technologies': es.String(analyzer=technologies_analyzer)
        }
    )

    technologies = es.String(analyzer=technologies_analyzer)

    url = es.String(index='no')
    logo_url = es.String(index='no')

    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()

    email = es.String(index='no')
    phone = es.String(index='no')

    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        """Create the document and recompute the index name it is stored in.

        NOTE(review): ``_doc_type`` looks shared at class level, so every
        instantiation feeds the already computed name back into
        ``compute_index_name`` — confirm that helper is idempotent.
        """
        super(Company, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        """Index name currently set on the document type."""
        return self._doc_type.index

    @property
    def doc_type(self):
        """Name of the document type."""
        return self._doc_type.name

Example #25

0

Show file

class Inschrijving(es.DocType):
    """Elasticsearch document for a registration ("inschrijving").

    Stored in the index named by ``settings.ELASTIC_INDICES['HR']``.
    NOTE(review): field names are Dutch; the English glosses in the comments
    below are translations of the names, not verified domain definitions.
    """

    # Display strings, stored as exact-match keywords.
    _display = es.Keyword()

    _kvk_display = es.Keyword()

    doctype = es.Keyword()

    # "KvK number": autocomplete-analyzed text with an exact 'raw' sub-field
    # and a 'nozero' sub-field using the nozero analyzer (presumably
    # ignoring leading zeros — confirm in the analyzers module).
    kvk_nummer = es.Text(analyzer=analyzers.autocomplete,
                         fields={
                             'raw': es.Keyword(),
                             'nozero': es.Text(analyzer=analyzers.nozero)
                         })

    # "Establishment number": same layout as kvk_nummer plus an integer
    # sub-field.
    vestigingsnummer = es.Text(analyzer=analyzers.autocomplete,
                               fields={
                                   'raw': es.Keyword(),
                                   'nozero':
                                   es.Text(analyzer=analyzers.nozero),
                                   'int': es.Integer()
                               })

    # "Main establishment" flag.
    hoofdvestiging = es.Boolean()

    # Nested objects with a 'code' (autocomplete text plus exact 'raw'
    # sub-field) and an 'omschrijving' ("description") text.
    sbi = es.Nested(
        properties={
            'code':
            es.Text(analyzer=analyzers.autocomplete,
                    fields={'raw': es.Keyword()}),
            'omschrijving':
            es.Text(),
        })

    # "Name": analyzed with the adres analyzer; 'raw' holds the exact value
    # and 'ngram' is indexed with the autocomplete analyzer but searched
    # with the standard analyzer.
    naam = es.Text(analyzer=analyzers.adres,
                   fields={
                       'raw':
                       es.Keyword(),
                       'ngram':
                       es.Text(analyzer=analyzers.autocomplete,
                               search_analyzer='standard')
                   })

    # "Trade names": nested objects whose 'naam' uses the same mapping as
    # the top-level naam field.
    handelsnamen = es.Nested(
        properties={
            'naam':
            es.Text(analyzer=analyzers.adres,
                    fields={
                        'raw':
                        es.Keyword(),
                        'ngram':
                        es.Text(analyzer=analyzers.autocomplete,
                                search_analyzer='standard')
                    })
        })

    # "Postal address", mapped like naam.
    postadres = es.Text(analyzer=analyzers.adres,
                        fields={
                            'raw':
                            es.Keyword(),
                            'ngram':
                            es.Text(analyzer=analyzers.autocomplete,
                                    search_analyzer='standard')
                        })

    # "Visiting address", mapped like naam.
    bezoekadres = es.Text(analyzer=analyzers.adres,
                          fields={
                              'raw':
                              es.Keyword(),
                              'ngram':
                              es.Text(analyzer=analyzers.autocomplete,
                                      search_analyzer='standard')
                          })

    # "Visiting address correction" flag.
    bezoekadres_correctie = es.Boolean()

    # Main establishment ("hoofdvestiging").
    # NOTE(review): stray marker; the hoofdvestiging field is declared above.

    # Geographic point for the registration.
    centroid = es.GeoPoint()

    class Index:
        # Index name comes from the project settings.
        name = settings.ELASTIC_INDICES['HR']

Example #26

0

Show file

class ValidationJob(elasticsearch_dsl.Document):
    """
    Elasticsearch document holding the results of a validation job.

    Stores job metadata, the metrics recorded for the job and the parameters
    it ran with; documents live in the index named by ``VALIDATION_JOBS``.

    TODO: Can this be merged with TrainingJob, with a common
        parent object?
    """
    # Job metadata
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()

    # Metrics
    # NOTE(review): "purported" presumably means the values reported by the
    # original training run — confirm with the code that fills them in.
    purported_acc = elasticsearch_dsl.Float()
    purported_loss = elasticsearch_dsl.Float()
    purported_sensitivity = elasticsearch_dsl.Float()

    # avg_* / best_* presumably aggregate over several test runs — confirm.
    avg_test_acc = elasticsearch_dsl.Float()
    avg_test_loss = elasticsearch_dsl.Float()
    avg_test_sensitivity = elasticsearch_dsl.Float()
    avg_test_specificity = elasticsearch_dsl.Float()
    avg_test_true_pos = elasticsearch_dsl.Float()
    avg_test_false_neg = elasticsearch_dsl.Float()
    avg_test_auc = elasticsearch_dsl.Float()

    best_test_acc = elasticsearch_dsl.Float()
    best_test_loss = elasticsearch_dsl.Float()
    best_test_sensitivity = elasticsearch_dsl.Float()
    best_test_specificity = elasticsearch_dsl.Float()
    best_test_true_pos = elasticsearch_dsl.Float()
    best_test_false_neg = elasticsearch_dsl.Float()
    best_test_auc = elasticsearch_dsl.Float()
    best_end_val_acc = elasticsearch_dsl.Float()
    best_end_val_loss = elasticsearch_dsl.Float()
    best_max_val_acc = elasticsearch_dsl.Float()
    best_max_val_loss = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()

    # Augmentation settings; zoom_range is stored as a keyword rather than
    # a number.
    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()

    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()

    # Data locations
    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()

    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    # Keep this list in sync with the fields in the "Params" section above.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range', 'zoom_range',
        'horizontal_flip', 'vertical_flip', 'dropout_rate1', 'dropout_rate2',
        'data_dir', 'gcs_url', 'mip_thickness', 'height_offset',
        'pixel_value_range'
    ]

    class Index:
        # Index name is a constant defined elsewhere in the project.
        name = VALIDATION_JOBS