Example #1
class AWSELBInfo(dsl.DocType):
    class Meta:
        index = 'awselbinfo'

    linked_account_id = dsl.String(index='not_analyzed')
    name = dsl.String(index='not_analyzed')
    region = dsl.String(index='not_analyzed')
    instances = dsl.String()

    @classmethod
    def init(cls, index=None, using=None):
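        # Create the index if it does not exist (ignore=400 swallows the
        # "already exists" error), then enable the legacy _ttl meta-field
        # so documents can expire automatically.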
        client.indices.create('awselbinfo', ignore=400)
        client.indices.put_mapping(index='awselbinfo',
                                   doc_type='a_ws_el_binfo',
                                   body={'_ttl': {
                                       'enabled': True
                                   }})
        cls._doc_type.init(index, using)

    @classmethod
    def get_elb_info(cls, key):
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.sort('-_ttl')
        res = client.search(index='awselbinfo',
                            body=s.to_dict(),
                            size=10000,
                            request_timeout=60)
        if res['hits']['total'] == 0:
            return []
        return [{
            'instances': elb['_source']['instances'].split(' '),
            'name': elb['_source']['name'],
            'region': elb['_source']['region'],
        } for elb in res['hits']['hits']]
Example #2
class GroupDocument(esd.DocType):
    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    model = esd.String(index='not_analyzed')  # alternative: analyzer=esd.analyzer('keyword', tokenizer='keyword', filter=['lowercase'])
    lens = esd.String(index='not_analyzed')
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')
Example #3
class BaseDocument(dsl.DocType):
    """Base document class to build ElasticSearch documents.

    This is a standard ``elasticsearch-dsl`` ``DocType`` class with
    fields for handling permissions already added.

    """

    #: list of user ids with view permission on the object
    users_with_permissions = dsl.String(multi=True)

    #: list of group ids with view permission on the object
    groups_with_permissions = dsl.String(multi=True)
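
The permission fields above lend themselves to a straightforward visibility
filter. A minimal sketch, assuming a configured elasticsearch-dsl connection;
search_visible, user_id and group_ids are illustrative names, not part of the
original code:

from elasticsearch_dsl import Q

def search_visible(doc_cls, user_id, group_ids):
    """Restrict a search to documents the user may view, either directly
    or through one of their groups (illustrative helper, not from the
    original source)."""
    permission_query = (Q('term', users_with_permissions=user_id) |
                        Q('terms', groups_with_permissions=group_ids))
    return doc_cls.search().filter(permission_query)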
Example #4
class TestSearchDocument(BaseDocument):
    # pylint: disable=no-member
    name = dsl.String()
    num = dsl.Integer()
    json = dsl.Object()

    class Meta:
        index = 'test_search'
Example #5
class AWSIdNameMapping(dsl.DocType):
    class Meta:
        index = 'awsidnamemapping'
    key = dsl.String(index='not_analyzed')
    rid = dsl.String(index='not_analyzed')
    name = dsl.String(index='not_analyzed')
    date = dsl.Date(format='date_optional_time||epoch_millis')

    @classmethod
    def get_id_name_mapping(cls, key):
        s = cls.search()
        s = s.query('match', key=key).sort('-date')
        res = {}
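        # Note: scan() streams hits through the scroll API, which does not
        # guarantee the '-date' ordering requested above, so the name kept
        # per rid is not necessarily the most recent one.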
        for hit in s.scan():
            if hit.rid not in res:
                res[hit.rid] = hit.name
        return res
Example #6
class InfoRiegoRecord(dsl.DocType):
    code = dsl.String()
    location = dsl.String()
    date = dsl.Date()
    rain = dsl.Float()
    temperature = dsl.Float()
    rel_humidity = dsl.Float()
    radiation = dsl.Float()
    wind_speed = dsl.Float()
    wind_direction = dsl.Float()

    lat_lon = dsl.GeoPoint(lat_lon=True)
    station_height = dsl.Integer()

    def save(self, **kwargs):
        return super(InfoRiegoRecord, self).save(**kwargs)

    class Meta:
        index = 'inforiego'
Example #7
class TestAnalyzerSearchDocument(BaseDocument):
    # pylint: disable=no-member
    name = dsl.String(analyzer=dsl.analyzer(
        'test_analyzer',
        tokenizer='keyword',
        filter=[
            'lowercase',
        ],
    ))

    class Meta:
        index = 'test_analyzer_search'
Example #8
class TestSearchDocument(BaseDocument):
    # pylint: disable=no-member
    id = dsl.Integer()  # pylint: disable=invalid-name
    name = dsl.String()
    num = dsl.Integer()
    json = dsl.Object()

    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Meta:
        index = 'test_search'
Example #9
class Locatie(es.DocType):
    ext_id = es.String(index='not_analyzed')
    naam = es.String(analyzer=dutch_analyzer)
    centroid = es.GeoPoint()
    openbare_ruimte_naam = es.String(index='not_analyzed')
    huisnummer = es.String(index='not_analyzed')
    huisnummer_toevoeging = es.String(index='not_analyzed')
    postcode = es.String(index='not_analyzed')
Example #10
class sigpac_record(dsl.DocType):
    dn_pk = dsl.Long()

    provincia = dsl.Integer()
    municipio = dsl.Integer()
    poligono = dsl.Integer()
    parcela = dsl.Integer()
    recinto = dsl.Integer()
    zona = dsl.Integer()

    perimetro = dsl.Long()
    superficie = dsl.Long()
    pend_med = dsl.Integer()
    points = dsl.GeoShape()
    bbox = dsl.GeoShape()
    bbox_center = dsl.GeoPoint(lat_lon=True)

    uso_sigpac = dsl.String()

    agregado = dsl.Integer()
    cap_auto = dsl.Integer()
    cap_manual = dsl.Integer()
    coef_regadio = dsl.Float()
    c_refpar = dsl.String()
    c_refpol = dsl.String()
    c_refrec = dsl.String()
    dn_oid = dsl.Long()

    elevation = dsl.Float()

    def save(self, **kwargs):
        return super(sigpac_record, self).save(**kwargs)

    class Meta:
        index = 'plots'
        doc_type = 'sigpac'
Example #11
class MappingSearchDocument(BaseDocument):
    """Index for mapping search."""

    # pylint: disable=no-member
    relation_type = dsl.String(index='not_analyzed')
    source_db = dsl.String(index='not_analyzed')
    source_id = dsl.String(index='not_analyzed')
    source_species = dsl.String(index='not_analyzed')
    target_db = dsl.String(index='not_analyzed')
    target_id = dsl.String(index='not_analyzed')
    target_species = dsl.String(index='not_analyzed')

    class Meta:
        """Meta class for mapping search document."""

        index = 'mapping_search'
Example #12
def document_field(field):
    """
    The default ``field_factory`` method for converting Django field instances to ``elasticsearch_dsl.Field`` instances.
    Auto-created fields (primary keys, for example) and one-to-many fields (reverse FK relationships) are skipped.
    """
    if field.auto_created or field.one_to_many:
        return None
    if field.many_to_many:
        return RawMultiString
    defaults = {
        models.DateField: dsl.Date(),
        models.DateTimeField: dsl.Date(),
        models.IntegerField: dsl.Long(),
        models.PositiveIntegerField: dsl.Long(),
        models.BooleanField: dsl.Boolean(),
        models.NullBooleanField: dsl.Boolean(),
        models.SlugField: dsl.String(index='not_analyzed'),
        models.DecimalField: dsl.Double(),
        models.FloatField: dsl.Float(),
    }
    return defaults.get(field.__class__, RawString)
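
As a hedged illustration (not part of the original module), a factory like
document_field could be folded over a Django model's fields to assemble an
elasticsearch-dsl Mapping; build_mapping and its arguments are hypothetical:

def build_mapping(model, doc_type_name):
    """Run every concrete and many-to-many field through document_field,
    skipping fields the factory rejects (hypothetical helper)."""
    mapping = dsl.Mapping(doc_type_name)
    fields = list(model._meta.concrete_fields) + list(model._meta.many_to_many)
    for field in fields:
        es_field = document_field(field)
        if es_field is not None:
            mapping.field(field.name, es_field)
    return mapping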
Example #13
class PhotoDocument(esd.DocType):
    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    size = esd.Integer()
    model = esd.String(index='not_analyzed')  # alternative: analyzer=esd.analyzer('keyword', tokenizer='keyword', filter=['lowercase'])
    model_ci = esd.String(analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ]))
    lens = esd.String(index='not_analyzed')
    lens_ci = esd.String(analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ]))
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')

    def extended_dict(self):
        dct = self.to_dict()
        dct["id"] = self.meta.id
        return dct
Example #14
class Geocomplete(es.DocType):
    class Meta:
        index = 'geocomplete'
        doc_type = 'geoloc-entry'

    french_elision = es.token_filter('french_elision',
                                     type='elision',
                                     articles_case=True,
                                     articles=[
                                         'l', 'm', 't', 'qu', 'n', 's', 'j',
                                         'd', 'c', 'jusqu', 'quoiqu', 'lorsqu',
                                         'puisqu'
                                     ])

    geocompletion_ngram_filter = es.token_filter('geocompletion_ngram',
                                                 type='edgeNGram',
                                                 min_gram=1,
                                                 max_gram=50,
                                                 side='front')

    town_filter = es.token_filter('town_filter',
                                  type='pattern_replace',
                                  pattern=' ',
                                  replacement='-')

    geocompletion_index_tokenizer = es.tokenizer(
        'geocompletion_index_tokenizer', type='pattern', pattern='@')

    geocompletion_index_analyzer = es.analyzer(
        'geocompletion_index_analyzer',
        type='custom',
        tokenizer=geocompletion_index_tokenizer,
        filter=[
            'lowercase', 'asciifolding', french_elision, town_filter,
            geocompletion_ngram_filter
        ])

    geocompletion_search_analyzer = es.analyzer(
        'geocompletion_search_analyzer',
        type='custom',
        tokenizer=geocompletion_index_tokenizer,
        filter=['lowercase', 'asciifolding', town_filter, french_elision])

    name = es.String(index='analyzed',
                     analyzer=geocompletion_index_analyzer,
                     search_analyzer=geocompletion_search_analyzer,
                     fields=dict(raw=es.String(index='not_analyzed')))

    complement = es.String(index='not_analyzed')

    postal_code_ngram_filter = es.token_filter('postal_code_ngram',
                                               type='edgeNGram',
                                               min_gram=1,
                                               max_gram=5,
                                               side='front')

    postal_code_index_analyzer = es.analyzer('postal_code_index_analyzer',
                                             type='custom',
                                             tokenizer='standard',
                                             filter=[postal_code_ngram_filter])

    postal_code_search_analyzer = es.analyzer('postal_code_search_analyzer',
                                              type='custom',
                                              tokenizer='standard')

    postal_code = es.String(index='analyzed',
                            analyzer=postal_code_index_analyzer,
                            search_analyzer=postal_code_search_analyzer,
                            fields=dict(raw=es.String(index='not_analyzed')))

    geolocation = es.GeoPoint()

    weight = es.Float()

    def __init__(self, meta=None, **kwargs):
        super(Geocomplete, self).__init__(meta, **kwargs)

        index_name = compute_index_name(self.index)
        if self.index in index_name:
            self._doc_type.index = index_name

    @property
    def index(self):
        return self._doc_type.index

    @property
    def doc_type(self):
        return self._doc_type.name
Example #15
class S3BucketFile(dsl.DocType):
    class Meta:
        index = 's3bucketfile'

    bucket = dsl.String(index='not_analyzed')
    key = dsl.String(index='not_analyzed',
                     fields={'path': dsl.String(analyzer=directory_analyzer)})
    tags = dsl.String(index='not_analyzed',
                      fields={'key': dsl.String(analyzer=tag_key_analyzer)})
    size = dsl.Integer()
    modified = dsl.Date(format='date_optional_time||epoch_millis')

    @classmethod
    def get_bucket_sizes(cls, buckets):
        s = cls.search()
        s = s.filter('terms', bucket=buckets)
        agg = s.aggs.bucket('buckets',
                            'terms',
                            field='bucket',
                            size=len(buckets))
        agg.metric('size', 'sum', field='size')
        res = client.search(index='s3bucketfile', body=s.to_dict(), size=0)
        for bucket in res['aggregations']['buckets']['buckets']:
            yield bucket['key'], bucket['size']['value']

    @classmethod
    def get_dir_sizes(cls, bucket, path=None):
        s = cls.search()
        s = s.filter('term', bucket=bucket)
        if path:
            s = s.filter({'term': {'key.path': path}})
        path_regex = '[^/]+'
        if path:
            path_regex = path + '/' + path_regex
        agg = s.aggs.bucket('dirs',
                            'terms',
                            field='key.path',
                            size=1000,
                            include=path_regex)
        agg.metric('size', 'sum', field='size')
        res = client.search(index='s3bucketfile', body=s.to_dict(), size=0)
        for directory in res['aggregations']['dirs']['buckets']:
            key = directory['key']
            if path:
                key = key.replace(path, '')
                if key.startswith('/'):
                    key = key[1:]
            yield key, directory['size']['value']

    @classmethod
    def get_bucket_tags(cls, buckets, tags=None, tagkey=None):
        return cls.get_dir_tags(buckets, tags=tags, tagkey=tagkey)

    @classmethod
    def get_dir_tags(cls, buckets, path=None, tags=None, tagkey=None):
        s = cls.search()
        if isinstance(buckets, list):
            s = s.filter('terms', bucket=buckets)
        else:
            s = s.filter('term', bucket=buckets)
        if path:
            s = s.filter({'term': {'key.path': path}})
        if tags:
            s = s.filter('terms', tags=tags)
        if tagkey:
            agg = s.aggs.bucket('tags',
                                'terms',
                                field='tags',
                                include=re.escape(tagkey) + '=.*',
                                size=500)
        else:
            agg = s.aggs.bucket('tags', 'terms', field='tags', size=500)
        agg.metric('size', 'sum', field='size')
        s.aggs.bucket('tagkeys', 'terms', field='tags.key', size=100)

        res = client.search(index='s3bucketfile', body=s.to_dict(), size=0)
        tags_agg = res['aggregations']['tags']['buckets']
        tags = dict((b['key'], b['size']['value']) for b in tags_agg)
        tagkeys = [b['key'] for b in res['aggregations']['tagkeys']['buckets']]
        return tags, tagkeys
Example #16
        from ``cls.get_id(obj)``. Uses ``seeker.mapping.serialize_object`` to build the field data dictionary.
        """
        data = {'_id': cls.get_id(obj)}
        data.update(serialize_object(obj, cls._doc_type.mapping, prepare=cls))
        return data

    @property
    def instance(self):
        """
        Returns the Django model instance corresponding to this document, fetched using ``cls.queryset()``.
        """
        return self.queryset().get(pk=self.meta.id)


RawString = dsl.String(analyzer=DEFAULT_ANALYZER,
                       fields={
                           'raw': dsl.String(index='not_analyzed'),
                       })
"""
An ``elasticsearch_dsl.String`` instance (analyzed using ``SEEKER_DEFAULT_ANALYZER``) with a ``raw`` sub-field that is
not analyzed, suitable for aggregations, sorting, etc.
"""

RawMultiString = dsl.String(analyzer=DEFAULT_ANALYZER,
                            multi=True,
                            fields={
                                'raw': dsl.String(index='not_analyzed'),
                            })
"""
The same as ``RawString``, but with ``multi=True`` specified, so lists are returned.
"""
Example #17
class FeatureSearchDocument(BaseDocument):
    """Index for feature search."""

    # pylint: disable=no-member
    source = dsl.String(index='not_analyzed')
    feature_id = dsl.String(
        index='not_analyzed',
        # Additional subfield used for boosting during autocomplete.
        fields={'lower': {
            'type': 'string',
            'analyzer': identifier_analyzer
        }},
    )
    species = dsl.String()
    type = dsl.String()  # pylint: disable=invalid-name
    sub_type = dsl.String()
    name = dsl.String(
        index='not_analyzed',
        # Additional subfield used for boosting during autocomplete.
        fields={'lower': {
            'type': 'string',
            'analyzer': identifier_analyzer
        }},
    )
    full_name = dsl.String()
    description = dsl.String()
    aliases = dsl.String(
        multi=True,
        index='not_analyzed',
        # Additional subfield used for boosting during autocomplete.
        fields={'lower': {
            'type': 'string',
            'analyzer': identifier_analyzer
        }},
    )

    # Autocomplete.
    autocomplete = dsl.String(
        multi=True,
        # During indexing, we lowercase terms and tokenize using edge_ngram.
        analyzer=dsl.analyzer(
            'autocomplete_index',
            tokenizer='keyword',
            filter=[
                'lowercase',
                dsl.token_filter('autocomplete_filter',
                                 type='edgeNGram',
                                 min_gram=1,
                                 max_gram=15)
            ],
        ),
        # During search, we only lowercase terms.
        search_analyzer=dsl.analyzer(
            'autocomplete_search',
            tokenizer='keyword',
            filter=['lowercase'],
        ),
    )

    class Meta:
        """Meta class for feature search document."""

        index = 'feature_search'
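
A hedged sketch of how the autocomplete field and the lower sub-fields might
be combined at query time, boosting exact identifier matches over plain
edge-ngram hits; the function name and boost values are illustrative:

from elasticsearch_dsl import Q

def autocomplete(term):
    """Match against the edge-ngram autocomplete field, boosting documents
    whose lowercased identifier fields equal the query outright."""
    term = term.lower()
    query = (Q('match', autocomplete=term) |
             Q('term', **{'feature_id.lower': {'value': term, 'boost': 2.0}}) |
             Q('term', **{'name.lower': {'value': term, 'boost': 2.0}}))
    return FeatureSearchDocument.search().query(query)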
Example #18
class Job(es.DocType):
    class Meta:
        index = 'jobs'
        doc_type = 'job-offer'

    french_elision = es.token_filter('french_elision',
                                     type='elision',
                                     articles_case=True,
                                     articles=[
                                         'l', 'm', 't', 'qu', 'n', 's', 'j',
                                         'd', 'c', 'jusqu', 'quoiqu', 'lorsqu',
                                         'puisqu'
                                     ])

    french_stopwords = es.token_filter('french_stopwords',
                                       type='stop',
                                       stopwords='_french_')

    # Do not include this filter if keywords is empty
    french_keywords = es.token_filter('french_keywords',
                                      type='keyword_marker',
                                      keywords=[])

    french_stemmer = es.token_filter('french_stemmer',
                                     type='stemmer',
                                     language='light_french')

    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip'])

    technologies_tokenizer = es.tokenizer('comma_tokenizer',
                                          type='pattern',
                                          pattern=' |,|, ')

    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language', 'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language', 'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language', 'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet', '.net => dotnet'
        ])

    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=['lowercase', 'asciifolding', technologies_synonyms_filter])

    company_name_analyzer = es.analyzer('company_name_analyzer',
                                        tokenizer='standard',
                                        filter=['lowercase', 'asciifolding'])

    id = es.Integer()

    url = es.String(index='no')
    source = es.String(index='not_analyzed')

    title = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})

    description = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})

    company = es.String(analyzer=company_name_analyzer)

    company_url = es.String(index='no')

    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()

    tags = es.Nested(doc_class=Tag,
                     properties=dict(tag=es.String(index='not_analyzed'),
                                     weight=es.Integer()))

    publication_datetime = es.Date()
    publication_datetime_is_fake = es.Boolean()

    crawl_datetime = es.Date()

    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        super(Job, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        return self._doc_type.index

    @property
    def doc_type(self):
        return self._doc_type.name

    @property
    def published(self):
        return format_date(self.publication_datetime, locale='FR_fr')

    @property
    def published_in_days(self):
        delta = datetime.now() - self.publication_datetime  # TODO: bugfix
        return format_timedelta(delta, granularity='day', locale='en_US')

    @property
    def alltags(self):
        tags = []
        if self.tags:
            for tag in self.tags:
                if tag['tag'] not in condition_tags:
                    tags.append(Tag2(tag['tag'], tag['weight']))
        return tags

    @property
    def condition_tags(self):
        tags = []
        if self.tags:
            for tag in self.tags:
                if tag['tag'] in condition_tags:
                    tag = Tag2(tag['tag'], tag['weight'],
                               Tag2.get_css(tag['tag']))
                    tags.append(tag)
        return tags
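
To show the analyzer chain above at query time, a hedged sketch (not from the
original source) that searches the technologies sub-fields, where the synonym
filter maps variants such as 'c++' and 'cpp' onto a single token:

from elasticsearch_dsl import Q

def search_jobs_by_technology(technology):
    """multi_match analyzes the query with each sub-field's own analyzer,
    so 'c++' and 'cpp' resolve to the same synonym token."""
    return Job.search().query(
        Q('multi_match',
          query=technology,
          fields=['title.technologies', 'description.technologies']))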
Example #19
class AWSMetric(dsl.DocType):
    class Meta:
        index = 'awsmetric'

    key = dsl.String(index='not_analyzed')
    resource = dsl.String(index='not_analyzed')
    metric = dsl.String(index='not_analyzed')
    time = dsl.Date(format='date_optional_time||epoch_millis')
    period = dsl.Integer()
    value = dsl.Double()

    @classmethod
    def underutilized_resources(cls, keys, timespan=timedelta(days=30)):
        keys = any_key_to_string_array(keys)
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')
        s = s.filter('terms', key=keys)
        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('percentiles',
                   'percentile_ranks',
                   field='value',
                   values=[20, 50])
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        resources = []
        for resource in res['aggregations']['resources']['buckets']:
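            # percentile_ranks reports the share of samples at or below each
            # requested value; a rank of 100 at the 20 mark means maximum CPU
            # never exceeded 20%.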
            if resource['percentiles']['values']['20.0'] == 100:
                res_region, res_id = resource['key'].split('/')
                resources.append(
                    dict(type='EC2 Instance',
                         id=res_id,
                         region=res_region,
                         underutilized=['CPU usage under 20%']))

        return dict(resources=resources)

    @classmethod
    def hourly_cpu_usage(cls, keys, resources=None):
        s = cls.search()
        if isinstance(keys, basestring):
            keys = [keys]
        elif not isinstance(keys, list):
            keys = list(keys)
        assert all(isinstance(key, basestring) for key in keys)
        s = s.filter('terms', key=keys)
        if resources:
            s = s.filter('terms', resource=resources)
        s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')

        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='time',
                            interval='hour',
                            min_doc_count=1)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        tmp_hours = defaultdict(list)
        for interval in res['aggregations']['intervals']['buckets']:
            interval_hour = interval['key_as_string'].split('T')[1].split(
                ':')[0]
            tmp_hours[interval_hour].append(interval['utilization']['value'])
        hours = OrderedDict(
            zip(["{:02d}".format(x) for x in range(0, 24)],
                itertools.repeat(0)))
        for hour, values in tmp_hours.iteritems():
            hours[hour] = sum(values) / len(values)
        if not tmp_hours:
            return None
        return [
            dict(hour=hour, cpu=float(cpu)) for hour, cpu in hours.iteritems()
        ]

    @classmethod
    def days_of_the_week_cpu_usage(cls, keys, resources=None):
        s = cls.search()
        if isinstance(keys, basestring):
            keys = [keys]
        elif not isinstance(keys, list):
            keys = list(keys)
        assert all(isinstance(key, basestring) for key in keys)
        s = s.filter('terms', key=keys)
        if resources:
            s = s.filter('terms', resource=resources)
        s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')

        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='time',
                            interval='day',
                            min_doc_count=1)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        tmp_days_of_the_week = defaultdict(list)
        for interval in res['aggregations']['intervals']['buckets']:
            weekday = datetime.strptime(
                interval['key_as_string'].split('T')[0],
                '%Y-%m-%d').date().weekday()
            tmp_days_of_the_week[weekday].append(
                interval['utilization']['value'])
        days = OrderedDict(zip(range(0, 7), itertools.repeat(0)))
        for weekday, values in tmp_days_of_the_week.iteritems():
            days[weekday] = sum(values) / len(values)
        if not tmp_days_of_the_week:
            return None
        return [
            dict(day=calendar.day_name[weekday], cpu=float(cpu))
            for weekday, cpu in days.iteritems()
        ]

    @classmethod
    def daily_cpu_utilization(cls, key):
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')

        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='time',
                            interval='day',
                            min_doc_count=1)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for interval in res['aggregations']['intervals']['buckets']:
            yield interval['key_as_string'].split(
                'T')[0], interval['utilization']['value']

    @classmethod
    def get_cpu_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_instance_read_iops_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EC2:DiskReadOps:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_instance_write_iops_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EC2:DiskWriteOps:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_instance_read_bytes_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EBS:DiskReadBytes:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_instance_write_bytes_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EBS:DiskWriteBytes:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_volume_read_iops_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EBS:VolumeReadOps:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_volume_write_iops_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EBS:VolumeWriteOps:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_volume_read_bytes_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EBS:VolumeReadBytes:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_volume_write_bytes_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EBS:VolumeWriteBytes:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_network_in_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EC2:NetworkIn:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_network_out_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/EC2:NetworkOut:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_s3_space_usage(cls, key, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range',
                     time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='AWS/S3:BucketSizeBytes:Average')
        s = s.filter('term', key=key)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='awsmetric',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']
Example #20
class GoogleMetric(dsl.DocType):
    class Meta:
        index = 'googlemetric'
    identity = dsl.String(index='not_analyzed')
    resource = dsl.String(index='not_analyzed')
    metric = dsl.String(index='not_analyzed')
    time = dsl.Date(format='date_optional_time||epoch_millis')
    value = dsl.Double()

    @classmethod
    def daily_cpu_utilization(cls, identity_email):
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/cpu/utilization')

        agg = s.aggs.bucket('intervals', 'date_histogram', field='time', interval='day', min_doc_count=1)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for interval in res['aggregations']['intervals']['buckets']:
            yield interval['key_as_string'].split('T')[0], interval['utilization']['value']

    @classmethod
    def get_cpu_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/cpu/utilization')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_disk_read_iops_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/read_ops_count')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_disk_write_iops_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/write_ops_count')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_disk_read_bytes_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/read_bytes_count')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_disk_write_bytes_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/write_bytes_count')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_network_in_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/network/received_bytes_count')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']

    @classmethod
    def get_network_out_usage(cls, identity_email, timespan=timedelta(days=30)):
        s = cls.search()
        s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()})
        s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/network/sent_bytes_count')
        s = s.filter('term', identity=identity_email)

        agg = s.aggs.bucket('resources', 'terms', field='resource', size=300)
        agg.metric('utilization', 'avg', field='value')
        res = client.search(index='googlemetric', body=s.to_dict(), size=0)

        for resource in res['aggregations']['resources']['buckets']:
            yield resource['key'], resource['utilization']['value']
Example #21
class BaseDocument(seeker.Indexable):
    base_field = dsl.String()
Example #22
class TestModelWithSelfDependencyDocument(BaseDocument):
    # pylint: disable=no-member
    name = dsl.String()

    class Meta:
        index = 'test_model_with_self_dependency_search'
Example #23
class AWSStat(dsl.DocType):
    class Meta:
        index = 'awsstat'

    key = dsl.String(index='not_analyzed')
    time = dsl.Date(format='date_optional_time||epoch_millis')
    stat = dsl.String(index='not_analyzed')
    data = dsl.Object(enabled=False)

    @classmethod
    def latest_instance_stats(cls, key):
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat='instances').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=10,
                            request_timeout=60)
        stats = []
        for r in res['hits']['hits']:
            stat = r['_source']['data']
            stat.update(time=r['_source']['time'])
            stats.append(stat)
        stats.sort(key=lambda s: s['time'], reverse=True)
        return dict(stats=stats)

    @classmethod
    def get_latest_instance_states(cls, key, instance_id, days=5):
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat='instancestate/' + instance_id).sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=days,
                            request_timeout=60)

        states = []
        for r in res['hits']['hits']:
            states.append(
                dict(time=r['_source']['time'],
                     state=r['_source']['data']['state']))
        return states

    @classmethod
    def latest_on_demand_to_reserved_suggestion(cls, keys):
        keys = any_key_to_string_array(keys)
        s = cls.search()
        s = s.filter('terms', key=keys)
        s = s.filter('term', stat='ondemandtoreserved').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)

        if res['hits']['total'] > 0:
            return res['hits']['hits'][0]['_source']['data']
        return dict(total=0)

    @classmethod
    def latest_s3_space_usage(cls, keys):
        keys = any_key_to_string_array(keys)
        s = cls.search()
        s = s.filter('terms', key=keys)
        s = s.filter('term', stat='s3spaceusage').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)

        if res['hits']['total'] > 0:
            return res['hits']['hits'][0]['_source']['data']
        return None

    @classmethod
    def latest_available_volumes(cls, keys):
        keys = any_key_to_string_array(keys)
        s = cls.search()
        s = s.filter('terms', key=keys)
        s = s.filter('term', stat='detachedvolumes').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)

        if res['hits']['total'] > 0:
            return res['hits']['hits'][0]['_source']['data']
        return dict(total=0)

    @classmethod
    def latest_hourly_cpu_usage_by_tag(cls, key):
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat='hourlycpubytag').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)

        hits = res['hits']['hits']
        if res['hits']['total'] > 0 and 'data' in hits[0]['_source']:
            return hits[0]['_source']['data']
        return dict(tags=[])

    @classmethod
    def latest_daily_cpu_usage_by_tag(cls, key):
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat='dailycpubytag').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)

        hits = res['hits']['hits']
        if res['hits']['total'] > 0 and 'data' in hits[0]['_source']:
            return hits[0]['_source']['data']
        return dict(tags=[])

    @classmethod
    def latest_stopped_instances_report(cls, keys):
        keys = any_key_to_string_array(keys)
        s = cls.search()
        s = s.filter('terms', key=keys)
        s = s.filter('term', stat='stoppedinstancesreport').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)

        if res['hits']['total'] > 0:
            return res['hits']['hits'][0]['_source']['data']
        return dict(total=0)
Example #24
class GoogleDailyResource(dsl.DocType):
    class Meta:
        index = 'googledailyresource'

    identity = dsl.String(index='not_analyzed')
    rid = dsl.String(index='not_analyzed')
    product = dsl.String(index='not_analyzed')
    project_name = dsl.String(index='not_analyzed')
    date = dsl.Date(format='date_optional_time||epoch_millis')
    cost = dsl.Double()

    @classmethod
    def daily_compute_cost(cls, identity_email):
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        s = s.filter('term',
                     product='com.google.cloud/services/compute-engine')

        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='date',
                            interval='day',
                            min_doc_count=1)
        agg.metric('cost', 'sum', field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)

        for interval in res['aggregations']['intervals']['buckets']:
            yield interval['key_as_string'].split(
                'T')[0], interval['cost']['value']

    @classmethod
    def daily_cost_by_product(cls,
                              identity_email,
                              timespan=timedelta(days=7),
                              top=4):
        now = datetime.utcnow()
        rollup = cls.rollup_by_product(identity_email, now - timespan, now,
                                       'day', top)

        days = defaultdict(list)
        for interval, product, cost in rollup:
            days[interval.split('T')[0]].append(
                dict(cost=cost, product=get_google_uri_name(product)))

        res = dict(days=[dict(day=d, products=ps) for d, ps in days.items()])
        res['days'] = sorted(res['days'], key=lambda x: x['day'])
        return res

    @classmethod
    def month_cost_by_product(cls, identity_email, top=4):
        now = datetime.utcnow()
        rollup = cls.rollup_by_product(identity_email,
                                       datetime(now.year, now.month, 1),
                                       datetime.utcnow(), 'month', top)
        month = {'products': []}
        for interval, product, cost in rollup:
            month['month'] = '-'.join(interval.split('-')[:2])
            month['products'].append(
                dict(cost=cost, product=get_google_uri_name(product)))

        return month

    @classmethod
    def range_query(cls, identity_email, start, stop):
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        s = s.filter('range',
                     date={
                         'gt': start.isoformat(),
                         'lte': stop.isoformat()
                     })
        return s

    @classmethod
    def rollup_by_product(cls, identity_email, start, stop, interval, top):
        s = cls.range_query(identity_email, start, stop)
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='date',
                            interval=interval,
                            min_doc_count=1)
        agg.bucket('product', 'terms', field='product').metric('cost',
                                                               'sum',
                                                               field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)

        product_costs = defaultdict(float)
        for interval in res['aggregations']['intervals']['buckets']:
            for product in interval['product']['buckets']:
                product_costs[product['key']] += product['cost']['value']

        top_prods = set(
            sorted(product_costs, key=lambda p: product_costs[p],
                   reverse=True)[:top])
        interval_prods = defaultdict(set)

        for interval in res['aggregations']['intervals']['buckets']:
            for product in interval['product']['buckets']:
                if product['key'] in top_prods:
                    yield interval['key_as_string'], product['key'], product[
                        'cost']['value']
                    interval_prods[interval['key_as_string']].add(
                        product['key'])

        for interval, prods in interval_prods.items():
            missing = top_prods - prods
            for prod in missing:
                yield interval, prod, 0.0

    @classmethod
    def monthly_aggregates_resource(cls, identity_email):
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        agg = s.aggs.bucket('months',
                            'date_histogram',
                            field='date',
                            interval='month',
                            min_doc_count=1)
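        # size=0x7FFFFFFF (the largest signed 32-bit integer) effectively
        # asks the terms aggregation to return every bucket.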
        agg.bucket('rid', 'terms', field='rid',
                   size=0x7FFFFFFF).metric('cost', 'sum', field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)

        months = []
        for month in res['aggregations']['months']['buckets']:
            resources = []
            for resource in month['rid']['buckets']:
                resources.append(
                    dict(cost=resource['cost']['value'],
                         resource=resource['key']))
            if resources == []:
                continue
            months.append(
                dict(month=month['key_as_string'].split('T')[0],
                     resources=resources))

        return dict(months=months)

    @classmethod
    def monthly_aggregates_project(cls, identity_email):
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        agg = s.aggs.bucket('months',
                            'date_histogram',
                            field='date',
                            interval='month',
                            min_doc_count=1)
        agg.bucket('project_name',
                   'terms',
                   field='project_name',
                   size=0x7FFFFFFF).metric('cost', 'sum', field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)

        months = []
        for month in res['aggregations']['months']['buckets']:
            projects = []
            for project in month['project_name']['buckets']:
                projects.append(
                    dict(cost=project['cost']['value'],
                         project=project['key']))
            if projects == []:
                continue
            months.append(
                dict(month=month['key_as_string'].split('T')[0],
                     projects=projects))

        return dict(months=months)
Example #25
class Company(es.DocType):
    class Meta:
        index = 'companies'
        doc_type = 'company'

    french_elision = es.token_filter(
        'french_elision',
        type='elision',
        articles_case=True,
        articles=[
            'l', 'm', 't', 'qu', 'n', 's',
            'j', 'd', 'c', 'jusqu', 'quoiqu',
            'lorsqu', 'puisqu'
        ]
    )

    french_stopwords = es.token_filter(
        'french_stopwords',
        type='stop',
        stopwords='_french_'
    )

    # Do not include this filter if keywords is empty
    french_keywords = es.token_filter(
        'french_keywords',
        type='keyword_marker',
        keywords=[]
    )

    french_stemmer = es.token_filter(
        'french_stemmer',
        type='stemmer',
        language='light_french'
    )

    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip']
    )

    technologies_tokenizer = es.tokenizer(
        'comma_tokenizer',
        type='pattern',
        pattern=' |,|, '
    )

    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language',
            'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language',
            'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language',
            'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet',
            '.net => dotnet'
        ]
    )

    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=[
            'lowercase',
            'asciifolding',
            technologies_synonyms_filter
        ]
    )

    company_name_analyzer = es.analyzer(
        'company_name_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding'
        ]
    )

    id = es.String(index='no')

    name = es.String(analyzer=french_analyzer)

    description = es.String(
        analyzer=french_analyzer,
        fields={
            'technologies': es.String(analyzer=technologies_analyzer)
        }
    )

    technologies = es.String(analyzer=technologies_analyzer)

    url = es.String(index='no')
    logo_url = es.String(index='no')

    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()

    email = es.String(index='no')
    phone = es.String(index='no')

    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        super(Company, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        return self._doc_type.index

    @property
    def doc_type(self):
        return self._doc_type.name
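
The point of `technologies_analyzer` above is that variant spellings ('c#', 'C♯', '.NET') collapse onto shared synonym tokens, so one spelling matches the others at query time. A rough sketch of how that could be verified, assuming a cluster on localhost and that `Company.init()` registers these analyzers in the index settings:

from elasticsearch_dsl.connections import connections

es_client = connections.create_connection(hosts=['localhost:9200'])  # assumed address

Company.init()
tokens = es_client.indices.analyze(index='companies', body={
    'analyzer': 'technologies_analyzer',
    'text': 'C#, .NET, c++',
})
# Expected to contain 'csharp_language', 'dotnet' and 'cpp_language';
# exact output depends on the Elasticsearch version.
print([t['token'] for t in tokens['tokens']])
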
Example #26
class AWSDetailedLineitem(dsl.DocType):
    class Meta:
        index = 'awsdetailedlineitem'

    availability_zone = dsl.String(index='not_analyzed')
    cost = dsl.Double()
    un_blended_cost = dsl.Double()
    item_description = dsl.String(index='not_analyzed')
    linked_account_id = dsl.String(index='not_analyzed')
    operation = dsl.String()
    payer_account_id = dsl.String(index='not_analyzed')
    pricing_plan_id = dsl.Long()
    product_name = dsl.String(index='not_analyzed')
    rate = dsl.Double()
    un_blended_rate = dsl.Double()
    rate_id = dsl.Long()
    record_id = dsl.String(index='not_analyzed')
    reserved_instance = dsl.Boolean()
    resource_id = dsl.String(index='not_analyzed')
    subscription_id = dsl.Long()
    tag = dsl.Object(
        properties={
            'key': dsl.String(index='not_analyzed'),
            'value': dsl.String(index='not_analyzed')
        })
    usage_end_date = dsl.Date(format='strict_date_optional_time||epoch_millis')
    usage_quantity = dsl.Double()
    usage_start_date = dsl.Date(
        format='strict_date_optional_time||epoch_millis')
    usage_type = dsl.String(index='not_analyzed')

    @classmethod
    @with_cache(ttl=3600 * 3, worker_refresh=True)
    def keys_has_data(cls, keys, date_from=None, date_to=None):
        date_to = date_to or datetime.utcnow()
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        if date_from:
            s = s.filter('range',
                         usage_start_date={
                             'from': date_from.isoformat(),
                             'to': date_to.isoformat()
                         })
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)
        return res['hits']['total'] > 0

    @classmethod
    @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d"))
    def get_first_date(cls, keys):
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.sort('usage_start_date')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)
        if res['hits']['total'] == 0:
            return
        return res['hits']['hits'][0]['_source']['usage_start_date'].split(
            'T')[0]

    @classmethod
    @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d"))
    def get_last_date(cls, keys, limit=None):
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        if limit:
            s = s.filter('range', usage_start_date={'to': limit.isoformat()})
        s = s.sort('-usage_start_date')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)
        if res['hits']['total'] == 0:
            return
        return res['hits']['hits'][0]['_source']['usage_start_date'].split(
            'T')[0]

    @classmethod
    def get_first_to_now_date(cls, keys):
        def from_date_to_today(d):
            now = datetime.utcnow()
            while d < now:
                yield d
                d += relativedelta(months=1)

        return list(from_date_to_today(cls.get_first_date(keys)))

    @classmethod
    def get_first_to_last_date(cls, keys):
        def from_date_to_last(d):
            last = cls.get_last_date(keys)
            while d < last:
                yield d
                d += relativedelta(months=1)

        return list(from_date_to_last(cls.get_first_date(keys)))

    @classmethod
    @with_cache(6 * 3600)
    def get_available_tags(cls, keys, only_with_data=None, product_name=None):
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        if product_name:
            s = s.filter('term', product_name=product_name)
        s.aggs.bucket('tag_key', 'terms', field='tag.key')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        tags = []
        for tag in res['aggregations']['tag_key']['buckets']:
            if tag['key'].startswith('user:'):
                name = tag['key'].split(':')[1]
                if not only_with_data or name in AWSStat.latest_hourly_cpu_usage_by_tag(
                        only_with_data
                )['tags'] or name in AWSStat.latest_daily_cpu_usage_by_tag(
                        only_with_data)['tags']:
                    tags.append(name)
        tags.sort()
        return dict(tags=tags)

    @classmethod
    @with_cache(ttl=6 * 3600)
    def get_cost_by_tag(cls, keys, tag, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('term', **{'tag.key': 'user:{}'.format(tag)})
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s.aggs.bucket('total_cost', 'sum', field='cost')
        agg = s.aggs.bucket('tag_value',
                            'terms',
                            field='tag.value',
                            size=0x7FFFFFFF)
        agg.bucket('cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        tags = [{
            'tag_value': tag['key'],
            'cost': tag['cost']['value'],
        } for tag in res['aggregations']['tag_value']['buckets']]
        return dict(tags=tags,
                    total_cost=res['aggregations']['total_cost']['value'])

    @classmethod
    @with_cache(ttl=6 * 3600)
    def get_cost(cls, keys, date_from, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(
            hour=23, minute=59, second=59, microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s.aggs.bucket('total_cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        return dict(total_cost=res['aggregations']['total_cost']['value'])

    @classmethod
    @with_cache()
    def get_monthly_cost_by_tag(cls, keys, tag, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('term', **{'tag.key': 'user:{}'.format(tag)})
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='usage_start_date',
                            interval='month',
                            min_doc_count=1)
        agg.bucket('total_cost', 'sum', field='cost')
        agg = agg.bucket('tag_value',
                         'terms',
                         field='tag.value',
                         size=0x7FFFFFFF)
        agg.bucket('cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        months = [{
            'month': interval['key_as_string'].split('T')[0][:-3],
            'tags': [{
                'tag_value': tag['key'],
                'cost': tag['cost']['value'],
            } for tag in interval['tag_value']['buckets']],
            'total_cost': interval['total_cost']['value'],
        } for interval in res['aggregations']['intervals']['buckets']]
        return dict(months=months)

    @classmethod
    @with_cache()
    def get_cost_by_product(cls,
                            key,
                            date_from=None,
                            date_to=None,
                            without_discount=False,
                            only_discount=False,
                            size=0x7FFFFFFF):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        if without_discount:
            s = s.query(
                'bool',
                filter=[
                    ~dsl.Q('term', item_description='PAR_APN_ProgramFee_2500')
                ])
        if only_discount:
            s = s.filter('term', item_description='PAR_APN_ProgramFee_2500')
        agg = s.aggs.bucket('products',
                            'terms',
                            field='product_name',
                            order={'cost': 'desc'},
                            size=size)
        agg.bucket('cost', 'sum', field='cost')
        s = s.query('bool', filter=[~dsl.Q('term', cost=0)])
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        products = [{
            'product': SHORT_NAMES.get(product['key'], product['key']),
            'cost': product['cost']['value'],
        } for product in res['aggregations']['products']['buckets']]
        return dict(products=products)

    @classmethod
    @with_cache()
    def get_cost_by_region(cls,
                           keys,
                           tagged=False,
                           byaccount=False,
                           date_from=None,
                           date_to=None,
                           size=0x7FFFFFFF):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })

        agg = s.aggs
        if byaccount:
            agg = agg.bucket('accounts', 'terms', field='linked_account_id')
        agg = agg.bucket('intervals',
                         'date_histogram',
                         field='usage_start_date',
                         interval='month',
                         min_doc_count=1)
        agg = agg.bucket('regions',
                         'terms',
                         field='availability_zone',
                         size=size)
        agg.bucket('cost', 'sum', field='cost')
        if tagged:
            agg = agg.bucket('tags', 'terms', field='tag.value')
            agg.bucket('cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0)

        return res['aggregations']

    @classmethod
    @with_cache()
    def get_monthly_cost(cls,
                         keys,
                         date_from=None,
                         date_to=None,
                         size=0x7FFFFFFF):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='usage_start_date',
                            interval='month',
                            min_doc_count=1)
        agg.bucket('cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        res = [{
            'month': interval['key_as_string'].split('T')[0],
            'total_cost': interval['cost']['value'],
        } for interval in res['aggregations']['intervals']['buckets']]
        return dict(months=res)

    @classmethod
    @with_cache()
    def get_monthly_cost_by_product(cls,
                                    keys,
                                    tagged=False,
                                    date_from=None,
                                    date_to=None,
                                    size=0x7FFFFFFF):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='usage_start_date',
                            interval='month',
                            min_doc_count=1)
        agg = agg.bucket('products', 'terms', field='product_name', size=size)
        agg.bucket('cost', 'sum', field='cost')
        if tagged:
            agg = agg.bucket('tags', 'terms', field='tag.value')
            agg.bucket('cost', 'sum', field='cost')
        s = s.query('bool', filter=[~dsl.Q('term', cost=0)])
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        def tagged_cost(bucket, total):
            total_tag = 0.0
            for tag in bucket:
                total_tag += tag['cost']['value']
                yield (tag['key'], tag['cost']['value'])
            if total != total_tag:
                yield ('untagged', total - total_tag)

        res = [{
            'month': interval['key_as_string'].split('T')[0],
            'products': [{
                'product': SHORT_NAMES.get(product['key'], product['key']),
                'cost': product['cost']['value'],
                'tags': [{
                    'name': tag[0],
                    'cost': tag[1],
                } for tag in tagged_cost(product['tags']['buckets'],
                                         product['cost']['value'])],
            } for product in interval['products']['buckets']] if tagged else [{
                'product': SHORT_NAMES.get(product['key'], product['key']),
                'cost': product['cost']['value'],
            } for product in interval['products']['buckets']]
        } for interval in res['aggregations']['intervals']['buckets']]
        return dict(months=res)

    @classmethod
    @with_cache(ttl=4 * 3600)
    def get_daily_cost_by_product(cls,
                                  keys,
                                  date_from=None,
                                  date_to=None,
                                  size=0x7FFFFFFF):
        date_from = date_from or datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(
            hour=23, minute=59, second=59, microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='usage_start_date',
                            interval='day',
                            min_doc_count=1)
        agg = agg.bucket('products', 'terms', field='product_name', size=size)
        agg.metric('cost', 'sum', field='cost')
        s = s.query('bool', filter=[~dsl.Q('term', cost=0)])
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        res = [{
            'day': interval['key_as_string'].split('T')[0],
            'products': [{
                'product': SHORT_NAMES.get(product['key'], product['key']),
                'cost': product['cost']['value'],
            } for product in interval['products']['buckets']]
        } for interval in res['aggregations']['intervals']['buckets']]
        return dict(days=res)

    @classmethod
    @with_cache(ttl=24 * 3600)
    def get_yearly_cost_by_product(cls,
                                   keys,
                                   date_from=None,
                                   date_to=None,
                                   size=0x7FFFFFFF):
        date_from = date_from or datetime.utcnow().replace(
            month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(month=12,
                                               day=31,
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='usage_start_date',
                            interval='year',
                            min_doc_count=1)
        agg = agg.bucket('products', 'terms', field='product_name', size=size)
        agg.metric('cost', 'sum', field='cost')
        s = s.query('bool', filter=[~dsl.Q('term', cost=0)])
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        res = [{
            'year': interval['key_as_string'][:4],
            'products': [{
                'product': SHORT_NAMES.get(product['key'], product['key']),
                'cost': product['cost']['value'],
            } for product in interval['products']['buckets']]
        } for interval in res['aggregations']['intervals']['buckets']]
        return dict(years=res)

    @classmethod
    @with_cache()
    def get_cost_by_resource(cls,
                             keys,
                             date_from=None,
                             date_to=None,
                             search=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        if search:
            s = s.query('wildcard', resource_id='*{}*'.format(search))
        agg = s.aggs.bucket('resources',
                            'terms',
                            field='resource_id',
                            order={'cost': 'desc'},
                            size=0x7FFFFFFF)
        agg.bucket('cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        resources = [{
            'resource': resource['key'],
            'cost': resource['cost']['value'],
        } for resource in res['aggregations']['resources']['buckets']]
        return resources

    @classmethod
    def get_monthly_cost_by_resource(cls,
                                     resource_ids,
                                     date_from=None,
                                     date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        if resource_ids:
            s = cls.search()
            s = s.filter('range',
                         usage_start_date={
                             'from': date_from.isoformat(),
                             'to': date_to.isoformat()
                         })
            s = s.filter('terms', resource_id=list(resource_ids))
            agg = s.aggs.bucket('months',
                                'date_histogram',
                                field='usage_start_date',
                                interval='month',
                                min_doc_count=1)
            agg.metric('cost', 'sum', field='cost')
            r = client.search(index='awsdetailedlineitem',
                              body=s.to_dict(),
                              size=0,
                              request_timeout=60)
            return {
                e['key_as_string']: e['cost']['value']
                for e in r['aggregations']['months']['buckets']
            }
        else:
            return {}

    @classmethod
    @with_cache()
    def get_lambda_usage(cls, keys, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('term', product_name='AWS Lambda')
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('resources',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'avg', field='cost')
        agg = agg.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF)
        agg.metric('quantity', 'sum', field='usage_quantity')
        agg = agg.bucket('descriptions',
                         'terms',
                         field='item_description',
                         size=0x7FFFFFFF)
        agg.metric('quantity', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        def _lambda_usage_regb(buckets, endswith):
            for b in buckets:
                if b['key'].endswith(endswith):
                    return b['quantity']['value']

        usages = [{
            'rid': usage['key'],
            'name': usage['key'].split(':')[-1],
            'requests': _lambda_usage_regb(usage['types']['buckets'],
                                           '-Request'),
            'gb_seconds': _lambda_usage_regb(usage['types']['buckets'],
                                             '-Lambda-GB-Second'),
            'cost': usage['cost']['value'],
            'raw_cost': lambdapricing.get_raw_cost([
                x['descriptions']['buckets'] for x in usage['types']['buckets']
            ]),
        } for usage in res['aggregations']['resources']['buckets']]
        return usages

    @classmethod
    @with_cache()
    def get_s3_bandwidth_costs(cls, key, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('types',
                            'terms',
                            field='usage_type',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg.metric('gb', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        transfers = [{
            'type': transfer['key'],
            'quantity': transfer['gb']['value'],
            'cost': transfer['cost']['value'],
        } for transfer in res['aggregations']['types']['buckets']]
        return transfers

    @classmethod
    @with_cache()
    def get_ec2_bandwidth_costs(cls, key, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('term', product_name='Amazon Elastic Compute Cloud')
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        agg = s.aggs.bucket('types',
                            'terms',
                            field='usage_type',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg.metric('gb', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        transfers = [{
            'type': transfer['key'],
            'quantity': transfer['gb']['value'],
            'cost': transfer['cost']['value'],
        } for transfer in res['aggregations']['types']['buckets']]
        return transfers

    @classmethod
    def get_ec2_daily_cost(cls, key):
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('term', product_name='Amazon Elastic Compute Cloud')

        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='usage_start_date',
                            interval='day',
                            min_doc_count=1)
        agg.metric('cost', 'sum', field='cost')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        for interval in res['aggregations']['intervals']['buckets']:
            yield interval['key_as_string'].split(
                'T')[0], interval['cost']['value']

    @classmethod
    @with_cache()
    def get_elb_usage_a_day(cls, keys, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        gib = Fraction(2**30)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter("prefix", resource_id="arn:aws:elasticloadbalancing")
        s = s.sort({"usage_start_date": {"order": "desc"}})
        agg = s.aggs.bucket('rid',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg = agg.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF)
        agg.metric('quantity', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)
        days = (date_to - date_from).days
        elbs = [{
            'rid': elb['key'],
            'cost': elb['cost']['value'] / days,
            'hours': float(
                sum(x['quantity']['value'] for x in elb['types']['buckets']
                    if x['key'].endswith('LoadBalancerUsage')) / days),
            'bytes': float(
                sum(x['quantity']['value'] for x in elb['types']['buckets']
                    if x['key'].endswith('Bytes')) * gib / days),
        } for elb in res['aggregations']['rid']['buckets']]
        return elbs

    @classmethod
    @with_cache()
    def get_instance_type(cls, keys, date_from=None, date_to=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.extra(_source=[
            'usage_start_date', 'usage_type', 'availability_zone',
            'resource_id'
        ])
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter("term", product_name='Amazon Elastic Compute Cloud')
        s = s.query('wildcard', usage_type='*BoxUsage:*')
        s = s.filter('exists', field='resource_id')
        s = s.sort({"usage_start_date": {"order": "desc"}})
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=10000,
                            request_timeout=60)

        def cut_region_name(s):
            return s[:-1] if s[-1].isalpha() else s

        types = []
        refs = {}

        def add_in_types(type, rid):
            ref_tuple = (type['hour'], type['instance'], type['region'])
            if ref_tuple in refs:
                refs[ref_tuple]['rids'].append(rid)
                refs[ref_tuple]['ridCount'] += 1
                return
            type['rids'] = [rid]
            types.append(type)
            refs[ref_tuple] = types[-1]

        for r in res['hits']['hits']:
            elem = {
                'hour': r['_source']['usage_start_date'],
                'instance': r['_source']['usage_type'].split(':')[1],
                'region': (cut_region_name(r['_source']['availability_zone'])
                           if 'availability_zone' in r['_source']
                           else 'unknown'),
                'ridCount': 1,
            }
            add_in_types(elem, r['_source']['resource_id'])
        return types

    @classmethod
    @with_cache()
    def get_instance_hour(cls,
                          keys,
                          date_from=None,
                          date_to=None,
                          min_hour=None):
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter("term", product_name='Amazon Elastic Compute Cloud')
        s = s.filter('prefix', resource_id='i-')
        s = s.query('wildcard', usage_type='*BoxUsage*')
        agg = s.aggs.bucket('resource_id',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.bucket('days',
                   'date_histogram',
                   field='usage_start_date',
                   interval='day',
                   min_doc_count=1)
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        instance_list = []
        for instance in res['aggregations']['resource_id']['buckets']:
            tmp_hours = []
            for day in instance['days']['buckets']:
                tmp_hours.append(day['doc_count'])
            avg_hours = sum(tmp_hours) / float(len(tmp_hours))
            if not min_hour or avg_hours >= min_hour:
                instance_list.append(dict(id=instance['key'], hours=avg_hours))
        return sorted(instance_list, key=lambda x: x['hours'], reverse=True)

    @classmethod
    @with_cache()
    def get_s3_buckets_per_tag(cls, keys):
        def _check_if_in_list(dict_list, value, key):
            return next((item for item in dict_list if item[key] == value),
                        None)

        def _parse_tag_keys_results(res):
            bucket_tagged = []
            for bucket_tag_key in res['aggregations']['tag_key']['buckets']:
                buff_tag_key = _check_if_in_list(bucket_tagged,
                                                 bucket_tag_key['key'],
                                                 'tag_key')
                if buff_tag_key is None:
                    buff_tag_key = {
                        "tag_key": bucket_tag_key['key'],
                        "tag_value": []
                    }
                buff_tag_key = _parse_tag_values_results(
                    bucket_tag_key, buff_tag_key)
                bucket_tagged.append(buff_tag_key)
            return bucket_tagged

        def _parse_tag_values_results(bucket_tag_key, buff_tag_key):
            for bucket_tag_value in bucket_tag_key['tag_value']['buckets']:
                buff_tag_value = _check_if_in_list(buff_tag_key['tag_value'],
                                                   bucket_tag_value['key'],
                                                   'tag_value')
                if buff_tag_value is None:
                    buff_tag_value = {
                        "tag_value": bucket_tag_value['key'],
                        "s3_buckets": []
                    }
                buff_tag_value = _parse_buckets_results(
                    buff_tag_value, bucket_tag_value)
                buff_tag_key['tag_value'].append(buff_tag_value)
            return buff_tag_key

        def _parse_buckets_results(buff_tag_value, bucket_tag_value):
            for bucket_resource_id in bucket_tag_value['ressource_id'][
                    'buckets']:
                buff_bucket_resource_id = _check_if_in_list(
                    buff_tag_value['s3_buckets'], bucket_resource_id['key'],
                    'bucket_name')
                if buff_bucket_resource_id is None:
                    buff_bucket_resource_id = {
                        "bucket_name":
                        bucket_resource_id['key'],
                        "account_id":
                        bucket_resource_id['account_id']['buckets'][0]['key']
                    }
                buff_tag_value['s3_buckets'].append(buff_bucket_resource_id)
            return buff_tag_value

        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.query('exists', field="tag")
        s = s.query('wildcard', item_description="*storage*")
        agg = s.aggs.bucket('tag_key', 'terms', field="tag.key")
        agg = agg.bucket('tag_value', 'terms', field='tag.value')
        agg.bucket('ressource_id', 'terms',
                   field='resource_id').bucket('account_id',
                                               'terms',
                                               field='linked_account_id')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)
        '''
        bucket_tagged structure
        [{
            "tag_key" : "KEY", # Unique in list
            "tag_value": [{
                "tag_value": "VALUE", # Unique in list
                "s3_buckets": [{
                    "bucket_name": "BUCKET_NAME",
                    "account_id": "ACCOUND_ID"
                }, {...}]
            }, {...}]
        }, {...}]
        '''

        bucket_tagged = _parse_tag_keys_results(res)
        return bucket_tagged

    @classmethod
    @with_cache()
    def get_s3_bandwidth_info_and_cost_per_name(cls,
                                                key,
                                                bucket_resource_ids,
                                                date_from=None,
                                                date_to=None):
        date_from = date_from or (datetime.utcnow() - relativedelta(
            months=1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.filter('terms',
                     resource_id=bucket_resource_ids if isinstance(
                         bucket_resource_ids, list) else [bucket_resource_ids])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter('wildcard', usage_type="*Bytes")
        agg = s.aggs.bucket('bucket_name',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg = agg.bucket('transfer_type', 'terms', field='usage_type')
        agg.metric('data', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)
        data = [{
            "bucket_name": bucket['key'],
            "cost": bucket['cost']['value'],
            "transfer_stats": [{
                "type": transfer_stat['key'],
                "data": transfer_stat['data']['value']
            } for transfer_stat in bucket['transfer_type']['buckets']]
        } for bucket in res['aggregations']['bucket_name']['buckets']]
        return data
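
AWSDetailedLineitem concentrates its query logic in cached classmethods, so a caller only supplies linked account ids and optional date bounds. A hedged driver sketch, assuming the module-level `client`, `with_cache`, `SHORT_NAMES` and `lambdapricing` dependencies are configured and the 'awsdetailedlineitem' index is populated (the account id below is made up):

from datetime import datetime

account = '123456789012'  # hypothetical linked account id
if AWSDetailedLineitem.keys_has_data([account]):
    costs = AWSDetailedLineitem.get_cost_by_product(
        account, date_from=datetime(2016, 3, 1))
    for product in costs['products']:
        print('{product}: {cost:.2f}'.format(**product))
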
Example #27
class Organisatie(es.DocType):
    ext_id = es.String(index='not_analyzed')
    naam = es.String(analyzer=dutch_analyzer)  # ngram
    beschrijving = es.String(analyzer=dutch_analyzer)
    afdeling = es.String(index='not_analyzed')
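
A short indexing sketch for the mapping above, assuming a default elasticsearch-dsl connection is registered and `dutch_analyzer` is defined elsewhere in the module (the index name and field values are invented):

Organisatie.init(index='organisaties')  # assumed index name
org = Organisatie(ext_id='org-1', naam='Voorbeeldorganisatie',
                  beschrijving='Hulp bij administratie', afdeling='Zorg')
org.save(index='organisaties')
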
Example #28
File: models.py Project: groovecoder/fjord
class ResponseDocType(FjordDocType):
    id = es_dsl.Integer()
    happy = es_dsl.Boolean()
    api = es_dsl.Integer()
    url = es_dsl.String(index='not_analyzed')
    url_domain = es_dsl.String(index='not_analyzed')
    has_email = es_dsl.Boolean()
    description = es_dsl.String(analyzer='snowball')
    category = es_dsl.String(index='not_analyzed')
    description_bigrams = es_dsl.String(index='not_analyzed')
    description_terms = es_dsl.String(analyzer='standard')
    user_agent = es_dsl.String(index='not_analyzed')
    product = es_dsl.String(index='not_analyzed')
    channel = es_dsl.String(index='not_analyzed')
    version = es_dsl.String(index='not_analyzed')
    browser = es_dsl.String(index='not_analyzed')
    browser_version = es_dsl.String(index='not_analyzed')
    platform = es_dsl.String(index='not_analyzed')
    locale = es_dsl.String(index='not_analyzed')
    country = es_dsl.String(index='not_analyzed')
    device = es_dsl.String(index='not_analyzed')
    manufacturer = es_dsl.String(index='not_analyzed')
    source = es_dsl.String(index='not_analyzed')
    campaign = es_dsl.String(index='not_analyzed')
    source_campaign = es_dsl.String(index='not_analyzed')
    organic = es_dsl.Boolean()
    created = es_dsl.Date()

    docs = ResponseDocTypeManager()

    class Meta:
        pass

    def mlt(self):
        """Returns a search with a morelikethis query for docs like this"""
        # Short responses tend to not repeat any words, so then MLT
        # returns nothing. This fixes that by setting min_term_freq to
        # 1. Longer responses tend to repeat important words, so we can
        # set min_term_freq to 2.
        num_words = len(self.description.split(' '))
        if num_words > 40:
            min_term_freq = 2
        else:
            min_term_freq = 1

        s = self.search()
        if self.product:
            s = s.filter('term', product=self.product)
        if self.platform:
            s = s.filter('term', platform=self.platform)

        s = s.query('more_like_this',
                    fields=['description'],
                    docs=[{
                        '_index': get_index_name(),
                        '_type': self._doc_type.name,
                        '_id': self.id
                    }],
                    min_term_freq=min_term_freq,
                    stop_words=list(ANALYSIS_STOPWORDS))
        return s

    @classmethod
    def get_model(cls):
        return Response

    @classmethod
    def public_fields(cls):
        """Fields that can be publicly-visible

        .. Note::

           Do NOT include fields that have PII in them.

        """
        return ('id', 'happy', 'api', 'url_domain', 'has_email', 'description',
                'category', 'description_bigrams', 'user_agent', 'product',
                'version', 'platform', 'locale', 'source', 'campaign',
                'organic', 'created')

    @property
    def truncated_description(self):
        """Shorten feedback for dashboard view."""
        return smart_truncate(self.description, length=500)

    @classmethod
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create a
        ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            'id': resp.id,
            'happy': resp.happy,
            'api': resp.api,
            'url': resp.url,
            'url_domain': resp.url_domain,
            'has_email': bool(resp.user_email),
            'description': resp.description,
            'user_agent': resp.user_agent,
            'product': resp.product,
            'channel': resp.channel,
            'version': resp.version,
            'browser': resp.browser,
            'browser_version': resp.browser_version,
            'platform': resp.platform,
            'locale': resp.locale,
            'country': resp.country,
            'device': resp.device,
            'manufacturer': resp.manufacturer,
            'source': resp.source,
            'campaign': resp.campaign,
            'source_campaign': '::'.join([(resp.source or '--'),
                                          (resp.campaign or '--')]),
            'organic': (not resp.campaign),
            'created': resp.created
        }

        # We only compute bigrams for english because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u'en') and resp.description:
            doc['description_bigrams'] = compute_grams(resp.description)
        else:
            doc['description_bigrams'] = []

        if with_id:
            doc['_id'] = doc['id']
        return doc
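
The `extract_doc` docstring above points at a bulk-indexing flow. A hedged sketch of that flow with the elasticsearch-py bulk helper, assuming Django-style `Response.objects` and a reachable cluster (the index and doc type names below are assumptions, not taken from fjord itself):

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

es_client = Elasticsearch(['localhost:9200'])  # assumed address
docs = (ResponseDocType.extract_doc(resp, with_id=True)
        for resp in Response.objects.all())
# The '_id' key emitted by extract_doc becomes the document id;
# 'fjord-responses' and 'response' are hypothetical names.
bulk(es_client, docs, index='fjord-responses', doc_type='response')
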
Example #29
class Activiteit(es.DocType):
    ext_id = es.String(index='not_analyzed')
    naam = es.String(analyzer=dutch_analyzer)
    beschrijving = es.String(analyzer=dutch_analyzer)
    bron_link = es.String(index='not_analyzed')
    tijdstip = es.String(index='not_analyzed')
    tags = es.String(index='not_analyzed')
    centroid = es.GeoPoint()
    locatie = es.Object(
        doc_class=Locatie,
        properties={
            'ext_id': es.String(index='not_analyzed'),
            'naam': es.String(analyzer=dutch_analyzer),
            'centroid': es.GeoPoint(),
            'openbare_ruimte_naam': es.String(index='not_analyzed'),
            'huisnummer': es.String(index='not_analyzed'),
            'huisnummer_toevoeging': es.String(index='not_analyzed'),
            'postcode': es.String(index='not_analyzed')
        })
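
A hedged query sketch for the geo fields above, finding activities within a kilometre of a point, assuming a default connection and an existing index (the coordinates are invented):

s = Activiteit.search()
s = s.filter('geo_distance', distance='1km',
             centroid={'lat': 52.37, 'lon': 4.90})
for hit in s[:10]:
    print(hit.naam, hit.tijdstip)
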
Example #30
class AWSAccessLog(dsl.DocType):
    class Meta:
        index = 'awsaccesslog'

    key = dsl.String(index='not_analyzed')
    resource = dsl.String(index='not_analyzed')
    time = dsl.Date(format='date_optional_time||epoch_millis')
    period = dsl.Integer()
    bucket = dsl.String(index='not_analyzed')
    object = dsl.String(index='not_analyzed')

    @classmethod
    def most_accessed_s3_objects(cls, key):
        s = cls.search()
        s = s.filter('term', key=key)
        agg = s.aggs.bucket('objects', 'terms', field='object')
        agg.bucket('buckets', 'terms', field='bucket')
        res = client.search(index='awsaccesslog',
                            body=s.to_dict(),
                            request_timeout=60)

        objects = []
        for object in res['aggregations']['objects']['buckets']:
            objects.append(
                dict(bucket=object['buckets']['buckets'][0]['key'],
                     object=object['key'],
                     access_count=object['doc_count']))
        return dict(objects=objects)

    @classmethod
    def last_accessed_s3_objects(cls, key):
        s = cls.search()
        s = s.filter('term', key=key)
        agg = s.aggs.bucket('objects', 'terms', field='object')
        res = client.search(index='awsaccesslog',
                            body=s.to_dict(),
                            request_timeout=60)
        objects = []
        for object in res['aggregations']['objects']['buckets']:
            s2 = cls.search()
            s2 = s2.query("match", object=object['key'])
            s2 = s2.sort('-time')
            res2 = client.search(index='awsaccesslog',
                                 body=s2.to_dict(),
                                 request_timeout=60)
            objects.append(
                dict(object=object['key'],
                     bucket=res2['hits']['hits'][0]['_source']['bucket'],
                     last_access=res2['hits']['hits'][0]['_source']['time']))
        return dict(objects=objects)

    @classmethod
    def last_access_s3_bucket(cls, key, bucket):
        s = cls.search()
        s = s.filter('term', key=key).filter('term', bucket=bucket)
        s = s.sort('-time')
        res = client.search(index='awsaccesslog',
                            body=s.to_dict(),
                            request_timeout=60)
        if res['hits']['total'] > 0:
            return res['hits']['hits'][0]['_source']['time']
        else:
            return 'never_accessed'
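
Finally, a usage sketch for AWSAccessLog, assuming the module-level `client` is configured and the 'awsaccesslog' index holds access records (the key and bucket names below are invented):

stats = AWSAccessLog.most_accessed_s3_objects('my-key')
for obj in stats['objects']:
    print('{bucket}/{object}: {access_count}'.format(**obj))

print(AWSAccessLog.last_access_s3_bucket('my-key', 'my-bucket'))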