class AWSELBInfo(dsl.DocType): class Meta: index = 'awselbinfo' linked_account_id = dsl.String(index='not_analyzed') name = dsl.String(index='not_analyzed') region = dsl.String(index='not_analyzed') instances = dsl.String() @classmethod def init(cls, index=None, using=None): client.indices.create('awselbinfo', ignore=400) client.indices.put_mapping(index='awselbinfo', doc_type='a_ws_el_binfo', body={'_ttl': { 'enabled': True }}) cls._doc_type.init(index, using) @classmethod def get_elb_info(cls, key): s = cls.search() s = s.filter('term', linked_account_id=key) s = s.sort('-_ttl') res = client.search(index='awselbinfo', body=s.to_dict(), size=10000, request_timeout=60) if res['hits']['total'] == 0: return [] return [{ 'instances': elb['_source']['instances'].split(' '), 'name': elb['_source']['name'], 'region': elb['_source']['region'], } for elb in res['hits']['hits']]
class GroupDocument(esd.DocType): date = esd.Date() aperture = esd.Float() exposure = esd.Float() focal_length = esd.Float() focal_length_35 = esd.Float() iso = esd.Integer() model = esd.String(index='not_analyzed') #analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ])) lens = esd.String(index='not_analyzed') path = esd.String(index='not_analyzed') dirname = esd.String(index='not_analyzed') basename = esd.String(index='not_analyzed')
class BaseDocument(dsl.DocType): """Base document class to build ElasticSearch documents. This is standard ``elasticsearch-dsl`` ``DocType`` class with already added fields for handling permissions. """ #: list of user ids with view permission on the object users_with_permissions = dsl.String(multi=True) #: list of group ids with view permission on the object groups_with_permissions = dsl.String(multi=True)
class TestSearchDocument(BaseDocument): # pylint: disable=no-member name = dsl.String() num = dsl.Integer() json = dsl.Object() class Meta: index = 'test_search'
class AWSIdNameMapping(dsl.DocType): class Meta: index = 'awsidnamemapping' key = dsl.String(index='not_analyzed') rid = dsl.String(index='not_analyzed') name = dsl.String(index='not_analyzed') date = dsl.Date(format='date_optional_time||epoch_millis') @classmethod def get_id_name_mapping(cls, key): s = cls.search() s = s.query('match', key=key).sort('-date') res = {} for hit in s.scan(): if hit.rid not in res: res[hit.rid] = hit.name return res
class InfoRiegoRecord(dsl.DocType): code = dsl.String() location = dsl.String() date = dsl.Date() rain = dsl.Float() temperature = dsl.Float() rel_humidity = dsl.Float() radiation = dsl.Float() wind_speed = dsl.Float() wind_direction = dsl.Float() lat_lon = dsl.GeoPoint(lat_lon=True) station_height = dsl.Integer() def save(self, **kwargs): return super(InfoRiegoRecord, self).save(**kwargs) class Meta: index = 'inforiego'
class TestAnalyzerSearchDocument(BaseDocument): # pylint: disable=no-member name = dsl.String(analyzer=dsl.analyzer( 'test_analyzer', tokenizer='keyword', filter=[ 'lowercase', ], )) class Meta: index = 'test_analyzer_search'
class TestSearchDocument(BaseDocument): # pylint: disable=no-member id = dsl.Integer() # pylint: disable=invalid-name name = dsl.String() num = dsl.Integer() json = dsl.Object() field_name = Name() field_process_type = ProcessType() none_test = dsl.Integer() class Meta: index = 'test_search'
class Locatie(es.DocType): ext_id = es.String(index='not_analyzed') naam = es.String(analyzer=dutch_analyzer) centroid = es.GeoPoint() openbare_ruimte_naam = es.String(index='not_analyzed') huisnummer = es.String(index='not_analyzed') huisnummer_toevoeging = es.String(index='not_analyzed') postcode = es.String(index='not_analyzed')
class sigpac_record(dsl.DocType): dn_pk = dsl.Long() provincia = dsl.Integer() municipio = dsl.Integer() poligono = dsl.Integer() parcela = dsl.Integer() recinto = dsl.Integer() zona = dsl.Integer() perimetro = dsl.Long() superficie = dsl.Long() pend_med = dsl.Integer() points = dsl.GeoShape() bbox = dsl.GeoShape() bbox_center = dsl.GeoPoint(lat_lon=True) uso_sigpac = dsl.String() agregado = dsl.Integer() cap_auto = dsl.Integer() cap_manual = dsl.Integer() coef_regadio = dsl.Float() c_refpar = dsl.String() c_refpol = dsl.String() c_refrec = dsl.String() dn_oid = dsl.Long() elevation = dsl.Float() def save(self, **kwargs): return super(sigpac_record, self).save(**kwargs) class Meta: index = 'plots' doc_type = 'sigpac'
class MappingSearchDocument(BaseDocument): """Index for mapping search.""" # pylint: disable=no-member relation_type = dsl.String(index='not_analyzed') source_db = dsl.String(index='not_analyzed') source_id = dsl.String(index='not_analyzed') source_species = dsl.String(index='not_analyzed') target_db = dsl.String(index='not_analyzed') target_id = dsl.String(index='not_analyzed') target_species = dsl.String(index='not_analyzed') relation_type = dsl.String(index='not_analyzed') class Meta: """Meta class for mapping search document.""" index = 'mapping_search'
def document_field(field): """ The default ``field_factory`` method for converting Django field instances to ``elasticsearch_dsl.Field`` instances. Auto-created fields (primary keys, for example) and one-to-many fields (reverse FK relationships) are skipped. """ if field.auto_created or field.one_to_many: return None if field.many_to_many: return RawMultiString defaults = { models.DateField: dsl.Date(), models.DateTimeField: dsl.Date(), models.IntegerField: dsl.Long(), models.PositiveIntegerField: dsl.Long(), models.BooleanField: dsl.Boolean(), models.NullBooleanField: dsl.Boolean(), models.SlugField: dsl.String(index='not_analyzed'), models.DecimalField: dsl.Double(), models.FloatField: dsl.Float(), } return defaults.get(field.__class__, RawString)
class PhotoDocument(esd.DocType): date = esd.Date() aperture = esd.Float() exposure = esd.Float() focal_length = esd.Float() focal_length_35 = esd.Float() iso = esd.Integer() size = esd.Integer() model = esd.String(index='not_analyzed') #analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ])) model_ci = esd.String(analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ])) lens = esd.String(index='not_analyzed') lens_ci = esd.String(analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ])) path = esd.String(index='not_analyzed') dirname = esd.String(index='not_analyzed') basename = esd.String(index='not_analyzed') def extended_dict(self): dct = self.to_dict() dct["id"] = self.meta.id return dct
class Geocomplete(es.DocType): class Meta: index = 'geocomplete' doc_type = 'geoloc-entry' french_elision = es.token_filter('french_elision', type='elision', articles_case=True, articles=[ 'l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu' ]) geocompletion_ngram_filter = es.token_filter('geocompletion_ngram', type='edgeNGram', min_gram=1, max_gram=50, side='front') town_filter = es.token_filter('town_filter', type='pattern_replace', pattern=' ', replacement='-') geocompletion_index_tokenizer = es.tokenizer( 'geocompletion_index_tokenizer', type='pattern', pattern='@') geocompletion_index_analyzer = es.analyzer( 'geocompletion_index_analyzer', type='custom', tokenizer=geocompletion_index_tokenizer, filter=[ 'lowercase', 'asciifolding', french_elision, town_filter, geocompletion_ngram_filter ]) geocompletion_search_analyzer = es.analyzer( 'geocompletion_search_analyzer', type='custom', tokenizer=geocompletion_index_tokenizer, filter=['lowercase', 'asciifolding', town_filter, french_elision]) name = es.String(index='analyzed', analyzer=geocompletion_index_analyzer, search_analyzer=geocompletion_search_analyzer, fields=dict(raw=es.String(index='not_analyzed'))) complement = es.String(index='not_analyzed') postal_code_ngram_filter = es.token_filter('postal_code_ngram', type='edgeNGram', min_gram=1, max_gram=5, side='front') postal_code_index_analyzer = es.analyzer('postal_code_index_analyzer', type='custom', tokenizer='standard', filter=[postal_code_ngram_filter]) postal_code_search_analyzer = es.analyzer('postal_code_search_analyzer', type='custom', tokenizer='standard') postal_code = es.String(index='analyzed', analyzer=postal_code_index_analyzer, search_analyzer=postal_code_search_analyzer, fields=dict(raw=es.String(index='not_analyzed'))) geolocation = es.GeoPoint() weight = es.Float() def __init__(self, meta=None, **kwargs): super(Geocomplete, self).__init__(meta, **kwargs) if self.index in compute_index_name(self.index): 
self._doc_type.index = compute_index_name(self.index) @property def index(self): return self._doc_type.index @property def doc_type(self): return self._doc_type.name
class S3BucketFile(dsl.DocType): class Meta: index = 's3bucketfile' bucket = dsl.String(index='not_analyzed') key = dsl.String(index='not_analyzed', fields={'path': dsl.String(analyzer=directory_analyzer)}) tags = dsl.String(index='not_analyzed', fields={'key': dsl.String(analyzer=tag_key_analyzer)}) size = dsl.Integer() modified = dsl.Date(format='date_optional_time||epoch_millis') @classmethod def get_bucket_sizes(cls, buckets): s = cls.search() s = s.filter('terms', bucket=buckets) agg = s.aggs.bucket('buckets', 'terms', field='bucket', size=len(buckets)) agg.metric('size', 'sum', field='size') res = client.search(index='s3bucketfile', body=s.to_dict(), size=0) for bucket in res['aggregations']['buckets']['buckets']: yield bucket['key'], bucket['size']['value'] @classmethod def get_dir_sizes(cls, bucket, path=None): s = cls.search() s = s.filter('term', bucket=bucket) if path: s = s.filter({'term': {'key.path': path}}) path_regex = '[^/]+' if path: path_regex = path + '/' + path_regex agg = s.aggs.bucket('dirs', 'terms', field='key.path', size=1000, include=path_regex) agg.metric('size', 'sum', field='size') res = client.search(index='s3bucketfile', body=s.to_dict(), size=0) for directory in res['aggregations']['dirs']['buckets']: key = directory['key'] if path: key = key.replace(path, '') if key.startswith('/'): key = key[1:] yield key, directory['size']['value'] @classmethod def get_bucket_tags(cls, buckets, tags=[], tagkey=None): return cls.get_dir_tags(buckets, tags=tags, tagkey=tagkey) @classmethod def get_dir_tags(cls, buckets, path=None, tags=[], tagkey=None): s = cls.search() if isinstance(buckets, list): s = s.filter('terms', bucket=buckets) else: s = s.filter('term', bucket=buckets) if path: s = s.filter({'term': {'key.path': path}}) if tags: s = s.filter('terms', tags=tags) if tagkey: agg = s.aggs.bucket('tags', 'terms', field='tags', include=re.escape(tagkey) + '=.*', size=500) else: agg = s.aggs.bucket('tags', 'terms', field='tags', size=500) 
agg.metric('size', 'sum', field='size') s.aggs.bucket('tagkeys', 'terms', field='tags.key', size=100) res = client.search(index='s3bucketfile', body=s.to_dict(), size=0) tags_agg = res['aggregations']['tags']['buckets'] tags = dict((b['key'], b['size']['value']) for b in tags_agg) tagkeys = [b['key'] for b in res['aggregations']['tagkeys']['buckets']] return tags, tagkeys
from ``cls.get_id(obj)``. Uses ``seeker.mapping.serialize_object`` to build the field data dictionary. """ data = {'_id': cls.get_id(obj)} data.update(serialize_object(obj, cls._doc_type.mapping, prepare=cls)) return data @property def instance(self): """ Returns the Django model instance corresponding to this document, fetched using ``cls.queryset()``. """ return self.queryset().get(pk=self.meta.id) RawString = dsl.String(analyzer=DEFAULT_ANALYZER, fields={ 'raw': dsl.String(index='not_analyzed'), }) """ An ``elasticsearch_dsl.String`` instance (analyzed using ``SEEKER_DEFAULT_ANALYZER``) with a ``raw`` sub-field that is not analyzed, suitable for aggregations, sorting, etc. """ RawMultiString = dsl.String(analyzer=DEFAULT_ANALYZER, multi=True, fields={ 'raw': dsl.String(index='not_analyzed'), }) """ The same as ``RawString``, but with ``multi=True`` specified, so lists are returned. """
class FeatureSearchDocument(BaseDocument): """Index for feature search.""" # pylint: disable=no-member source = dsl.String(index='not_analyzed') feature_id = dsl.String( index='not_analyzed', # Additional subfield used for boosting during autocomplete. fields={'lower': { 'type': 'string', 'analyzer': identifier_analyzer }}, ) species = dsl.String() type = dsl.String() # pylint: disable=invalid-name sub_type = dsl.String() name = dsl.String( index='not_analyzed', # Additional subfield used for boosting during autocomplete. fields={'lower': { 'type': 'string', 'analyzer': identifier_analyzer }}, ) full_name = dsl.String() description = dsl.String() aliases = dsl.String( multi=True, index='not_analyzed', # Additional subfield used for boosting during autocomplete. fields={'lower': { 'type': 'string', 'analyzer': identifier_analyzer }}, ) # Autocomplete. autocomplete = dsl.String( multi=True, # During indexing, we lowercase terms and tokenize using edge_ngram. analyzer=dsl.analyzer( 'autocomplete_index', tokenizer='keyword', filter=[ 'lowercase', dsl.token_filter('autocomplete_filter', type='edgeNGram', min_gram=1, max_gram=15) ], ), # During search, we only lowercase terms. search_analyzer=dsl.analyzer( 'autocomplete_search', tokenizer='keyword', filter=['lowercase'], ), ) class Meta: """Meta class for feature search document.""" index = 'feature_search'
class Job(es.DocType): class Meta: index = 'jobs' doc_type = 'job-offer' french_elision = es.token_filter('french_elision', type='elision', articles_case=True, articles=[ 'l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu', 'quoiqu', 'lorsqu', 'puisqu' ]) french_stopwords = es.token_filter('french_stopwords', type='stop', stopwords='_french_') # Do not include this filter if keywords is empty french_keywords = es.token_filter('french_keywords', type='keyword_marker', keywords=[]) french_stemmer = es.token_filter('french_stemmer', type='stemmer', language='light_french') french_analyzer = es.analyzer( 'french_analyzer', tokenizer='standard', filter=[ 'lowercase', 'asciifolding', french_elision, french_stopwords, # french_keywords, french_stemmer ], char_filter=['html_strip']) technologies_tokenizer = es.tokenizer('comma_tokenizer', type='pattern', pattern=' |,|, ') technologies_synonyms_filter = es.token_filter( 'technologies_synonyms', type='synonym', synonyms=[ 'c => c_language', 'c++, cpp => cpp_language', 'c/c++, c/cpp => c_language', 'c/c++, c/cpp => cpp_language', 'c#, c♯, csharp => csharp_language', 'f#, f♯, fsharp => fsharp_language', 'c#, c♯, csharp => dotnet', 'f#, f♯, fsharp => dotnet', '.net => dotnet' ]) technologies_analyzer = es.analyzer( 'technologies_analyzer', tokenizer=technologies_tokenizer, filter=['lowercase', 'asciifolding', technologies_synonyms_filter]) company_name_analyzer = es.analyzer('company_name_analyzer', tokenizer='standard', filter=['lowercase', 'asciifolding']) id = es.Integer() url = es.String(index='no') source = es.String(index='not_analyzed') title = es.String( analyzer=french_analyzer, fields={'technologies': es.String(analyzer=technologies_analyzer)}) description = es.String( analyzer=french_analyzer, fields={'technologies': es.String(analyzer=technologies_analyzer)}) company = es.String(analyzer=company_name_analyzer) company_url = es.String(index='no') address = es.String(analyzer=french_analyzer) address_is_valid = 
es.Boolean() tags = es.Nested(doc_class=Tag, properties=dict(tag=es.String(index='not_analyzed'), weight=es.Integer())) publication_datetime = es.Date() publication_datetime_is_fake = es.Boolean() crawl_datetime = es.Date() geolocation = es.GeoPoint() geolocation_is_valid = es.Boolean() def __init__(self, meta=None, **kwargs): super(Job, self).__init__(meta, **kwargs) self._doc_type.index = compute_index_name(self.index) @property def index(self): return self._doc_type.index @property def doc_type(self): return self._doc_type.name @property def published(self): return format_date(self.publication_datetime, locale='FR_fr') @property def published_in_days(self): delta = datetime.now() - self.publication_datetime # TODO: bugfix return format_timedelta(delta, granularity='day', locale='en_US') @property def alltags(self): tags = [] if self.tags: for tag in self.tags: if tag['tag'] not in condition_tags: tags.append(Tag2(tag['tag'], tag['weight'])) return tags @property def condition_tags(self): tags = [] if self.tags: for tag in self.tags: if tag['tag'] in condition_tags: tag = Tag2(tag['tag'], tag['weight'], Tag2.get_css(tag['tag'])) tags.append(tag) return tags
class AWSMetric(dsl.DocType): class Meta: index = 'awsmetric' key = dsl.String(index='not_analyzed') resource = dsl.String(index='not_analyzed') metric = dsl.String(index='not_analyzed') time = dsl.Date(format='date_optional_time||epoch_millis') period = dsl.Integer() value = dsl.Double() @classmethod def underutilized_resources(cls, keys, timespan=timedelta(days=30)): keys = any_key_to_string_array(keys) s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum') s = s.filter('terms', key=keys) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('percentiles', 'percentile_ranks', field='value', values=[20, 50]) res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) resources = [] for resource in res['aggregations']['resources']['buckets']: if resource['percentiles']['values']['20.0'] == 100: res_region, res_id = resource['key'].split('/') resources.append( dict(type='EC2 Instance', id=res_id, region=res_region, underutilized=['CPU usage under 20%'])) return dict(resources=resources) @classmethod def hourly_cpu_usage(cls, keys, resources=None): s = cls.search() if isinstance(keys, basestring): keys = [keys] elif not isinstance(keys, list): keys = list(keys) assert all(isinstance(key, basestring) for key in keys) s = s.filter('terms', key=keys) if resources: s = s.filter('terms', resource=resources) s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum') agg = s.aggs.bucket('intervals', 'date_histogram', field='time', interval='hour', min_doc_count=1) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) tmp_hours = defaultdict(list) for interval in res['aggregations']['intervals']['buckets']: interval_hour = interval['key_as_string'].split('T')[1].split( ':')[0] tmp_hours[interval_hour].append(interval['utilization']['value']) 
hours = OrderedDict( zip(["{:02d}".format(x) for x in range(0, 24)], itertools.repeat(0))) for hour, values in tmp_hours.iteritems(): hours[hour] = sum(values) / len(values) if not tmp_hours: return None return [ dict(hour=hour, cpu=float(cpu)) for hour, cpu in hours.iteritems() ] @classmethod def days_of_the_week_cpu_usage(cls, keys, resources=None): s = cls.search() if isinstance(keys, basestring): keys = [keys] elif not isinstance(keys, list): keys = list(keys) assert all(isinstance(key, basestring) for key in keys) s = s.filter('terms', key=keys) if resources: s = s.filter('terms', resource=resources) s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum') agg = s.aggs.bucket('intervals', 'date_histogram', field='time', interval='day', min_doc_count=1) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) tmp_days_of_the_week = defaultdict(list) for interval in res['aggregations']['intervals']['buckets']: weekday = datetime.strptime( interval['key_as_string'].split('T')[0], '%Y-%m-%d').date().weekday() tmp_days_of_the_week[weekday].append( interval['utilization']['value']) days = OrderedDict(zip(range(0, 7), itertools.repeat(0))) for weekday, values in tmp_days_of_the_week.iteritems(): days[weekday] = sum(values) / len(values) if not tmp_days_of_the_week: return None return [ dict(day=calendar.day_name[weekday], cpu=float(cpu)) for weekday, cpu in days.iteritems() ] @classmethod def daily_cpu_utilization(cls, key): s = cls.search() s = s.filter('term', key=key) s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum') agg = s.aggs.bucket('intervals', 'date_histogram', field='time', interval='day', min_doc_count=1) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for interval in res['aggregations']['intervals']['buckets']: yield interval['key_as_string'].split( 'T')[0], 
interval['utilization']['value'] @classmethod def get_cpu_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EC2:CPUUtilization:Maximum') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_instance_read_iops_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EC2:DiskReadOps:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_instance_write_iops_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EC2:DiskWriteOps:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_instance_read_bytes_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', 
metric='AWS/EBS:DiskReadBytes:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_instance_write_bytes_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EBS:DiskWriteBytes:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_volume_read_iops_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EBS:VolumeReadOps:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_volume_write_iops_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EBS:VolumeWriteOps:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = 
client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_volume_read_bytes_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EBS:VolumeReadBytes:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_volume_write_bytes_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EBS:VolumeWriteBytes:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_network_in_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EC2:NetworkIn:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def 
get_network_out_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/EC2:NetworkOut:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_s3_space_usage(cls, key, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='AWS/S3:BucketSizeBytes:Average') s = s.filter('term', key=key) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='awsmetric', body=s.to_dict(), size=0, request_timeout=60) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value']
class GoogleMetric(dsl.DocType): class Meta: index = 'googlemetric' identity = dsl.String(index='not_analyzed') resource = dsl.String(index='not_analyzed') metric = dsl.String(index='not_analyzed') time = dsl.Date(format='date_optional_time||epoch_millis') value = dsl.Double() @classmethod def daily_cpu_utilization(cls, identity_email): s = cls.search() s = s.filter('term', identity=identity_email) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/cpu/utilization') agg = s.aggs.bucket('intervals', 'date_histogram', field='time', interval='day', min_doc_count=1) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for interval in res['aggregations']['intervals']['buckets']: yield interval['key_as_string'].split('T')[0], interval['utilization']['value'] @classmethod def get_cpu_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/cpu/utilization') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_disk_read_iops_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/read_ops_count') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for 
resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_disk_write_iops_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/write_ops_count') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_disk_read_bytes_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/read_bytes_count') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_disk_write_bytes_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/disk/write_bytes_count') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], 
resource['utilization']['value'] @classmethod def get_network_in_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/network/received_bytes_count') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value'] @classmethod def get_network_out_usage(cls, identity_email, timespan=timedelta(days=30)): s = cls.search() s = s.filter('range', time={'gt': (datetime.utcnow() - timespan).isoformat()}) s = s.filter('term', metric='GCLOUD/COMPUTE:compute.googleapis.com/instance/network/sent_bytes_count') s = s.filter('term', identity=identity_email) agg = s.aggs.bucket('resources', 'terms', field='resource', size=300) agg.metric('utilization', 'avg', field='value') res = client.search(index='googlemetric', body=s.to_dict(), size=0) for resource in res['aggregations']['resources']['buckets']: yield resource['key'], resource['utilization']['value']
class BaseDocument(seeker.Indexable):
    """Base class for seeker-indexable search documents."""

    # Plain string field inherited by every subclass document.
    base_field = dsl.String()
class TestModelWithSelfDependencyDocument(BaseDocument):  # pylint: disable=no-member
    """Search document used to test indexing of a model that depends on itself."""

    name = dsl.String()

    class Meta:
        # Elasticsearch index backing this document type.
        index = 'test_model_with_self_dependency_search'
class AWSStat(dsl.DocType):
    """Elasticsearch document holding periodic AWS statistic snapshots.

    Each document stores one ``stat`` payload (``data``) for an account
    ``key`` at a given ``time``.  The accessors below always return the
    most recent snapshot(s) of a given statistic, falling back to a
    caller-specific default when nothing has been recorded yet.
    """

    class Meta:
        index = 'awsstat'

    key = dsl.String(index='not_analyzed')
    time = dsl.Date(format='date_optional_time||epoch_millis')
    stat = dsl.String(index='not_analyzed')
    data = dsl.Object(enabled=False)

    @classmethod
    def _latest_data(cls, keys, stat, default):
        """Return the ``data`` payload of the newest ``stat`` document for
        ``keys`` (normalized via ``any_key_to_string_array``), or ``default``
        when no matching document exists."""
        keys = any_key_to_string_array(keys)
        s = cls.search()
        s = s.filter('terms', key=keys)
        s = s.filter('term', stat=stat).sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)
        if res['hits']['total'] > 0:
            return res['hits']['hits'][0]['_source']['data']
        return default

    @classmethod
    def _latest_tag_data(cls, key, stat):
        """Return the newest tag-usage payload for ``key``/``stat``.

        Falls back to ``dict(tags=[])`` when the document is missing or,
        unlike :meth:`_latest_data`, when the hit has no ``data`` field.
        """
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat=stat).sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=1,
                            request_timeout=60)
        if res['hits']['total'] > 0 and 'data' in res['hits']['hits'][0]['_source']:
            return res['hits']['hits'][0]['_source']['data']
        return dict(tags=[])

    @classmethod
    def latest_instance_stats(cls, key):
        """Return up to the ten most recent instance statistics for ``key``,
        newest first, each entry augmented with its snapshot ``time``."""
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat='instances').sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=10,
                            request_timeout=60)
        stats = []
        for hit in res['hits']['hits']:
            entry = hit['_source']['data']
            entry.update(time=hit['_source']['time'])
            stats.append(entry)
        # Defensive re-sort: ES already sorts by -time, but the payloads are
        # returned to callers ordered by their embedded timestamp.
        stats.sort(key=lambda e: e['time'], reverse=True)
        return dict(stats=stats)

    @classmethod
    def get_latest_instance_states(cls, key, instance_id, days=5):
        """Return the last ``days`` recorded states for ``instance_id``,
        newest first, as ``{'time': ..., 'state': ...}`` dicts."""
        s = cls.search()
        s = s.filter('term', key=key)
        s = s.filter('term', stat='instancestate/' + instance_id).sort('-time')
        res = client.search(index='awsstat',
                            body=s.to_dict(),
                            size=days,
                            request_timeout=60)
        return [
            dict(time=hit['_source']['time'],
                 state=hit['_source']['data']['state'])
            for hit in res['hits']['hits']
        ]

    @classmethod
    def latest_on_demand_to_reserved_suggestion(cls, keys):
        """Latest on-demand-to-reserved suggestion, or ``dict(total=0)``."""
        return cls._latest_data(keys, 'ondemandtoreserved', dict(total=0))

    @classmethod
    def latest_s3_space_usage(cls, keys):
        """Latest S3 space usage report, or ``None`` when unavailable."""
        return cls._latest_data(keys, 's3spaceusage', None)

    @classmethod
    def latest_available_volumes(cls, keys):
        """Latest detached-volumes report, or ``dict(total=0)``."""
        return cls._latest_data(keys, 'detachedvolumes', dict(total=0))

    @classmethod
    def latest_hourly_cpu_usage_by_tag(cls, key):
        """Latest hourly CPU usage grouped by tag, or ``dict(tags=[])``."""
        return cls._latest_tag_data(key, 'hourlycpubytag')

    @classmethod
    def latest_daily_cpu_usage_by_tag(cls, key):
        """Latest daily CPU usage grouped by tag, or ``dict(tags=[])``."""
        return cls._latest_tag_data(key, 'dailycpubytag')

    @classmethod
    def latest_stopped_instances_report(cls, keys):
        """Latest stopped-instances report, or ``dict(total=0)``."""
        return cls._latest_data(keys, 'stoppedinstancesreport', dict(total=0))
class GoogleDailyResource(dsl.DocType):
    """Daily Google Cloud billing line items, one document per resource/day."""

    class Meta:
        index = 'googledailyresource'

    identity = dsl.String(index='not_analyzed')
    rid = dsl.String(index='not_analyzed')
    product = dsl.String(index='not_analyzed')
    project_name = dsl.String(index='not_analyzed')
    date = dsl.Date(format='date_optional_time||epoch_millis')
    cost = dsl.Double()

    @classmethod
    def daily_compute_cost(cls, identity_email):
        """Yield ``(day, cost)`` pairs of Compute Engine spend for one identity."""
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        s = s.filter('term', product='com.google.cloud/services/compute-engine')
        # One histogram bucket per day that has at least one document.
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='date',
                            interval='day',
                            min_doc_count=1)
        agg.metric('cost', 'sum', field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)
        for interval in res['aggregations']['intervals']['buckets']:
            # key_as_string is an ISO timestamp; keep only the date part.
            yield interval['key_as_string'].split(
                'T')[0], interval['cost']['value']

    @classmethod
    def daily_cost_by_product(cls,
                              identity_email,
                              timespan=timedelta(days=7),
                              top=4):
        """Return per-day costs of the ``top`` products over ``timespan``.

        Result shape: ``{'days': [{'day': ..., 'products': [...]}, ...]}``,
        sorted by day ascending.
        """
        now = datetime.utcnow()
        rollup = cls.rollup_by_product(identity_email, now - timespan, now,
                                       'day', top)
        days = defaultdict(list)
        for interval, product, cost in rollup:
            days[interval.split('T')[0]].append(
                dict(cost=cost, product=get_google_uri_name(product)))
        res = dict(days=[dict(day=d, products=ps) for d, ps in days.items()])
        res['days'] = sorted(res['days'], key=lambda x: x['day'])
        return res

    @classmethod
    def month_cost_by_product(cls, identity_email, top=4):
        """Return the current month's cost of the ``top`` products.

        Result shape: ``{'month': 'YYYY-MM', 'products': [...]}``.
        """
        now = datetime.utcnow()
        rollup = cls.rollup_by_product(identity_email,
                                       datetime(now.year, now.month, 1),
                                       datetime.utcnow(), 'month', top)
        month = {'products': []}
        for interval, product, cost in rollup:
            # 'YYYY-MM' taken from the interval key.
            month['month'] = '-'.join(interval.split('-')[:2])
            month['products'].append(
                dict(cost=cost, product=get_google_uri_name(product)))
        return month

    @classmethod
    def range_query(cls, identity_email, start, stop):
        """Return a Search filtered on identity and the (start, stop] date range."""
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        s = s.filter('range',
                     date={
                         'gt': start.isoformat(),
                         'lte': stop.isoformat()
                     })
        return s

    @classmethod
    def rollup_by_product(cls, identity_email, start, stop, interval, top):
        """Yield ``(interval_key, product, cost)`` for the ``top`` products.

        Products are ranked by their total cost over the whole range; for
        intervals where a top product has no spend, a 0.0 entry is emitted
        so every yielded interval covers the same product set.
        """
        s = cls.range_query(identity_email, start, stop)
        agg = s.aggs.bucket('intervals',
                            'date_histogram',
                            field='date',
                            interval=interval,
                            min_doc_count=1)
        agg.bucket('product', 'terms', field='product').metric('cost',
                                                               'sum',
                                                               field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)
        # First pass: total cost per product across all intervals.
        product_costs = defaultdict(float)
        for interval in res['aggregations']['intervals']['buckets']:
            for product in interval['product']['buckets']:
                product_costs[product['key']] += product['cost']['value']
        top_prods = set(
            sorted(product_costs, key=lambda p: product_costs[p],
                   reverse=True)[:top])
        # Second pass: emit per-interval costs for top products, tracking
        # which products actually appeared in each interval.
        interval_prods = defaultdict(set)
        for interval in res['aggregations']['intervals']['buckets']:
            for product in interval['product']['buckets']:
                if product['key'] in top_prods:
                    yield interval['key_as_string'], product['key'], product[
                        'cost']['value']
                    interval_prods[interval['key_as_string']].add(
                        product['key'])
        # Zero-fill top products missing from an interval.
        for interval, prods in interval_prods.items():
            missing = top_prods - prods
            for prod in missing:
                yield interval, prod, 0.0

    @classmethod
    def monthly_aggregates_resource(cls, identity_email):
        """Return per-month cost broken down by resource id.

        Result shape: ``{'months': [{'month': ..., 'resources': [...]}, ...]}``;
        months with no resources are omitted.
        """
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        agg = s.aggs.bucket('months',
                            'date_histogram',
                            field='date',
                            interval='month',
                            min_doc_count=1)
        # 0x7FFFFFFF: effectively unbounded terms bucket size.
        agg.bucket('rid', 'terms', field='rid',
                   size=0x7FFFFFFF).metric('cost', 'sum', field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)
        months = []
        for month in res['aggregations']['months']['buckets']:
            resources = []
            for resource in month['rid']['buckets']:
                resources.append(
                    dict(cost=resource['cost']['value'],
                         resource=resource['key']))
            if resources == []:
                continue
            months.append(
                dict(month=month['key_as_string'].split('T')[0],
                     resources=resources))
        return dict(months=months)

    @classmethod
    def monthly_aggregates_project(cls, identity_email):
        """Return per-month cost broken down by project name.

        Same shape as :meth:`monthly_aggregates_resource`, with a
        ``projects`` list per month instead of ``resources``.
        """
        s = cls.search()
        s = s.filter('term', identity=identity_email)
        agg = s.aggs.bucket('months',
                            'date_histogram',
                            field='date',
                            interval='month',
                            min_doc_count=1)
        agg.bucket('project_name', 'terms', field='project_name',
                   size=0x7FFFFFFF).metric('cost', 'sum', field='cost')
        res = client.search(index='googledailyresource',
                            body=s.to_dict(),
                            size=0)
        months = []
        for month in res['aggregations']['months']['buckets']:
            projects = []
            for project in month['project_name']['buckets']:
                projects.append(
                    dict(cost=project['cost']['value'],
                         project=project['key']))
            if projects == []:
                continue
            months.append(
                dict(month=month['key_as_string'].split('T')[0],
                     projects=projects))
        return dict(months=months)
class Company(es.DocType):
    """Elasticsearch document describing a company profile.

    Declares the French-language and technology-keyword analysis chains
    used by the searchable fields below.
    """

    class Meta:
        index = 'companies'
        doc_type = 'company'

    # Strip French elided articles (l', d', qu', ...) before analysis.
    french_elision = es.token_filter(
        'french_elision',
        type='elision',
        articles_case=True,
        articles=[
            'l', 'm', 't', 'qu', 'n', 's', 'j', 'd', 'c', 'jusqu',
            'quoiqu', 'lorsqu', 'puisqu'
        ]
    )
    french_stopwords = es.token_filter(
        'french_stopwords',
        type='stop',
        stopwords='_french_'
    )
    # Do not include this filter if keywords is empty
    french_keywords = es.token_filter(
        'french_keywords',
        type='keyword_marker',
        keywords=[]
    )
    french_stemmer = es.token_filter(
        'french_stemmer',
        type='stemmer',
        language='light_french'
    )
    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip']
    )
    # Technology lists are comma- or space-separated strings.
    technologies_tokenizer = es.tokenizer(
        'comma_tokenizer',
        type='pattern',
        pattern=' |,|, '
    )
    # Normalize language-name spellings (c++/cpp, c#/csharp, .net, ...)
    # onto canonical synonym tokens.
    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language',
            'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language',
            'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language',
            'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet',
            '.net => dotnet'
        ]
    )
    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=[
            'lowercase',
            'asciifolding',
            technologies_synonyms_filter
        ]
    )
    company_name_analyzer = es.analyzer(
        'company_name_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding'
        ]
    )

    # index='no' fields are stored but not searchable.
    id = es.String(index='no')
    name = es.String(analyzer=french_analyzer)
    description = es.String(
        analyzer=french_analyzer,
        fields={
            'technologies': es.String(analyzer=technologies_analyzer)
        }
    )
    technologies = es.String(analyzer=technologies_analyzer)
    url = es.String(index='no')
    logo_url = es.String(index='no')
    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()
    email = es.String(index='no')
    phone = es.String(index='no')
    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        # Recompute the concrete index name on instantiation — presumably
        # environment/deployment specific; see compute_index_name.
        super(Company, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        # Name of the Elasticsearch index this document targets.
        return self._doc_type.index

    @property
    def doc_type(self):
        # Mapping (doc type) name registered for this document.
        return self._doc_type.name
class AWSDetailedLineitem(dsl.DocType): class Meta: index = 'awsdetailedlineitem' availability_zone = dsl.String(index='not_analyzed') cost = dsl.Double() un_blended_cost = dsl.Double() item_description = dsl.String(index='not_analyzed') linked_account_id = dsl.String(index='not_analyzed') operation = dsl.String() payer_account_id = dsl.String(index='not_analyzed') pricing_plan_id = dsl.Long() product_name = dsl.String(index='not_analyzed') rate = dsl.Double() un_blended_rate = dsl.Double() rate_id = dsl.Long() record_id = dsl.String(index='not_analyzed') reserved_instance = dsl.Boolean() resource_id = dsl.String(index='not_analyzed') subscription_id = dsl.Long() tag = dsl.Object( properties={ 'key': dsl.String(index='not_analyzed'), 'value': dsl.String(index='not_analyzed') }) usage_end_date = dsl.Date(format='strict_date_optional_time||epoch_millis') usage_quantity = dsl.Double() usage_start_date = dsl.Date( format='strict_date_optional_time||epoch_millis') usage_type = dsl.String(index='not_analyzed') @classmethod @with_cache(ttl=3600 * 3, worker_refresh=True) def keys_has_data(cls, keys, date_from=None, date_to=None): date_to = date_to or datetime.utcnow() s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) if date_from: s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) return res['hits']['total'] > 0 @classmethod @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d")) def get_first_date(cls, keys): s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.sort('usage_start_date') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=1, request_timeout=60) if res['hits']['total'] == 0: return return res['hits']['hits'][0]['_source']['usage_start_date'].split( 'T')[0] @classmethod 
@with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d")) def get_last_date(cls, keys, limit=None): s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) if limit: s = s.filter('range', usage_start_date={'to': limit.isoformat()}) s = s.sort('-usage_start_date') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=1, request_timeout=60) if res['hits']['total'] == 0: return return res['hits']['hits'][0]['_source']['usage_start_date'].split( 'T')[0] @classmethod def get_first_to_now_date(cls, keys): def from_date_to_today(d): now = datetime.utcnow() while d < now: yield d d += relativedelta(months=1) return list(from_date_to_today(cls.get_first_date(keys))) @classmethod def get_first_to_last_date(cls, keys): def from_date_to_last(d): last = cls.get_last_date(keys) while d < last: yield d d += relativedelta(months=1) return list(from_date_to_last(cls.get_first_date(keys))) @classmethod @with_cache(6 * 3600) def get_available_tags(cls, keys, only_with_data=None, product_name=None): s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) if product_name: s = s.filter('term', product_name=product_name) s.aggs.bucket('tag_key', 'terms', field='tag.key') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) tags = [] for tag in res['aggregations']['tag_key']['buckets']: if tag['key'].startswith('user:'******'key'].split(':')[1] if not only_with_data or name in AWSStat.latest_hourly_cpu_usage_by_tag( only_with_data )['tags'] or name in AWSStat.latest_daily_cpu_usage_by_tag( only_with_data)['tags']: tags.append(name) tags.sort() return dict(tags=tags) @classmethod @with_cache(ttl=6 * 3600) def get_cost_by_tag(cls, keys, tag, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or 
date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('term', **{'tag.key': 'user:{}'.format(tag)}) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s.aggs.bucket('total_cost', 'sum', field='cost') agg = s.aggs.bucket('tag_value', 'terms', field='tag.value', size=0x7FFFFFFF) agg.bucket('cost', 'sum', field='cost') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) tags = [{ 'tag_value': tag['key'], 'cost': tag['cost']['value'], } for tag in res['aggregations']['tag_value']['buckets']] return dict(tags=tags, total_cost=res['aggregations']['total_cost']['value']) @classmethod @with_cache(ttl=6 * 3600) def get_cost(cls, keys, date_from, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace( hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s.aggs.bucket('total_cost', 'sum', field='cost') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) return dict(total_cost=res['aggregations']['total_cost']['value']) @classmethod @with_cache() def get_monthly_cost_by_tag(cls, keys, tag, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) 
else [keys]) s = s.filter('term', **{'tag.key': 'user:{}'.format(tag)}) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('intervals', 'date_histogram', field='usage_start_date', interval='month', min_doc_count=1) agg.bucket('total_cost', 'sum', field='cost') agg = agg.bucket('tag_value', 'terms', field='tag.value', size=0x7FFFFFFF) agg.bucket('cost', 'sum', field='cost') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) months = [{ 'month': interval['key_as_string'].split('T')[0][:-3], 'tags': [{ 'tag_value': tag['key'], 'cost': tag['cost']['value'], } for tag in interval['tag_value']['buckets']], 'total_cost': interval['total_cost']['value'], } for interval in res['aggregations']['intervals']['buckets']] return dict(months=months) @classmethod @with_cache() def get_cost_by_product(cls, key, date_from=None, date_to=None, without_discount=False, only_discount=False, size=0x7FFFFFFF): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter('term', linked_account_id=key) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) if without_discount: s = s.query( 'bool', filter=[ ~dsl.Q('term', item_description='PAR_APN_ProgramFee_2500') ]) if only_discount: s = s.filter('term', item_description='PAR_APN_ProgramFee_2500') agg = s.aggs.bucket('products', 'terms', field='product_name', order={'cost': 'desc'}, size=size) agg.bucket('cost', 'sum', field='cost') s = s.query('bool', filter=[~dsl.Q('term', cost=0)]) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) products = [{ 'product': SHORT_NAMES.get(product['key'], product['key']), 'cost': 
product['cost']['value'], } for product in res['aggregations']['products']['buckets']] return dict(products=products) @classmethod @with_cache() def get_cost_by_region(cls, keys, tagged=False, byaccount=False, date_from=None, date_to=None, size=0x7FFFFFFF): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs if byaccount: agg = agg.bucket('accounts', 'terms', field='linked_account_id') agg = agg.bucket('intervals', 'date_histogram', field='usage_start_date', interval='month', min_doc_count=1) agg = agg.bucket('regions', 'terms', field='availability_zone', size=size) agg.bucket('cost', 'sum', field='cost') if tagged: agg = agg.bucket('tags', 'terms', field='tag.value') agg.bucket('cost', 'sum', field='cost') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0) return res['aggregations'] @classmethod @with_cache() def get_monthly_cost(cls, keys, date_from=None, date_to=None, size=0x7FFFFFFF): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('intervals', 'date_histogram', field='usage_start_date', interval='month', min_doc_count=1) agg.bucket('cost', 'sum', field='cost') res = 
client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) res = [{ 'month': interval['key_as_string'].split('T')[0], 'total_cost': interval['cost']['value'], } for interval in res['aggregations']['intervals']['buckets']] return dict(months=res) @classmethod @with_cache() def get_monthly_cost_by_product(cls, keys, tagged=False, date_from=None, date_to=None, size=0x7FFFFFFF): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('intervals', 'date_histogram', field='usage_start_date', interval='month', min_doc_count=1) agg = agg.bucket('products', 'terms', field='product_name', size=size) agg.bucket('cost', 'sum', field='cost') if tagged: agg = agg.bucket('tags', 'terms', field='tag.value') agg.bucket('cost', 'sum', field='cost') s = s.query('bool', filter=[~dsl.Q('term', cost=0)]) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) def tagged_cost(bucket, total): total_tag = 0.0 for tag in bucket: total_tag += tag['cost']['value'] yield (tag['key'], tag['cost']['value']) if total != total_tag: yield ('untagged', total - total_tag) res = [{ 'month': interval['key_as_string'].split('T')[0], 'products': [{ 'product': SHORT_NAMES.get(product['key'], product['key']), 'cost': product['cost']['value'], 'tags': [{ 'name': tag[0], 'cost': tag[1], } for tag in tagged_cost(product['tags']['buckets'], product['cost']['value'])], } for product in interval['products']['buckets']] if tagged else [{ 'product': SHORT_NAMES.get(product['key'], product['key']), 'cost': product['cost']['value'], } 
for product in interval['products']['buckets']] } for interval in res['aggregations']['intervals']['buckets']] return dict(months=res) @classmethod @with_cache(ttl=4 * 3600) def get_daily_cost_by_product(cls, keys, date_from=None, date_to=None, size=0x7FFFFFFF): date_from = date_from or datetime.utcnow().replace( hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace( hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('intervals', 'date_histogram', field='usage_start_date', interval='day', min_doc_count=1) agg = agg.bucket('products', 'terms', field='product_name', size=size) agg.metric('cost', 'sum', field='cost') s = s.query('bool', filter=[~dsl.Q('term', cost=0)]) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) res = [{ 'day': interval['key_as_string'].split('T')[0], 'products': [{ 'product': SHORT_NAMES.get(product['key'], product['key']), 'cost': product['cost']['value'], } for product in interval['products']['buckets']] } for interval in res['aggregations']['intervals']['buckets']] return dict(days=res) @classmethod @with_cache(ttl=24 * 3600) def get_yearly_cost_by_product(cls, keys, date_from=None, date_to=None, size=0x7FFFFFFF): date_from = date_from or datetime.utcnow().replace( month=1, day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(month=12, day=31, hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('intervals', 'date_histogram', field='usage_start_date', interval='year', min_doc_count=1) agg = 
agg.bucket('products', 'terms', field='product_name', size=size) agg.metric('cost', 'sum', field='cost') s = s.query('bool', filter=[~dsl.Q('term', cost=0)]) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) res = [{ 'year': interval['key_as_string'][:4], 'products': [{ 'product': SHORT_NAMES.get(product['key'], product['key']), 'cost': product['cost']['value'], } for product in interval['products']['buckets']] } for interval in res['aggregations']['intervals']['buckets']] return dict(years=res) @classmethod @with_cache() def get_cost_by_resource(cls, keys, date_from=None, date_to=None, search=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) if search: s = s.query('wildcard', resource_id='*{}*'.format(search)) agg = s.aggs.bucket('resources', 'terms', field='resource_id', order={'cost': 'desc'}, size=0x7FFFFFFF) agg.bucket('cost', 'sum', field='cost') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) resources = [{ 'resource': resource['key'], 'cost': resource['cost']['value'], } for resource in res['aggregations']['resources']['buckets']] return resources @classmethod def get_monthly_cost_by_resource(cls, resource_ids, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) if resource_ids: s = cls.search() s = s.filter('range', 
usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s = s.filter('terms', resource_id=list(resource_ids)) agg = s.aggs.bucket('months', 'date_histogram', field='usage_start_date', interval='month', min_doc_count=1) agg.metric('cost', 'sum', field='cost') r = client.search('awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) return { e['key_as_string']: e['cost']['value'] for e in r['aggregations']['months']['buckets'] } else: return {} @classmethod @with_cache() def get_lambda_usage(cls, keys, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('term', product_name='AWS Lambda') s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('resources', 'terms', field='resource_id', size=0x7FFFFFFF) agg.metric('cost', 'avg', field='cost') agg = agg.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF) agg.metric('quantity', 'sum', field='usage_quantity') agg = agg.bucket('descriptions', 'terms', field='item_description', size=0x7FFFFFFF) agg.metric('quantity', 'sum', field='usage_quantity') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) #return res def _lambda_usage_regb(buckets, endswith): for b in buckets: if b['key'].endswith(endswith): return b['quantity']['value'] usages = [{ 'rid': usage['key'], 'name': usage['key'].split(':')[-1], 'requests': _lambda_usage_regb(usage['types']['buckets'], '-Request'), 'gb_seconds': _lambda_usage_regb(usage['types']['buckets'], '-Lambda-GB-Second'), 'cost': usage['cost']['value'], 'raw_cost': lambdapricing.get_raw_cost([ 
x['descriptions']['buckets'] for x in usage['types']['buckets'] ]), } for usage in res['aggregations']['resources']['buckets']] return usages @classmethod @with_cache() def get_s3_bandwidth_costs(cls, key, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter('term', linked_account_id=key) s = s.filter('term', product_name='Amazon Simple Storage Service') s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF) agg.metric('cost', 'sum', field='cost') agg.metric('gb', 'sum', field='usage_quantity') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) transfers = [{ 'type': transfer['key'], 'quantity': transfer['gb']['value'], 'cost': transfer['cost']['value'], } for transfer in res['aggregations']['types']['buckets']] return transfers @classmethod @with_cache() def get_ec2_bandwidth_costs(cls, key, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter('term', linked_account_id=key) s = s.filter('term', product_name='Amazon Elastic Compute Cloud') s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) agg = s.aggs.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF) agg.metric('cost', 'sum', field='cost') agg.metric('gb', 'sum', field='usage_quantity') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, 
request_timeout=60) transfers = [{ 'type': transfer['key'], 'quantity': transfer['gb']['value'], 'cost': transfer['cost']['value'], } for transfer in res['aggregations']['types']['buckets']] return transfers @classmethod def get_ec2_daily_cost(cls, key): s = cls.search() s = s.filter('term', linked_account_id=key) s = s.filter('term', product_name='Amazon Elastic Compute Cloud') agg = s.aggs.bucket('intervals', 'date_histogram', field='usage_start_date', interval='day', min_doc_count=1) agg.metric('cost', 'sum', field='cost') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) for interval in res['aggregations']['intervals']['buckets']: yield interval['key_as_string'].split( 'T')[0], interval['cost']['value'] @classmethod @with_cache() def get_elb_usage_a_day(cls, keys, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) gib = Fraction(2**30) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s = s.filter("prefix", resource_id="arn:aws:elasticloadbalancing") s = s.sort({"usage_start_date": {"order": "desc"}}) agg = s.aggs.bucket('rid', 'terms', field='resource_id', size=0x7FFFFFFF) agg.metric('cost', 'sum', field='cost') agg = agg.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF) agg.metric('quantity', 'sum', field='usage_quantity') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) elbs = [{ 'rid': elb['key'], 'cost': elb['cost']['value'] / (date_to - date_from).days, 'hours': float( sum([ x['quantity']['value'] for x in elb['types']['buckets'] if x['key'].endswith('LoadBalancerUsage') ]) 
/ (date_to - date_from).days), 'bytes': float((sum([ x['quantity']['value'] for x in elb['types']['buckets'] if x['key'].endswith('Bytes') ]) * gib) / (date_to - date_from).days), } for elb in res['aggregations']['rid']['buckets']] return elbs @classmethod @with_cache() def get_instance_type(cls, keys, date_from=None, date_to=None): date_from = date_from or datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.extra(_source=[ 'usage_start_date', 'usage_type', 'availability_zone', 'resource_id' ]) s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s = s.filter("term", product_name='Amazon Elastic Compute Cloud') s = s.query('wildcard', usage_type='*BoxUsage:*') s = s.filter('exists', field='resource_id') s = s.sort({"usage_start_date": {"order": "desc"}}) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=10000, request_timeout=60) def cut_region_name(s): return s[:-1] if s[-1].isalpha() else s types = [] refs = {} def add_in_types(type, rid): ref_tuple = (type['hour'], type['instance'], type['region']) if ref_tuple in refs: refs[ref_tuple]['rids'].append(rid) refs[ref_tuple]['ridCount'] += 1 return type['rids'] = [rid] types.append(type) refs[ref_tuple] = types[-1] for r in res['hits']['hits']: elem = { 'hour': r['_source']['usage_start_date'], 'instance': r['_source']['usage_type'].split(':')[1], 'region': cut_region_name(r['_source']['availability_zone']) if 'availability_zone' in r['_source'] else 'unknown', 'ridCount': 1, } add_in_types(elem, r['_source']['resource_id']) return types @classmethod @with_cache() def get_instance_hour(cls, keys, date_from=None, date_to=None, min_hour=None): date_from = date_from or 
datetime.utcnow().replace( day=1, hour=0, minute=0, second=0, microsecond=0) date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s = s.filter("term", product_name='Amazon Elastic Compute Cloud') s = s.filter('prefix', resource_id='i-') s = s.query('wildcard', usage_type='*BoxUsage*') agg = s.aggs.bucket('resource_id', 'terms', field='resource_id', size=0x7FFFFFFF) agg.bucket('days', 'date_histogram', field='usage_start_date', interval='day', min_doc_count=1) res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) instance_list = [] for instance in res['aggregations']['resource_id']['buckets']: tmp_hours = [] for day in instance['days']['buckets']: tmp_hours.append(day['doc_count']) avg_hours = sum(tmp_hours) / float(len(tmp_hours)) if not min_hour or avg_hours >= min_hour: instance_list.append(dict(id=instance['key'], hours=avg_hours)) return sorted(instance_list, key=lambda x: x['hours'], reverse=True) @classmethod @with_cache() def get_s3_buckets_per_tag(cls, keys): def _check_if_in_list(dict_list, value, key): return next((item for item in dict_list if item[key] == value), None) def _parse_tag_keys_results(res): bucket_tagged = [] for bucket_tag_key in res['aggregations']['tag_key']['buckets']: buff_tag_key = _check_if_in_list(bucket_tagged, bucket_tag_key['key'], 'tag_key') if buff_tag_key is None: buff_tag_key = { "tag_key": bucket_tag_key['key'], "tag_value": [] } buff_tag_key = _parse_tag_values_results( bucket_tag_key, buff_tag_key) bucket_tagged.append(buff_tag_key) return bucket_tagged def _parse_tag_values_results(bucket_tag_key, buff_tag_key): for bucket_tag_value in bucket_tag_key['tag_value']['buckets']: 
buff_tag_value = _check_if_in_list(buff_tag_key['tag_value'], bucket_tag_value['key'], 'tag_value') if buff_tag_value is None: buff_tag_value = { "tag_value": bucket_tag_value['key'], "s3_buckets": [] } buff_tag_value = _parse_buckets_results( buff_tag_value, bucket_tag_value) buff_tag_key['tag_value'].append(buff_tag_value) return buff_tag_key def _parse_buckets_results(buff_tag_value, bucket_tag_value): for bucket_resource_id in bucket_tag_value['ressource_id'][ 'buckets']: buff_bucket_resource_id = _check_if_in_list( buff_tag_value['s3_buckets'], bucket_resource_id['key'], 'bucket_name') if buff_bucket_resource_id is None: buff_bucket_resource_id = { "bucket_name": bucket_resource_id['key'], "account_id": bucket_resource_id['account_id']['buckets'][0]['key'] } buff_tag_value['s3_buckets'].append(buff_bucket_resource_id) return buff_tag_value s = cls.search() s = s.filter( 'terms', linked_account_id=keys if isinstance(keys, list) else [keys]) s = s.filter('term', product_name='Amazon Simple Storage Service') s = s.query('exists', field="tag") s = s.query('wildcard', item_description="*storage*") agg = s.aggs.bucket('tag_key', 'terms', field="tag.key") agg = agg.bucket('tag_value', 'terms', field='tag.value') agg.bucket('ressource_id', 'terms', field='resource_id').bucket('account_id', 'terms', field='linked_account_id') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) ''' bucket_tagged structure [{ "tag_key" : "KEY", # Unique in list "tag_value": [{ "tag_value": "VALUE", # Unique in list "s3_buckets": [{ "bucket_name": "BUCKET_NAME", "account_id": "ACCOUND_ID" }, {...}] }, {...}] }, {...}] ''' bucket_tagged = _parse_tag_keys_results(res) return bucket_tagged @classmethod @with_cache() def get_s3_bandwidth_info_and_cost_per_name(cls, key, bucket_resource_ids, date_from=None, date_to=None): date_from = date_from or (datetime.utcnow() - relativedelta( month=1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0) 
date_to = date_to or date_from.replace(day=calendar.monthrange( date_from.year, date_from.month)[1], hour=23, minute=59, second=59, microsecond=999999) s = cls.search() s = s.filter('term', linked_account_id=key) s = s.filter('term', product_name='Amazon Simple Storage Service') s = s.filter('terms', resource_id=bucket_resource_ids if isinstance( bucket_resource_ids, list) else [bucket_resource_ids]) s = s.filter('range', usage_start_date={ 'from': date_from.isoformat(), 'to': date_to.isoformat() }) s = s.filter('wildcard', usage_type="*Bytes") agg = s.aggs.bucket('bucket_name', 'terms', field='resource_id', size=0x7FFFFFFF) agg.metric('cost', 'sum', field='cost') agg = agg.bucket('transfer_type', 'terms', field='usage_type') agg.metric('data', 'sum', field='usage_quantity') res = client.search(index='awsdetailedlineitem', body=s.to_dict(), size=0, request_timeout=60) data = [{ "bucket_name": bucket['key'], "cost": bucket['cost']['value'], "transfer_stats": [{ "type": transfer_stat['key'], "data": transfer_stat['data']['value'] } for transfer_stat in bucket['transfer_type']['buckets']] } for bucket in res['aggregations']['bucket_name']['buckets']] return data
class Organisatie(es.DocType): ext_id = es.String(index='not_analyzed') naam = es.String(analyzer=dutch_analyzer) # ngram beschrijving = es.String(analyzer=dutch_analyzer) afdeling = es.String(index='not_analyzed')
class ResponseDocType(FjordDocType):
    """ElasticSearch doctype for a feedback ``Response``.

    ``extract_doc`` converts a ``Response`` model instance into the dict
    that gets indexed under this mapping.
    """
    id = es_dsl.Integer()
    happy = es_dsl.Boolean()
    api = es_dsl.Integer()
    url = es_dsl.String(index='not_analyzed')
    url_domain = es_dsl.String(index='not_analyzed')
    has_email = es_dsl.Boolean()
    description = es_dsl.String(analyzer='snowball')
    category = es_dsl.String(index='not_analyzed')
    description_bigrams = es_dsl.String(index='not_analyzed')
    description_terms = es_dsl.String(analyzer='standard')
    user_agent = es_dsl.String(index='not_analyzed')
    product = es_dsl.String(index='not_analyzed')
    channel = es_dsl.String(index='not_analyzed')
    version = es_dsl.String(index='not_analyzed')
    browser = es_dsl.String(index='not_analyzed')
    browser_version = es_dsl.String(index='not_analyzed')
    platform = es_dsl.String(index='not_analyzed')
    locale = es_dsl.String(index='not_analyzed')
    country = es_dsl.String(index='not_analyzed')
    device = es_dsl.String(index='not_analyzed')
    manufacturer = es_dsl.String(index='not_analyzed')
    source = es_dsl.String(index='not_analyzed')
    campaign = es_dsl.String(index='not_analyzed')
    # Misspelled field kept so anything already referencing it keeps a
    # valid mapping; it was never populated by extract_doc().
    souce_campaign = es_dsl.String(index='not_analyzed')
    # BUG FIX: extract_doc() emits the key 'source_campaign', but only the
    # misspelled 'souce_campaign' above had an explicit mapping, so the
    # real field fell back to dynamic mapping. Declare it explicitly.
    source_campaign = es_dsl.String(index='not_analyzed')
    organic = es_dsl.Boolean()
    created = es_dsl.Date()

    docs = ResponseDocTypeManager()

    class Meta:
        pass

    def mlt(self):
        """Returns a search with a morelikethis query for docs like this"""
        # Short responses tend to not repeat any words, so then MLT
        # returns nothing. This fixes that by setting min_term_freq to
        # 1. Longer responses tend to repeat important words, so we can
        # set min_term_freq to 2.
        num_words = len(self.description.split(' '))
        if num_words > 40:
            min_term_freq = 2
        else:
            min_term_freq = 1

        s = self.search()
        # Restrict "more like this" to the same product/platform when known.
        if self.product:
            s = s.filter('term', product=self.product)
        if self.platform:
            s = s.filter('term', platform=self.platform)

        s = s.query('more_like_this',
                    fields=['description'],
                    docs=[{
                        '_index': get_index_name(),
                        '_type': self._doc_type.name,
                        '_id': self.id
                    }],
                    min_term_freq=min_term_freq,
                    stop_words=list(ANALYSIS_STOPWORDS))
        return s

    @classmethod
    def get_model(cls):
        """Return the Django model this doctype indexes."""
        return Response

    @classmethod
    def public_fields(cls):
        """Fields that can be publicly-visible

        .. Note::

           Do NOT include fields that have PII in them.

        """
        return ('id', 'happy', 'api', 'url_domain', 'has_email',
                'description', 'category', 'description_bigrams',
                'user_agent', 'product', 'version', 'platform', 'locale',
                'source', 'campaign', 'organic', 'created')

    @property
    def truncated_description(self):
        """Shorten feedback for dashboard view."""
        return smart_truncate(self.description, length=500)

    @classmethod
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create
        a ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            'id': resp.id,
            'happy': resp.happy,
            'api': resp.api,
            'url': resp.url,
            'url_domain': resp.url_domain,
            'has_email': bool(resp.user_email),
            'description': resp.description,
            'user_agent': resp.user_agent,
            'product': resp.product,
            'channel': resp.channel,
            'version': resp.version,
            'browser': resp.browser,
            'browser_version': resp.browser_version,
            'platform': resp.platform,
            'locale': resp.locale,
            'country': resp.country,
            'device': resp.device,
            'manufacturer': resp.manufacturer,
            'source': resp.source,
            'campaign': resp.campaign,
            'source_campaign': '::'.join([(resp.source or '--'),
                                          (resp.campaign or '--')]),
            'organic': (not resp.campaign),
            'created': resp.created
        }
        # We only compute bigrams for english because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u'en') and resp.description:
            doc['description_bigrams'] = compute_grams(resp.description)
        else:
            doc['description_bigrams'] = []

        if with_id:
            doc['_id'] = doc['id']
        return doc
class Activiteit(es.DocType): ext_id = es.String(index='not_analyzed') naam = es.String(analyzer=dutch_analyzer) beschrijving = es.String(analyzer=dutch_analyzer) bron_link = es.String(index='not_analyzed') tijdstip = es.String(index='not_analyzed') tags = es.String(index='not_analyzed') centroid = es.GeoPoint() locatie = es.Object(doc_class=Locatie, properties={ 'ext_id': es.String(index='not_analyzed'), 'naam': es.String(analyzer=dutch_analyzer), 'centroid': es.GeoPoint(), 'openbare_ruimte_naam': es.String(index='not_analyzed'), 'huisnummer': es.String(index='not_analyzed'), 'huisnummer_toevoeging': es.String(index='not_analyzed'), 'postcode': es.String(index='not_analyzed') })
class AWSAccessLog(dsl.DocType): class Meta: index = 'awsaccesslog' key = dsl.String(index='not_analyzed') resource = dsl.String(index='not_analyzed') time = dsl.Date(format='date_optional_time||epoch_millis') period = dsl.Integer() bucket = dsl.String(index='not_analyzed') object = dsl.String(index='not_analyzed') @classmethod def most_accessed_s3_objects(cls, key): s = cls.search() s = s.filter('term', key=key) agg = s.aggs.bucket('objects', 'terms', field='object') agg.bucket('buckets', 'terms', field='bucket') res = client.search(index='awsaccesslog', body=s.to_dict(), request_timeout=60) objects = [] for object in res['aggregations']['objects']['buckets']: objects.append( dict(bucket=object['buckets']['buckets'][0]['key'], object=object['key'], access_count=object['doc_count'])) return dict(objects=objects) @classmethod def last_accessed_s3_objects(cls, key): s = cls.search() s = s.filter('term', key=key) agg = s.aggs.bucket('objects', 'terms', field='object') res = client.search(index='awsaccesslog', body=s.to_dict(), request_timeout=60) objects = [] for object in res['aggregations']['objects']['buckets']: s2 = cls.search() s2 = s2.query("match", object=object['key']) s2 = s2.sort('-time') res2 = client.search(index='awsaccesslog', body=s2.to_dict(), request_timeout=60) objects.append( dict(object=object['key'], bucket=res2['hits']['hits'][0]['_source']['bucket'], last_access=res2['hits']['hits'][0]['_source']['time'])) return dict(objects=objects) @classmethod def last_access_s3_bucket(cls, key, bucket): s = cls.search() s = s.filter('term', key=key).filter('term', bucket=bucket) s.sort('-time') res = client.search(index='awsaccesslog', body=s.to_dict(), request_timeout=60) if res['hits']['total'] > 0: return res['hits']['hits'][0]['_source']['time'] else: return 'never_accessed'