def aws_cpu_hourly_usage_by_tag(accounts, tag):
    res = []
    # Per tag value: an instance count plus a list of CPU samples for each
    # of the 24 hours of the day ("00" through "23").
    tpm_cpu_usage_infos = defaultdict(lambda: dict(
        nb_instances=0,
        usage={"{:02d}".format(x): [] for x in range(0, 24)}))
    for account in accounts:
        usage = AWSStat.latest_hourly_cpu_usage_by_tag(account.key)
        if tag in usage['tags']:
            for tag_infos in usage['tags'][tag]:
                tpm_cpu_usage_infos[tag_infos['tag_value']]['nb_instances'] += tag_infos['nb_instances']
                for hour in tag_infos['usage']:
                    tpm_cpu_usage_infos[tag_infos['tag_value']]['usage'][
                        hour['hour']].append(hour['cpu'])
    for tag_value, tag_infos in tpm_cpu_usage_infos.iteritems():
        tag_res = dict(nb_instances=tag_infos['nb_instances'],
                       tag_value=tag_value,
                       usage=[])
        for usage_hour, usage_details in tag_infos['usage'].iteritems():
            # Average the CPU samples collected for this hour; 0 if none.
            cpu_usage = 0 if not usage_details else sum(usage_details) / len(usage_details)
            tag_res['usage'].append(dict(hour=usage_hour, cpu=cpu_usage))
        tag_res['usage'].sort(key=lambda x: x['hour'])
        res.append(tag_res)
    return jsonify(values=res)
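# Illustrative only: the shape of the payload built above, assuming a single
# account with instances tagged "env". Hours are zero-padded strings, so the
# lexicographic sort above matches chronological order:
#
#   {"values": [{"tag_value": "prod",
#                "nb_instances": 3,
#                "usage": [{"hour": "00", "cpu": 12.5},
#                          {"hour": "01", "cpu": 11.0},
#                          ...]}]}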
def get_stopped_instances_report(session, key, days=5):
    stopped_instances = []
    for region, instance in get_all_instances(session):
        latest_states = AWSStat.get_latest_instance_states(key, instance.id,
                                                           days)
        # Only consider instances with a full `days` worth of state records.
        if len(latest_states) != days:
            continue
        not_stopped_states = [state for state in latest_states
                              if state['state'] != 'stopped']
        if not not_stopped_states:
            stopped_instances.append(instance.id)
    return dict(stopped_instances=stopped_instances,
                total=len(stopped_instances))
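# Illustrative only, with hypothetical state records: given days=5, an
# instance whose five latest states are all {'state': 'stopped'} is reported;
# one with only three records is skipped (too little history), as is one with
# any non-'stopped' state among the five.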
def aws_instance_stats(accounts):
    """---
    get:
        tags:
            - aws
        produces:
            - application/json
        description: &desc Get instance statistics
        summary: *desc
        responses:
            200:
                description: List of stats
                schema:
                    properties:
                        stats:
                            type: array
                            items:
                                properties:
                                    reserved:
                                        type: number
                                    unreserved:
                                        type: number
                                    unused:
                                        type: number
                                    stopped:
                                        type: number
            403:
                description: Not logged in
            404:
                description: AWS account not registered
    """
    datasets = []
    reserved_report = []
    # Collect the latest stats entry of each account, then sum them into a
    # single aggregated entry.
    for account in accounts:
        instance_stats = AWSStat.latest_instance_stats(account.key)
        if 'stats' in instance_stats and len(instance_stats['stats']):
            datasets.append(instance_stats['stats'][0])
            if 'reserved_instances_report' in instance_stats['stats'][0]:
                reserved_report += instance_stats['stats'][0]['reserved_instances_report']
    if not datasets:
        return jsonify(message=get_next_update_estimation_message_aws(
            accounts, AWS_KEY_PROCESSING_INTERVAL_HOURS))
    return jsonify({
        'stats': [{
            'reserved_report': reserved_report,
            'reserved': sum(d['reserved'] for d in datasets),
            'stopped': sum(d['stopped'] for d in datasets),
            'unreserved': sum(d['unreserved'] for d in datasets),
            'unused': sum(d['unused'] for d in datasets),
            'time': datasets[0]['time'],
        }]
    })
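# Illustrative response, assuming two accounts whose latest stats are
# {'reserved': 2, 'stopped': 1, 'unreserved': 4, 'unused': 0} and
# {'reserved': 1, 'stopped': 0, 'unreserved': 2, 'unused': 1}: the endpoint
# returns one summed entry,
#
#   {"stats": [{"reserved": 3, "stopped": 1, "unreserved": 6, "unused": 1,
#               "reserved_report": [...], "time": <time of first dataset>}]}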
def aws_cpu_days_of_the_week_usage_by_tag(accounts, tag):
    res = []
    days_of_the_week = dict(Monday=0, Tuesday=1, Wednesday=2, Thursday=3,
                            Friday=4, Saturday=5, Sunday=6)
    tpm_cpu_usage_infos = defaultdict(
        lambda: dict(nb_instances=0, usage={x: [] for x in range(0, 7)}))
    for account in accounts:
        usage = AWSStat.latest_daily_cpu_usage_by_tag(account.key)
        if tag in usage['tags']:
            for tag_infos in usage['tags'][tag]:
                tpm_cpu_usage_infos[tag_infos['tag_value']]['nb_instances'] += tag_infos['nb_instances']
                for day in tag_infos['usage']:
                    tpm_cpu_usage_infos[tag_infos['tag_value']]['usage'][
                        days_of_the_week[day['day']]].append(day['cpu'])
    for tag_value, tag_infos in tpm_cpu_usage_infos.iteritems():
        tag_res = dict(nb_instances=tag_infos['nb_instances'],
                       tag_value=tag_value,
                       usage=[])
        for usage_day_idx, usage_details in tag_infos['usage'].iteritems():
            cpu_usage = 0 if not usage_details else sum(usage_details) / len(usage_details)
            tag_res['usage'].append(dict(day=usage_day_idx, cpu=cpu_usage))
        # Sort by weekday index, then convert indices back to day names.
        tag_res['usage'].sort(key=lambda x: x['day'])
        tag_res['usage'] = [dict(day=calendar.day_name[x['day']], cpu=x['cpu'])
                            for x in tag_res['usage']]
        res.append(tag_res)
    return jsonify(dict(values=res))
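# The weekday round-trip above relies on `calendar.day_name` using the same
# Monday-first indexing as the `days_of_the_week` dict:
#
#   >>> import calendar
#   >>> calendar.day_name[0]
#   'Monday'
#   >>> calendar.day_name[6]
#   'Sunday'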
def get_aws_stopped_instances(accounts):
    return jsonify(AWSStat.latest_stopped_instances_report(
        account.key for account in accounts))
def get_aws_available_volumes(accounts):
    return jsonify(AWSStat.latest_available_volumes(
        account.key for account in accounts))
def get_s3_space_usage(accounts):
    """---
    get:
        tags:
            - aws
        produces:
            - application/json
        description: &desc Get S3 space usage
        summary: *desc
        responses:
            200:
                description: List of buckets
                schema:
                    properties:
                        buckets:
                            type: array
                            items:
                                properties:
                                    location:
                                        type: string
                                    name:
                                        type: string
                                    type:
                                        type: string
                                    provider:
                                        type: string
                                    used_space:
                                        type: number
                                    prices:
                                        type: array
                                        items:
                                            properties:
                                                name:
                                                    type: string
                                                cost:
                                                    type: number
            403:
                description: Not logged in
            404:
                description: AWS account not registered
    """
    res = [AWSStat.latest_s3_space_usage(account) for account in accounts]
    buckets = sum((r['buckets'] for r in res if r and 'buckets' in r), [])
    last_updated = None  # TODO: Decide how to handle this with several accounts.
    next_update_delta = timedelta(
        hours=AWS_KEY_PROCESSING_INTERVAL_HOURS)  # TODO: Same here, I put a value for the demos.
    # Round the delta down to whole hours, with a minimum of one hour.
    next_update, remainder = divmod(next_update_delta.seconds, 3600)
    if next_update < 1:
        next_update = 1
    for bucket in buckets:
        bucket['metadata'] = None  # TODO: See how to get the account the bucket comes from.
    if not buckets:
        return jsonify(message=get_next_update_estimation_message_aws(
            accounts, AWS_KEY_PROCESSING_INTERVAL_HOURS),
            last_updated=last_updated,
            next_update=next_update)
    return jsonify({
        'buckets': buckets,
        'last_updated': last_updated,
        'next_update': next_update,
    })
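# Worked example for the `divmod` above, assuming
# AWS_KEY_PROCESSING_INTERVAL_HOURS = 6:
#
#   >>> from datetime import timedelta
#   >>> divmod(timedelta(hours=6).seconds, 3600)
#   (6, 0)
#
# so `next_update` is reported in whole hours, clamped to at least 1. Note
# that `timedelta.seconds` wraps at 24 hours; an interval of a day or more
# would need `.total_seconds()` instead.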
def aws_on_demand_switch_suggestion(accounts):
    return jsonify(AWSStat.latest_on_demand_to_reserved_suggestion(
        account.key for account in accounts))
def process_aws_key(key_ids):
    # TODO: remove this
    if not isinstance(key_ids, list):
        key_ids = [key_ids]
    keys = list(AWSKey.query.filter(AWSKey.id.in_(key_ids)))
    if not keys:
        return
    # Process the least recently fetched key first.
    key = min(keys, key=lambda k: k.last_fetched or datetime(1970, 1, 1))
    since = None
    if key.last_fetched:
        since = key.last_fetched - aws_data_grace
        if key.last_fetched > datetime.utcnow() - aws_data_fetch_freq:
            return
    for k in keys:
        k.last_fetched = datetime.utcnow()
    db.session.commit()
    try:
        processing_start = datetime.utcnow()
        session = boto3.Session(aws_access_key_id=key.key,
                                aws_secret_access_key=key.secret)
        AWSStat(key=key.key,
                time=datetime.utcnow(),
                stat='instances',
                data=get_instance_stats(session)).save()

        # Each record gets a deterministic id (a checksum of time, metric
        # and resource), so re-fetching an overlapping window stays
        # idempotent when bulk-saved.
        def get_instance_metric_records():
            for metric in get_instance_metrics(session, since):
                id = checksum(str(metric['time']), metric['metric'],
                              metric['resource'])
                yield AWSMetric(meta={'id': id}, key=key.key, **metric)

        bulk_save(get_instance_metric_records())

        def get_volume_metric_records():
            for metric in get_volume_metrics(session, since):
                id = checksum(str(metric['time']), metric['metric'],
                              metric['resource'])
                yield AWSMetric(meta={'id': id}, key=key.key, **metric)

        bulk_save(get_volume_metric_records())

        def get_bucket_metrics_records():
            for metric in get_bucket_metrics(session, since):
                id = checksum(str(metric['time']), metric['metric'],
                              metric['resource'])
                yield AWSMetric(meta={'id': id}, key=key.key, **metric)

        bulk_save(get_bucket_metrics_records())

        def get_bucket_object_access_records():
            for log in get_object_access_logs(session, since):
                id = checksum(str(log['time']), log['bucket'], log['object'])
                yield AWSAccessLog(meta={'id': id}, key=key.key, **log)

        bulk_save(get_bucket_object_access_records())

        AWSStat(key=key.key,
                time=datetime.utcnow(),
                stat='ondemandtoreserved',
                data=get_on_demand_to_reserved_suggestion(session, key)).save()
        AWSStat(key=key.key,
                time=datetime.utcnow(),
                stat='s3spaceusage',
                data=get_s3_space_usage(key)).save()
        AWSStat(key=key.key,
                time=datetime.utcnow(),
                stat='detachedvolumes',
                data=get_available_volumes(session)).save()
        AWSStat(key=key.key,
                time=datetime.utcnow(),
                stat='hourlycpubytag',
                data=get_hourly_cpu_usage_by_tag(session, key.key)).save()
        AWSStat(key=key.key,
                time=datetime.utcnow(),
                stat='dailycpubytag',
                data=get_daily_cpu_usage_by_tag(session, key.key)).save()

        my_resources_record = MyResourcesAWS.query.filter(
            MyResourcesAWS.key == key.key).order_by(
            desc(MyResourcesAWS.date)).first()
        if not my_resources_record:
            res = get_providers_comparison_aws(key)
            if res is not None:
                my_resources_record = MyResourcesAWS(key=key.key,
                                                     date=datetime.utcnow())
                my_resources_record.set_json(res)
                db.session.add(my_resources_record)
                db.session.commit()

        my_db_resources_record = MyDBResourcesAWS.query.filter(
            MyDBResourcesAWS.key == key.key).order_by(
            desc(MyDBResourcesAWS.date)).first()
        if not my_db_resources_record:
            res = compare_rds_instances(key)
            my_db_resources_record = MyDBResourcesAWS(key=key.key,
                                                      date=datetime.utcnow())
            my_db_resources_record.set_json(res)
            db.session.add(my_db_resources_record)
            db.session.commit()

        key.error_status = None
        key.last_duration = (datetime.utcnow() - processing_start).total_seconds()
        db.session.commit()
    except Exception as e:
        key.last_duration = (datetime.utcnow() - processing_start).total_seconds()
        except_handler(e, key)
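# `checksum` above is a project helper defined elsewhere; only its call
# pattern appears in this file. As a minimal sketch of the idea (a
# hypothetical stand-in, not the actual implementation), any deterministic
# digest of the identifying fields would do:

import hashlib

def _checksum_sketch(*parts):
    # Identical (time, metric, resource) triples map to the same id, so a
    # re-fetched datapoint overwrites its earlier copy instead of
    # duplicating it in the store.
    return hashlib.sha256('/'.join(parts).encode('utf-8')).hexdigest()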
def get_instances_state():
    # Nested helper: `session`, `key` and `now` come from the enclosing
    # scope, since they are not bound locally here.
    for region, instance in get_all_instances(session):
        yield AWSStat(key=key.key,
                      time=now,
                      stat='instancestate/' + instance.id,
                      data=dict(state=instance.state['Name']))
@runner.task
def generate_stopped_instances_report(key):
    try:
        session = boto3.Session(aws_access_key_id=key.key,
                                aws_secret_access_key=key.secret)
    except Exception as e:
        logging.error("[user={}][key={}] {}".format(key.user.email,
                                                    key.pretty or key.key,
                                                    str(e)))
        aws_credentials_error(key, traceback.format_exc())
        key.error_status = u"bad_key"
        db.session.commit()
        return
    AWSStat(key=key.key,
            time=datetime.utcnow(),
            stat='stoppedinstancesreport',
            data=get_stopped_instances_report(session, key.key)).save()


@runner.task
def send_email_alternative(email, subject, content_plain, content_html,
                           bypass_debug=False):
    f_send_email_alternative(email, subject, content_plain, content_html,
                             bypass_debug)


@runner.task
def send_email(email, subject, text, html, bypass_debug=False):
    # Body reconstructed from the parallel task above: delegate to the
    # plain send helper.
    f_send_email(email, subject, text, html, bypass_debug)
def aws_accounts_m_stats_s3bucketsizepertag(accounts, tag):
    """---
    get:
        tags:
            - aws
        produces:
            - application/csv
        description: &desc Stats about cost and usage of bandwidth and storage on S3 buckets, organised by tag
        summary: *desc
        responses:
            200:
                description: Stats about cost and usage of bandwidth and storage on S3 buckets, organised by tag
            403:
                description: Not logged in
            404:
                description: AWS account not registered
    """
    assert len(accounts) > 0

    def _get_total_sizes_cost_and_names(bucket_names_list, bucket_list):
        total_size = 0
        total_cost = 0
        names = ""
        for bucket in bucket_list['buckets']:
            if _check_if_in_list(bucket_names_list, bucket['name'],
                                 'bucket_name') is not None:
                total_size += float(bucket['used_space'])
                total_cost += _check_if_in_list(bucket['prices'],
                                                bucket['provider'],
                                                'provider')['cost']
                names += bucket['name'] + ", "
        return total_size, names[:-2], total_cost

    def _get_bandwith_info(account, bucket_names):
        bucket_ids = (bucket_names if isinstance(bucket_names, list)
                      else [bucket_names])
        bandwith_cost = AWSDetailedLineitem.get_s3_bandwith_info_and_cost_per_name(
            account.get_aws_user_id(), bucket_ids)
        return bandwith_cost

    def _iterate_over_buckets_in_tag_for_total(bucket_bandwith_stat):
        total_cost = 0
        for bucket in (bucket_bandwith_stat
                       if bucket_bandwith_stat is not None else []):
            total_cost += bucket['cost']
        return total_cost

    def _iterate_over_buckets_and_make_breakdown_bandwith_stat(
            bucket_bandwith_stat, buff_row_csv, tag_value):
        bandwith_cost = 0
        for bucket in bucket_bandwith_stat:
            bandwith_cost += bucket['cost']
            for elem in bucket['transfer_stats']:
                # Transfer volumes are given in GB; convert to bytes.
                if elem['type'] in buff_row_csv:
                    buff_row_csv[elem['type']] += elem['data'] * 1024 * 1024 * 1024
                else:
                    buff_row_csv[elem['type']] = elem['data'] * 1024 * 1024 * 1024
        buff_row_csv['bandwith_cost'] = bandwith_cost
        return buff_row_csv

    def _build_csv_row_and_add_header(bucket_list_tagged, bucket_list,
                                      account, csv_header, csv_row_all):
        for tag_value in bucket_list_tagged['tag_value']:
            bucket_info = _get_total_sizes_cost_and_names(
                tag_value['s3_buckets'], bucket_list)
            bucket_bandwith_stat = _get_bandwith_info(account, bucket_info[1])
            csv_header = _append_to_header_list(
                csv_header,
                _build_list_used_transfer_types(bucket_bandwith_stat))
            csv_row = {
                "tag_key": bucket_list_tagged['tag_key'].split(':')[1],
                "tag_value": tag_value['tag_value'],
                "account_id": tag_value['s3_buckets'][0]["account_id"],
                "total_size": bucket_info[0],
                "bucket_names": bucket_info[1],
                "storage_cost": bucket_info[2],
            }
            csv_row = _iterate_over_buckets_and_make_breakdown_bandwith_stat(
                bucket_bandwith_stat, csv_row, tag_value)
            csv_row['total_cost'] = csv_row['storage_cost'] + csv_row['bandwith_cost']
            csv_row_all.append(csv_row)
        return csv_header, csv_row_all

    def _select_bucket_list_tag(bucket_list_per_tag, tag):
        for bucket_list_tagged in bucket_list_per_tag:
            if tag in bucket_list_tagged['tag_key'].split(':')[1]:
                return bucket_list_tagged

    csv_header = [
        "account_id", "tag_key", "tag_value", "total_size", "bucket_names",
        "bandwith_cost", "storage_cost", "total_cost"
    ]
    csv_data = []
    for account in accounts:
        bucket_list_per_tag = AWSDetailedLineitem.get_s3_buckets_per_tag(
            account.get_aws_user_id())
        bucket_list_tagged = _select_bucket_list_tag(bucket_list_per_tag, tag)
        bucket_list = AWSStat.latest_s3_space_usage(account)
        csv_header, csv_data = _build_csv_row_and_add_header(
            bucket_list_tagged, bucket_list, account, csv_header, csv_data)
    if 'csv' in request.args:
        return Response(generate_csv_clean(csv_data, csv_header))
    return jsonify(res=csv_data)
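# Illustrative only: one row of the per-tag CSV built above, with
# hypothetical values. Transfer-type columns (appended to the header by
# _append_to_header_list) only appear when the buckets actually had that
# kind of traffic:
#
#   {"account_id": "123456789012", "tag_key": "env", "tag_value": "prod",
#    "total_size": 1073741824.0, "bucket_names": "logs, assets",
#    "storage_cost": 2.5, "bandwith_cost": 0.7, "total_cost": 3.2}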
def aws_accounts_m_stats_s3bucketsizepername(accounts):
    """---
    get:
        tags:
            - aws
        produces:
            - application/csv
        description: &desc Stats about cost and usage of bandwidth and storage on S3 buckets, organised by name
        summary: *desc
        responses:
            200:
                description: Stats about cost and usage of bandwidth and storage on S3 buckets, organised by name
            403:
                description: Not logged in
            404:
                description: AWS account not registered
    """

    def _create_bandwith_breakdown(transfer_types_list, csv_row,
                                   bucket_bandwith_stat):
        for elem in transfer_types_list:
            _current_transfer_type = _check_if_in_list(
                bucket_bandwith_stat['transfer_stats'], elem, 'type')
            if _current_transfer_type is not None:
                # The data is given in GB by default; convert to bytes.
                csv_row[elem] = _current_transfer_type['data'] * 1024 * 1024 * 1024
        return csv_row

    def _create_csv_rows(bucket_list, account, bandwith_cost, csv_row_all):
        for bucket in bucket_list['buckets']:
            csv_row = {
                'account_id': account.get_aws_user_id(),
                'used_space': bucket['used_space'],
                'name': bucket['name'],
                'storage_cost': _check_if_in_list(bucket['prices'],
                                                  bucket['provider'],
                                                  'provider')['cost'],
            }
            bucket_bandwith_stat = _check_if_in_list(bandwith_cost,
                                                     bucket['name'],
                                                     'bucket_name')
            if bucket_bandwith_stat is not None:
                csv_row = _create_bandwith_breakdown(transfer_types_list,
                                                     csv_row,
                                                     bucket_bandwith_stat)
            csv_row['bandwith_cost'] = (bucket_bandwith_stat['cost']
                                        if bucket_bandwith_stat is not None
                                        else 0)
            csv_row['total_cost'] = csv_row['storage_cost'] + csv_row['bandwith_cost']
            csv_row_all.append(csv_row)
        return csv_row_all

    assert len(accounts) > 0
    csv_header = [
        'account_id', 'name', 'used_space', 'storage_cost', 'bandwith_cost',
        'total_cost'
    ]
    csv_row_all = []
    for account in accounts:
        bucket_list = AWSStat.latest_s3_space_usage(account)
        bucket_ids = [
            bucket['name'] for bucket in
            (bucket_list['buckets'] if bucket_list is not None else [])
        ]
        bandwith_cost = AWSDetailedLineitem.get_s3_bandwith_info_and_cost_per_name(
            account.get_aws_user_id(), bucket_ids)
        transfer_types_list = _build_list_used_transfer_types(bandwith_cost)
        csv_header = _append_to_header_list(csv_header, transfer_types_list)
        csv_row_all = _create_csv_rows(bucket_list, account, bandwith_cost,
                                       csv_row_all)
    if len(csv_row_all) > 0 and csv_row_all[0] is None:
        csv_row_all = []
    if 'csv' in request.args:
        return Response(generate_csv_clean(csv_row_all, csv_header))
    return jsonify(accounts=csv_row_all)
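# The GB-to-bytes conversion above is a plain 1024^3 multiplication, e.g.
# a transfer stat of 0.5 GB becomes 0.5 * 1024 * 1024 * 1024 = 536870912.0
# bytes in the CSV row.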
def compute_reservation_forecast(keys):
    if isinstance(keys, models.AWSKey):
        keys = [keys]
    elif not isinstance(keys, list):
        keys = list(keys)
    if not all(isinstance(k, models.AWSKey) for k in keys):
        raise TypeError('All keys must be AWSKey.')
    now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    range_end = now.replace(hour=0, minute=0, second=0, microsecond=0)
    range_end -= timedelta(days=1)
    # Look at roughly the last four months of usage, starting on the first
    # day of a month.
    range_start = range_end - timedelta(days=120)
    range_start = range_start.replace(day=1)
    s = AWSDetailedLineitem.get_instance_type(
        [k.get_aws_user_id() for k in keys],
        date_from=range_start,
        date_to=range_end)
    instance_type_hours = defaultdict(list)
    first_hour = datetime(2099, 1, 1)
    for r in s:
        rhour = datetime.strptime(r['hour'], "%Y-%m-%dT%H:%M:%S")
        if r['region'] != 'unknown':  # Some EC2 instances have no region, sometimes...
            instance_type_hours[(r['region'], r['instance'])].append(
                (rhour, r['ridCount']))
            first_hour = min(first_hour, rhour)
    hours_ahead = 120 * 24
    total_hours = (range_end - first_hour).total_seconds() / 3600 - 1 + hours_ahead
    instance_types = []
    lookup = get_instance_lookup()
    for (region, instance_type), hours in instance_type_hours.iteritems():
        hours = count_forecast(hours, range_start, now, hours_ahead)
        prices = lookup[region, instance_type]
        price_results = get_monthly_prices(
            total_hours, hours,
            [p['amortized'] for p in prices['reserved']],
            prices['ondemand']['amortized'])
        ps = []
        for pricing, (_, count, months) in zip(
                prices['reserved'] + [prices['ondemand']], price_results):
            pricing = dict(pricing)
            if count is not None:
                pricing['count'] = count
            pricing['months'] = [dict(month=m.strftime('%Y-%m'), cost=c)
                                 for m, c in months[:-1]]
            ps.append(pricing)
        instance_types.append(dict(region=region,
                                   type=instance_type,
                                   pricing_options=ps))
    available_volumes = AWSStat.latest_available_volumes([k.key for k in keys])
    now = datetime.utcnow()
    date_from = now.replace(hour=0, minute=0, second=0,
                            microsecond=0) - relativedelta(months=6)
    date_to = now.replace(hour=23, minute=59, second=59, microsecond=999999)
    volume_monthly_costs = AWSDetailedLineitem.get_monthly_cost_by_resource(
        available_volumes['volumes'] if 'volumes' in available_volumes else (),
        date_from=date_from,
        date_to=date_to)
    resources = AWSMetric.underutilized_resources([k.key for k in keys])
    rids = set(r['id'] for r in resources['resources'])
    months = AWSDetailedLineitem.get_monthly_cost_by_resource(
        rids, date_from=date_from, date_to=date_to)
    # Reduced cost for underutilized instances is estimated at 20% of their
    # current monthly cost.
    reduced_instance_costs = {k: v * 0.2 for k, v in months.iteritems()}
    return dict(
        instances=instance_types,
        volume_monthly_costs=volume_monthly_costs,
        reduced_instance_costs=reduced_instance_costs,
    )
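# Worked example of the date window above, assuming the job runs on
# 2017-05-10: `range_end` becomes 2017-05-09 00:00; `range_start` is first
# 2017-01-09 (120 days earlier) and is then snapped back to 2017-01-01 by
# `.replace(day=1)`. The forecast itself extends `hours_ahead` =
# 120 * 24 = 2880 hours past the end of the billing data.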