def add_metrics(resource_id, request_params, dataset, proto_version):
    current_time = datetime.utcnow().replace(second=0, microsecond=0)
    try:
        resource = ApplicationService.by_id_cached()(resource_id)
        resource = DBSession.merge(resource, load=False)
        es_docs = []
        rows = []
        for metric in dataset:
            # normalize the server_name tag, falling back to the top-level value
            tags = dict(metric["tags"])
            server_n = tags.get("server_name", metric["server_name"]).lower()
            tags["server_name"] = server_n or "unknown"
            new_metric = Metric(
                timestamp=metric["timestamp"],
                resource_id=resource.resource_id,
                namespace=metric["namespace"],
                tags=tags,
            )
            rows.append(new_metric)
            es_docs.append(new_metric.es_doc())
        session = DBSession()
        session.bulk_save_objects(rows)
        session.flush()

        action = "METRICS"
        metrics_msg = "%s: %s, metrics: %s, proto:%s" % (
            action,
            str(resource),
            len(dataset),
            proto_version,
        )
        log.info(metrics_msg)

        mark_changed(session)
        # bump per-minute, per-user and per-app counters in redis
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS["counters"]["metrics_per_minute"].format(current_time)
        redis_pipeline.incr(key, len(rows))
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
            resource.owner_user_id, current_time
        )
        redis_pipeline.incr(key, len(rows))
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS["counters"]["metrics_per_hour_per_app"].format(
            resource_id, current_time.replace(minute=0)
        )
        redis_pipeline.incr(key, len(rows))
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                current_time.replace(minute=0)
            ),
            resource_id,
        )
        redis_pipeline.execute()
        add_metrics_es(es_docs)
        return True
    except Exception as exc:
        print_traceback(log)
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        add_metrics.retry(exc=exc)
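
# Illustrative only: a minimal sketch of the per-metric payload shape that
# add_metrics() above relies on ("timestamp", "namespace", "server_name", "tags").
# The concrete values are assumptions for documentation purposes, not a schema
# defined by this module; "tags" may be anything dict() accepts (a mapping or a
# sequence of key/value pairs).
_EXAMPLE_METRICS_DATASET = [
    {
        "timestamp": "2016-01-01T12:00:00",
        "namespace": "front_page.requests",
        "server_name": "web01",
        "tags": [["server_name", "web01"], ["response_time", 0.42]],
    }
]
# Typically queued through celery, e.g.:
# add_metrics.delay(resource_id, request_params, _EXAMPLE_METRICS_DATASET, proto_version)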
def add_uptime_stats(params, metric):
    proto_version = parse_proto(params.get("protocol_version"))
    try:
        application = ApplicationService.by_id_cached()(metric["resource_id"])
        application = DBSession.merge(application, load=False)
        if not application:
            return
        # bucket the reading into its minute interval
        start_interval = convert_date(metric["timestamp"])
        start_interval = start_interval.replace(second=0, microsecond=0)
        new_metric = UptimeMetric(
            start_interval=start_interval,
            response_time=metric["response_time"],
            status_code=metric["status_code"],
            is_ok=metric["is_ok"],
            location=metric.get("location", 1),
            tries=metric["tries"],
            resource_id=application.resource_id,
            owner_user_id=application.owner_user_id,
        )
        DBSession.add(new_metric)
        DBSession.flush()
        add_metrics_uptime([new_metric.es_doc()])
        if metric["is_ok"]:
            # check passed: close any active uptime alert events older
            # than the 6-minute cutoff
            event_types = [Event.types["uptime_alert"]]
            statuses = [Event.statuses["active"]]
            events = EventService.by_type_and_status(
                event_types,
                statuses,
                older_than=(datetime.utcnow() - timedelta(minutes=6)),
                app_ids=[application.resource_id],
            )
            for event in events:
                event.close()
        else:
            # failed check: see whether an alert should be raised
            UptimeMetricService.check_for_alert(application, metric=metric)
        action = "METRICS UPTIME"
        metrics_msg = "%s: %s, proto:%s" % (action, str(application), proto_version)
        log.info(metrics_msg)
        session = DBSession()
        mark_changed(session)
        return True
    except Exception as exc:
        print_traceback(log)
        add_uptime_stats.retry(exc=exc)
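
# Illustrative only: the uptime check dict fields read by add_uptime_stats()
# above; the values are assumptions, and "location" is optional (defaults to 1).
_EXAMPLE_UPTIME_METRIC = {
    "resource_id": 1,
    "timestamp": "2016-01-01T12:00:00",
    "response_time": 0.25,
    "status_code": 200,
    "is_ok": True,
    "location": 1,
    "tries": 1,
}
# params is expected to carry the protocol version; the value below is assumed:
# add_uptime_stats.delay({"protocol_version": "0.5"}, _EXAMPLE_UPTIME_METRIC)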
def add_logs(resource_id, request_params, dataset, **kwargs):
    proto_version = request_params.get("protocol_version")
    current_time = datetime.utcnow().replace(second=0, microsecond=0)
    try:
        es_docs = collections.defaultdict(list)
        resource = ApplicationService.by_id_cached()(resource_id)
        resource = DBSession.merge(resource, load=False)
        ns_pairs = []
        for entry in dataset:
            # gather pk and ns so we can remove older versions of row later
            if entry["primary_key"] is not None:
                ns_pairs.append({"pk": entry["primary_key"], "ns": entry["namespace"]})
            log_entry = Log()
            log_entry.set_data(entry, resource=resource)
            log_entry._skip_ft_index = True
            resource.logs.append(log_entry)
            DBSession.flush()
            # insert non pk rows first
            if entry["primary_key"] is None:
                es_docs[log_entry.partition_id].append(log_entry.es_doc())

        # 2nd pass to delete all log entries from db for same pk/ns pair
        if ns_pairs:
            ids_to_delete = []
            es_docs = collections.defaultdict(list)
            es_docs_to_delete = collections.defaultdict(list)
            found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                list_of_pairs=ns_pairs
            )
            log_dict = {}
            for log_entry in found_pkey_logs:
                log_key = (log_entry.primary_key, log_entry.namespace)
                if log_key not in log_dict:
                    log_dict[log_key] = []
                log_dict[log_key].append(log_entry)

            for ns, entry_list in log_dict.items():
                entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                # newest row needs to be indexed in es
                log_entry = entry_list[-1]
                # delete everything from pg and ES, leave the last row in pg
                for e in entry_list[:-1]:
                    ids_to_delete.append(e.log_id)
                    es_docs_to_delete[e.partition_id].append(e.delete_hash)

                es_docs_to_delete[log_entry.partition_id].append(log_entry.delete_hash)

                es_docs[log_entry.partition_id].append(log_entry.es_doc())

            if ids_to_delete:
                query = DBSession.query(Log).filter(Log.log_id.in_(ids_to_delete))
                query.delete(synchronize_session=False)
            if es_docs_to_delete:
                # batch this to avoid problems with default ES bulk limits
                for es_index in es_docs_to_delete.keys():
                    for batch in in_batches(es_docs_to_delete[es_index], 20):
                        query = {"query": {"terms": {"delete_hash": batch}}}
                        try:
                            Datastores.es.delete_by_query(
                                index=es_index,
                                doc_type="log",
                                body=query,
                                conflicts="proceed",
                            )
                        except elasticsearch.exceptions.NotFoundError as exc:
                            msg = "skipping index {}".format(es_index)
                            log.info(msg)

        total_logs = len(dataset)

        log_msg = "LOG_NEW: %s, entries: %s, proto:%s" % (
            str(resource),
            total_logs,
            proto_version,
        )
        log.info(log_msg)
        # mark_changed(session)
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS["counters"]["logs_per_minute"].format(current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
            resource.owner_user_id, current_time
        )
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS["counters"]["logs_per_hour_per_app"].format(
            resource_id, current_time.replace(minute=0)
        )
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                current_time.replace(minute=0)
            ),
            resource_id,
        )
        redis_pipeline.execute()
        add_logs_es(es_docs)
        return True
    except Exception as exc:
        print_traceback(log)
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        add_logs.retry(exc=exc)
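
# `in_batches` (used above to keep delete_by_query payloads small) is a project
# helper imported elsewhere in this module; the sketch below only documents its
# assumed behaviour (yielding consecutive fixed-size chunks) and is not the
# actual implementation.
def _in_batches_sketch(seq, batch_size):
    # yield slices of at most `batch_size` items, e.g. 67 hashes -> 20, 20, 20, 7
    for start in range(0, len(seq), batch_size):
        yield seq[start:start + batch_size]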