def logs_cleanup(resource_id, filter_settings):
    request = get_current_request()
    request.tm.begin()
    # mirror the SQL filter below in an Elasticsearch query body
    es_query = {
        "query": {"bool": {"filter": [{"term": {"resource_id": resource_id}}]}}
    }

    query = DBSession.query(Log).filter(Log.resource_id == resource_id)
    if filter_settings["namespace"]:
        query = query.filter(Log.namespace == filter_settings["namespace"][0])
        es_query["query"]["bool"]["filter"].append(
            {"term": {"namespace": filter_settings["namespace"][0]}}
        )
    # delete matching rows from PostgreSQL first, then the same set from Elasticsearch
    query.delete(synchronize_session=False)
    request.tm.commit()
    Datastores.es.delete_by_query(
        index="rcae_l_*", doc_type="log", body=es_query, conflicts="proceed"
    )
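# For illustration only: with a hypothetical resource_id of 5 and
# filter_settings of {"namespace": ["app.auth"]} (both values assumed, not from
# the function above), the body passed to delete_by_query would be:
example_es_query = {
    "query": {
        "bool": {
            "filter": [
                {"term": {"resource_id": 5}},
                {"term": {"namespace": "app.auth"}},
            ]
        }
    }
}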
def unique_alert_email_validator(form, field):
    q = DBSession.query(AlertChannel)
    q = q.filter(AlertChannel.channel_name == 'email')
    q = q.filter(AlertChannel.channel_value == field.data)
    email = q.first()
    if email:
        raise wtforms.ValidationError(
            'This email already exists in alert system')
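# A minimal usage sketch, assuming a wtforms form wires the validator above to
# an email field; the form class and field names here are illustrative
# assumptions, not taken from the snippet itself.
import wtforms

class ExampleAlertEmailForm(wtforms.Form):
    email = wtforms.StringField(
        "Email",
        validators=[wtforms.validators.Email(), unique_alert_email_validator],
    )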
def logs_cleanup(resource_id, filter_settings):
    request = get_current_request()
    request.tm.begin()
    es_query = {
        "_source": False,
        "size": 5000,
        "query": {
            "filtered": {
                "filter": {
                    "and": [{"term": {"resource_id": resource_id}}]
                }
            }
        }
    }

    query = DBSession.query(Log).filter(Log.resource_id == resource_id)
    if filter_settings['namespace']:
        query = query.filter(Log.namespace == filter_settings['namespace'][0])
        es_query['query']['filtered']['filter']['and'].append(
            {"term": {"namespace": filter_settings['namespace'][0]}})
    query.delete(synchronize_session=False)
    request.tm.commit()
    # walk the matching documents with the legacy scan/scroll API and delete
    # them in small bulk batches
    result = request.es_conn.search(es_query, index='rcae_l_*',
                                    doc_type='log',
                                    es_scroll='1m',
                                    es_search_type='scan')
    scroll_id = result['_scroll_id']
    while True:
        log.warning('log_cleanup, app:{} ns:{} batch'.format(
            resource_id, filter_settings['namespace']))
        es_docs_to_delete = []
        result = request.es_conn.send_request(
            'POST', ['_search', 'scroll'],
            body=scroll_id, query_params={"scroll": '1m'})
        scroll_id = result['_scroll_id']
        if not result['hits']['hits']:
            break
        for doc in result['hits']['hits']:
            es_docs_to_delete.append({"id": doc['_id'],
                                      "index": doc['_index']})

        for batch in in_batches(es_docs_to_delete, 10):
            Datastores.es.bulk([
                Datastores.es.delete_op(doc_type='log', **to_del)
                for to_del in batch
            ])
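# A minimal sketch of the in_batches() helper used above and in add_logs()
# below, assuming it simply yields fixed-size chunks of a sequence; the
# project's real helper may differ.
def in_batches_sketch(seq, batch_size):
    """Yield successive batch_size-sized slices of seq."""
    for start in range(0, len(seq), batch_size):
        yield seq[start:start + batch_size]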
def update_tag_counter(tag_name, tag_value, count):
    try:
        query = DBSession.query(Tag).filter(Tag.name == tag_name).filter(
            sa.cast(Tag.value, sa.types.TEXT) ==
            sa.cast(json.dumps(tag_value), sa.types.TEXT))
        query.update({'times_seen': Tag.times_seen + count,
                      'last_timestamp': datetime.utcnow()},
                     synchronize_session=False)
        session = DBSession()
        mark_changed(session)
        return True
    except Exception as exc:
        print_traceback(log)
        update_tag_counter.retry(exc=exc)
def update_tag_counter(tag_name, tag_value, count):
    try:
        query = (
            DBSession.query(Tag)
            .filter(Tag.name == tag_name)
            .filter(
                sa.cast(Tag.value, sa.types.TEXT)
                == sa.cast(json.dumps(tag_value), sa.types.TEXT)
            )
        )
        query.update(
            {
                "times_seen": Tag.times_seen + count,
                "last_timestamp": datetime.utcnow(),
            },
            synchronize_session=False,
        )
        session = DBSession()
        mark_changed(session)
        return True
    except Exception as exc:
        print_traceback(log)
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        update_tag_counter.retry(exc=exc)
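# A minimal sketch showing why the bare .retry() calls above work: the function
# is assumed to be registered as a Celery task. The app name, retry settings,
# and the run_tag_update() stand-in below are illustrative assumptions, not
# taken from the snippets.
from celery import Celery

example_celery = Celery("example")

def run_tag_update(tag_name, tag_value, count):
    """Hypothetical stand-in for the Tag UPDATE performed above."""

@example_celery.task(default_retry_delay=5, max_retries=2)
def example_counter_task(tag_name, tag_value, count):
    try:
        run_tag_update(tag_name, tag_value, count)
        return True
    except Exception as exc:
        # retry() re-queues the task with the same arguments after the delay
        example_counter_task.retry(exc=exc)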
def slow_request(request):
    """
    Test a request that has some slow entries - including nested calls
    """
    users = DBSession.query(User).all()
    import random
    some_val = random.random()
    import threading
    t_id = id(threading.currentThread())
    log.warning("slow_log %s %s " % (some_val, t_id))
    log.critical("tid %s" % t_id)

    @time_trace(name="baz_func %s" % some_val, min_duration=0.1)
    def baz(arg):
        time.sleep(0.32)
        return arg

    requests.get("http://ubuntu.com")

    @time_trace(name="foo_func %s %s" % (some_val, t_id), min_duration=0.1)
    def foo(arg):
        time.sleep(0.52)
        log.warning("foo_func %s %s" % (some_val, t_id))
        requests.get("http://ubuntu.com?test=%s" % some_val)
        return bar(arg)

    @time_trace(name="bar_func %s %s" % (some_val, t_id), min_duration=0.1)
    def bar(arg):
        log.warning("bar_func %s %s" % (some_val, t_id))
        time.sleep(1.52)
        baz(arg)
        baz(arg)
        return baz(arg)

    foo("a")
    return {}
def add_logs(resource_id, request_params, dataset, **kwargs):
    proto_version = request_params.get("protocol_version")
    current_time = datetime.utcnow().replace(second=0, microsecond=0)

    try:
        es_docs = collections.defaultdict(list)
        resource = ApplicationService.by_id_cached()(resource_id)
        resource = DBSession.merge(resource, load=False)
        ns_pairs = []
        for entry in dataset:
            # gather pk and ns so we can remove older versions of row later
            if entry["primary_key"] is not None:
                ns_pairs.append({"pk": entry["primary_key"],
                                 "ns": entry["namespace"]})
            log_entry = Log()
            log_entry.set_data(entry, resource=resource)
            log_entry._skip_ft_index = True
            resource.logs.append(log_entry)
            DBSession.flush()
            # insert non pk rows first
            if entry["primary_key"] is None:
                es_docs[log_entry.partition_id].append(log_entry.es_doc())

        # 2nd pass to delete all log entries from db for same pk/ns pair
        if ns_pairs:
            ids_to_delete = []
            es_docs = collections.defaultdict(list)
            es_docs_to_delete = collections.defaultdict(list)
            found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                list_of_pairs=ns_pairs)
            log_dict = {}
            for log_entry in found_pkey_logs:
                log_key = (log_entry.primary_key, log_entry.namespace)
                if log_key not in log_dict:
                    log_dict[log_key] = []
                log_dict[log_key].append(log_entry)

            for ns, entry_list in log_dict.items():
                entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                # newest row needs to be indexed in es
                log_entry = entry_list[-1]
                # delete everything from pg and ES, leave the last row in pg
                for e in entry_list[:-1]:
                    ids_to_delete.append(e.log_id)
                    es_docs_to_delete[e.partition_id].append(e.delete_hash)

                es_docs_to_delete[log_entry.partition_id].append(
                    log_entry.delete_hash)

                es_docs[log_entry.partition_id].append(log_entry.es_doc())

            if ids_to_delete:
                query = DBSession.query(Log).filter(
                    Log.log_id.in_(ids_to_delete))
                query.delete(synchronize_session=False)
            if es_docs_to_delete:
                # batch this to avoid problems with default ES bulk limits
                for es_index in es_docs_to_delete.keys():
                    for batch in in_batches(es_docs_to_delete[es_index], 20):
                        query = {"query": {"terms": {"delete_hash": batch}}}
                        try:
                            Datastores.es.delete_by_query(
                                index=es_index,
                                doc_type="log",
                                body=query,
                                conflicts="proceed",
                            )
                        except elasticsearch.exceptions.NotFoundError as exc:
                            msg = "skipping index {}".format(es_index)
                            log.info(msg)

        total_logs = len(dataset)

        log_msg = "LOG_NEW: %s, entries: %s, proto:%s" % (
            str(resource),
            total_logs,
            proto_version,
        )
        log.info(log_msg)
        # mark_changed(session)
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS["counters"]["logs_per_minute"].format(current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS["counters"]["logs_per_hour_per_app"].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                current_time.replace(minute=0)),
            resource_id,
        )
        redis_pipeline.execute()
        add_logs_es(es_docs)
        return True
    except Exception as exc:
        print_traceback(log)
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        add_logs.retry(exc=exc)
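# A minimal sketch of what the add_logs_es() call above could do, assuming the
# elasticsearch-py bulk helper and the same es_docs layout built above (keys
# are partition/index names, values are lists of document bodies). This is an
# illustrative assumption, not the project's actual implementation.
from elasticsearch import helpers

def add_logs_es_sketch(es_docs):
    for partition, docs in es_docs.items():
        actions = [{"_index": partition, "_type": "log", "_source": doc}
                   for doc in docs]
        helpers.bulk(Datastores.es, actions)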