Example #1
0
def logs_cleanup(resource_id, filter_settings):
    """Purge every log entry belonging to *resource_id* (optionally
    narrowed to a single namespace) from both the relational store and
    Elasticsearch.

    :param resource_id: id of the application whose logs are removed
    :param filter_settings: dict; ``filter_settings["namespace"]`` may
        hold a list whose first element restricts the cleanup
    """
    request = get_current_request()
    request.tm.begin()

    # Term clauses shared by the Elasticsearch delete-by-query payload.
    term_filters = [{"term": {"resource_id": resource_id}}]

    db_query = DBSession.query(Log).filter(Log.resource_id == resource_id)
    namespaces = filter_settings["namespace"]
    if namespaces:
        # Restrict both the SQL delete and the ES delete to one namespace.
        db_query = db_query.filter(Log.namespace == namespaces[0])
        term_filters.append({"term": {"namespace": namespaces[0]}})

    # Bulk-delete from the database first, then commit before touching ES.
    db_query.delete(synchronize_session=False)
    request.tm.commit()

    es_query = {"query": {"bool": {"filter": term_filters}}}
    Datastores.es.delete_by_query(index="rcae_l_*",
                                  doc_type="log",
                                  body=es_query,
                                  conflicts="proceed")
Example #2
0
def unique_alert_email_validator(form, field):
    """WTForms validator that rejects an email address already
    registered as an 'email' alert channel."""
    existing_channel = (
        DBSession.query(AlertChannel)
        .filter(AlertChannel.channel_name == 'email')
        .filter(AlertChannel.channel_value == field.data)
        .first())
    if existing_channel:
        raise wtforms.ValidationError(
            'This email already exists in alert system')
Example #3
0
def logs_cleanup(resource_id, filter_settings):
    """Purge every log entry for *resource_id* (optionally narrowed to a
    single namespace) from the database, then remove the matching
    documents from Elasticsearch via a scan/scroll + bulk-delete pass.

    :param resource_id: id of the application whose logs are removed
    :param filter_settings: dict; ``filter_settings['namespace']`` may
        hold a list whose first element restricts the cleanup
    """
    request = get_current_request()
    request.tm.begin()
    # Scan query for the legacy ES client: no document bodies needed
    # (_source: False), ids fetched 5000 at a time per scroll page.
    es_query = {
        "_source": False,
        "size": 5000,
        "query": {
            "filtered": {
                "filter": {
                    "and": [{
                        "term": {
                            "resource_id": resource_id
                        }
                    }]
                }
            }
        }
    }

    query = DBSession.query(Log).filter(Log.resource_id == resource_id)
    if filter_settings['namespace']:
        # Restrict both the SQL delete and the ES scan to one namespace.
        query = query.filter(Log.namespace == filter_settings['namespace'][0])
        es_query['query']['filtered']['filter']['and'].append(
            {"term": {
                "namespace": filter_settings['namespace'][0]
            }})
    # Bulk-delete the rows and commit before starting the ES cleanup, so
    # the database is consistent even if the scroll loop below fails.
    query.delete(synchronize_session=False)
    request.tm.commit()
    # Open a scan-type scroll context kept alive for 1 minute per fetch.
    result = request.es_conn.search(es_query,
                                    index='rcae_l_*',
                                    doc_type='log',
                                    es_scroll='1m',
                                    es_search_type='scan')
    scroll_id = result['_scroll_id']
    while True:
        log.warning('log_cleanup, app:{} ns:{} batch'.format(
            resource_id, filter_settings['namespace']))
        es_docs_to_delete = []
        # Fetch the next page of hits; each response carries the scroll
        # id to use for the following request.
        result = request.es_conn.send_request('POST', ['_search', 'scroll'],
                                              body=scroll_id,
                                              query_params={"scroll": '1m'})
        scroll_id = result['_scroll_id']
        if not result['hits']['hits']:
            # Scroll exhausted — every matching document has been seen.
            break
        for doc in result['hits']['hits']:
            es_docs_to_delete.append({
                "id": doc['_id'],
                "index": doc['_index']
            })

        # Delete this page's documents in small bulk requests.
        for batch in in_batches(es_docs_to_delete, 10):
            Datastores.es.bulk([
                Datastores.es.delete_op(doc_type='log', **to_del)
                for to_del in batch
            ])
Example #4
0
def update_tag_counter(tag_name, tag_value, count):
    """Increment ``times_seen`` (and refresh ``last_timestamp``) on the
    Tag row identified by *tag_name* / *tag_value*.

    :param tag_name: name of the tag row to update
    :param tag_value: JSON-serializable value matched (as text) against
        the stored tag value
    :param count: amount added to ``times_seen``
    :return: True on success; on failure the task retries itself
    """
    try:
        query = DBSession.query(Tag).filter(Tag.name == tag_name).filter(
            sa.cast(Tag.value, sa.types.TEXT) == sa.cast(
                json.dumps(tag_value), sa.types.TEXT))
        query.update(
            {
                'times_seen': Tag.times_seen + count,
                'last_timestamp': datetime.utcnow()
            },
            synchronize_session=False)
        # Query.update() bypasses session bookkeeping, so flag the
        # session as dirty for the transaction manager explicitly.
        session = DBSession()
        mark_changed(session)
        return True
    except Exception as exc:
        print_traceback(log)
        # When Celery runs eagerly, retry() would re-execute the task
        # synchronously and could loop forever — propagate instead.
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        update_tag_counter.retry(exc=exc)
Example #5
0
def update_tag_counter(tag_name, tag_value, count):
    """Bump the seen-counter and last-seen timestamp of one Tag row.

    :param tag_name: name of the tag row to update
    :param tag_value: JSON-serializable value matched (as text) against
        the stored tag value
    :param count: amount added to ``times_seen``
    :return: True on success; on failure the task retries itself
    """
    try:
        serialized_value = sa.cast(json.dumps(tag_value), sa.types.TEXT)
        tag_query = DBSession.query(Tag)
        tag_query = tag_query.filter(Tag.name == tag_name)
        tag_query = tag_query.filter(
            sa.cast(Tag.value, sa.types.TEXT) == serialized_value)
        tag_query.update(
            {
                "times_seen": Tag.times_seen + count,
                "last_timestamp": datetime.utcnow(),
            },
            synchronize_session=False,
        )
        # Bulk update bypasses the session, so mark it dirty explicitly.
        mark_changed(DBSession())
        return True
    except Exception as exc:
        print_traceback(log)
        # Eager mode would re-run retry() synchronously — re-raise instead.
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        update_tag_counter.retry(exc=exc)
Example #6
0
def slow_request(request):
    """Generate a request trace containing slow, nested timed calls.

    Exercises the timing instrumentation: emits log entries, performs
    outbound HTTP requests and runs nested ``time_trace``-decorated
    functions that sleep long enough to register as slow entries.
    """
    users = DBSession.query(User).all()
    import random
    import threading

    rand_marker = random.random()
    thread_id = id(threading.currentThread())
    log.warning("slow_log %s %s " % (rand_marker, thread_id))
    log.critical("tid %s" % thread_id)

    @time_trace(name="baz_func %s" % rand_marker, min_duration=0.1)
    def baz(arg):
        time.sleep(0.32)
        return arg

    requests.get("http://ubuntu.com")

    @time_trace(name="foo_func %s %s" % (rand_marker, thread_id),
                min_duration=0.1)
    def foo(arg):
        time.sleep(0.52)
        log.warning("foo_func %s %s" % (rand_marker, thread_id))
        requests.get("http://ubuntu.com?test=%s" % rand_marker)
        return bar(arg)

    @time_trace(name="bar_func %s %s" % (rand_marker, thread_id),
                min_duration=0.1)
    def bar(arg):
        log.warning("bar_func %s %s" % (rand_marker, thread_id))
        time.sleep(1.52)
        baz(arg)
        baz(arg)
        return baz(arg)

    foo("a")
    return {}
Example #7
0
def add_logs(resource_id, request_params, dataset, **kwargs):
    """Persist a batch of incoming log entries for one application.

    Each entry is stored in the database and queued as an Elasticsearch
    document.  Entries that carry a ``primary_key`` replace any older
    rows sharing the same (primary_key, namespace) pair in both stores.
    Per-minute/per-hour Redis counters are bumped on success.  On
    failure the task retries (or re-raises when Celery runs eagerly).

    :param resource_id: id of the application the logs belong to
    :param request_params: parsed request params; only
        ``protocol_version`` is read here (for the summary log line)
    :param dataset: iterable of dict log entries
    :return: True on success
    """
    proto_version = request_params.get("protocol_version")
    current_time = datetime.utcnow().replace(second=0, microsecond=0)

    try:
        # partition_id -> list of ES documents to index
        es_docs = collections.defaultdict(list)
        resource = ApplicationService.by_id_cached()(resource_id)
        resource = DBSession.merge(resource, load=False)
        ns_pairs = []
        for entry in dataset:
            # gather pk and ns so we can remove older versions of row later
            if entry["primary_key"] is not None:
                ns_pairs.append({
                    "pk": entry["primary_key"],
                    "ns": entry["namespace"]
                })
            log_entry = Log()
            log_entry.set_data(entry, resource=resource)
            log_entry._skip_ft_index = True
            resource.logs.append(log_entry)
            DBSession.flush()
            # insert non pk rows first
            if entry["primary_key"] is None:
                es_docs[log_entry.partition_id].append(log_entry.es_doc())

        # 2nd pass to delete all log entries from db for same pk/ns pair
        if ns_pairs:
            ids_to_delete = []
            # Reset: with pk entries present, ES docs are rebuilt below
            # from the newest surviving row of each pk/ns pair.
            es_docs = collections.defaultdict(list)
            es_docs_to_delete = collections.defaultdict(list)
            found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                list_of_pairs=ns_pairs)
            # group the fetched rows by their (primary_key, namespace) pair
            log_dict = {}
            for log_entry in found_pkey_logs:
                log_key = (log_entry.primary_key, log_entry.namespace)
                if log_key not in log_dict:
                    log_dict[log_key] = []
                log_dict[log_key].append(log_entry)

            for ns, entry_list in log_dict.items():
                entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                # newest row needs to be indexed in es
                log_entry = entry_list[-1]
                # delete everything from pg and ES, leave the last row in pg
                for e in entry_list[:-1]:
                    ids_to_delete.append(e.log_id)
                    es_docs_to_delete[e.partition_id].append(e.delete_hash)

                # the newest row is also removed from ES first, then
                # re-indexed from its current state
                es_docs_to_delete[log_entry.partition_id].append(
                    log_entry.delete_hash)

                es_docs[log_entry.partition_id].append(log_entry.es_doc())

            if ids_to_delete:
                query = DBSession.query(Log).filter(
                    Log.log_id.in_(ids_to_delete))
                query.delete(synchronize_session=False)
            if es_docs_to_delete:
                # batch this to avoid problems with default ES bulk limits
                for es_index in es_docs_to_delete.keys():
                    for batch in in_batches(es_docs_to_delete[es_index], 20):
                        query = {"query": {"terms": {"delete_hash": batch}}}

                        try:
                            Datastores.es.delete_by_query(
                                index=es_index,
                                doc_type="log",
                                body=query,
                                conflicts="proceed",
                            )
                        except elasticsearch.exceptions.NotFoundError as exc:
                            # index may not exist yet (e.g. new partition)
                            msg = "skipping index {}".format(es_index)
                            log.info(msg)

        total_logs = len(dataset)

        log_msg = "LOG_NEW: %s, entries: %s, proto:%s" % (
            str(resource),
            total_logs,
            proto_version,
        )
        log.info(log_msg)
        # mark_changed(session)
        # Update rate counters in one non-transactional Redis pipeline.
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS["counters"]["logs_per_minute"].format(current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS["counters"]["logs_per_hour_per_app"].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                current_time.replace(minute=0)),
            resource_id,
        )
        redis_pipeline.execute()
        add_logs_es(es_docs)
        return True
    except Exception as exc:
        print_traceback(log)
        # In eager mode retry() would re-run synchronously — propagate.
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        add_logs.retry(exc=exc)