Example #1
0
 def __init__(self, app_config=None, search_cfg=None):
     """Store the configuration and create the helper objects.

     Args:
         app_config: application settings; handed to ``NamespaceFinder``,
             ``RetrainingDefectTypeTriggering`` and ``EsClient``. A new
             empty dict is used when omitted.
         search_cfg: search settings handed to ``EsClient``. A new empty
             dict is used when omitted.
     """
     # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
     # signature is created once and would be shared (and kept on ``self``)
     # by every instance constructed without arguments.
     self.app_config = {} if app_config is None else app_config
     self.search_cfg = {} if search_cfg is None else search_cfg
     self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config)
     self.model_training_triggering = {
         "defect_type": RetrainingDefectTypeTriggering(self.app_config)
     }
     self.es_client = EsClient(app_config=self.app_config,
                               search_cfg=self.search_cfg)
Example #2
0
 def __init__(self, app_config=None, search_cfg=None):
     """Store the configuration and create the search helpers.

     Args:
         app_config: application settings handed to ``EsClient``. A new
             empty dict is used when omitted.
         search_cfg: search settings handed to ``EsClient``; the key
             ``"SimilarityWeightsFolder"`` is read here, so a mapping
             without it raises ``KeyError``. A new empty dict is used when
             omitted.
     """
     # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
     # signature is created once and would be shared (and kept on ``self``)
     # by every instance constructed without arguments.
     self.app_config = {} if app_config is None else app_config
     self.search_cfg = {} if search_cfg is None else search_cfg
     self.es_client = EsClient(app_config=self.app_config,
                               search_cfg=self.search_cfg)
     self.log_preparation = LogPreparation()
     # The weighted calculator is only built when a non-blank folder is set.
     self.weighted_log_similarity_calculator = None
     weights_folder = self.search_cfg["SimilarityWeightsFolder"]
     if weights_folder.strip():
         self.weighted_log_similarity_calculator = weighted_similarity_calculator.\
             WeightedSimilarityCalculator(folder=weights_folder)
 def __init__(self, app_config, search_cfg):
     """Keep both configs and set up the label index, ES client and baseline model.

     Args:
         app_config: application settings, also handed to ``EsClient``.
         search_cfg: search settings, also handed to ``EsClient``; must
             provide the key ``"GlobalDefectTypeModelFolder"``.
     """
     self.app_config, self.search_cfg = app_config, search_cfg
     # Maps a two-letter label to its integer index.
     self.label2inds = {"ab": 0, "pb": 1, "si": 2}
     self.es_client = EsClient(app_config=app_config, search_cfg=search_cfg)
     baseline_folder = search_cfg["GlobalDefectTypeModelFolder"]
     self.baseline_model = defect_type_model.DefectTypeModel(folder=baseline_folder)
 def __init__(self, app_config=None, search_cfg=None):
     """Store the configuration, create helpers and initialise common models.

     Args:
         app_config: application settings; handed to ``EsClient``,
             ``NamespaceFinder`` and ``ObjectSaver``. A new empty dict is
             used when omitted.
         search_cfg: search settings handed to ``EsClient``. A new empty
             dict is used when omitted.
     """
     # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
     # signature is created once and would be shared (and kept on ``self``)
     # by every instance constructed without arguments.
     self.app_config = {} if app_config is None else app_config
     self.search_cfg = {} if search_cfg is None else search_cfg
     self.es_client = EsClient(app_config=self.app_config,
                               search_cfg=self.search_cfg)
     self.log_preparation = LogPreparation()
     # Both model attributes start unset; presumably filled in by
     # ``initialize_common_models`` below -- TODO confirm.
     self.weighted_log_similarity_calculator = None
     self.global_defect_type_model = None
     self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config)
     self.object_saver = ObjectSaver(self.app_config)
     self.initialize_common_models()
Example #5
0
class DeleteIndexService:
    """Deletes an index and the namespace / retraining data kept for it."""

    def __init__(self, app_config=None, search_cfg=None):
        """Store the configuration and create the helper objects.

        Args:
            app_config: application settings; handed to ``NamespaceFinder``,
                ``RetrainingDefectTypeTriggering`` and ``EsClient``. A new
                empty dict is used when omitted.
            search_cfg: search settings handed to ``EsClient``. A new empty
                dict is used when omitted.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
        # signature is created once and would be shared (and kept on ``self``)
        # by every instance constructed without arguments.
        self.app_config = {} if app_config is None else app_config
        self.search_cfg = {} if search_cfg is None else search_cfg
        self.namespace_finder = namespace_finder.NamespaceFinder(self.app_config)
        self.model_training_triggering = {
            "defect_type": RetrainingDefectTypeTriggering(self.app_config)
        }
        self.es_client = EsClient(app_config=self.app_config,
                                  search_cfg=self.search_cfg)

    @utils.ignore_warnings
    def delete_index(self, index_name):
        """Delete ``index_name`` and the data associated with it.

        Besides deleting the index through the ES client, the namespaces
        stored for the index are removed and every retraining trigger is
        asked to drop its info for the project (the index name is used as the
        ``project_id``).

        Args:
            index_name: name of the index to delete.

        Returns:
            The result of ``es_client.delete_index`` converted to ``int``.
        """
        logger.info("Started deleting index")
        t_start = time()
        is_index_deleted = self.es_client.delete_index(index_name)
        # Cleanup runs regardless of whether the index deletion succeeded.
        self.namespace_finder.remove_namespaces(index_name)
        for triggering in self.model_training_triggering.values():
            triggering.remove_triggering_info({"project_id": index_name})
        logger.info("Finished deleting index %.2f s", time() - t_start)
        return int(is_index_deleted)
Example #6
0
class SearchService:
    """Searches an index for logs whose messages are similar to given ones."""

    def __init__(self, app_config=None, search_cfg=None):
        """Store the configuration and create the search helpers.

        Args:
            app_config: application settings handed to ``EsClient``. A new
                empty dict is used when omitted.
            search_cfg: search settings handed to ``EsClient``; the key
                ``"SimilarityWeightsFolder"`` is read here, so a mapping
                without it raises ``KeyError``. A new empty dict is used when
                omitted.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
        # signature is created once and would be shared (and kept on ``self``)
        # by every instance constructed without arguments.
        self.app_config = {} if app_config is None else app_config
        self.search_cfg = {} if search_cfg is None else search_cfg
        self.es_client = EsClient(app_config=self.app_config,
                                  search_cfg=self.search_cfg)
        self.log_preparation = LogPreparation()
        # The weighted calculator is only built when a non-blank folder is set.
        self.weighted_log_similarity_calculator = None
        weights_folder = self.search_cfg["SimilarityWeightsFolder"]
        if weights_folder.strip():
            self.weighted_log_similarity_calculator = weighted_similarity_calculator.\
                WeightedSimilarityCalculator(folder=weights_folder)

    def build_search_query(self, search_req, message):
        """Build the Elasticsearch query body for logs similar to ``message``.

        Args:
            search_req: request object; ``itemId`` (the test item to exclude)
                and ``filteredLaunchIds`` (launches to search in) are read.
            message: text used as the ``more_like_this`` sample.

        Returns:
            A dict usable as the ``body`` of an Elasticsearch search call.
        """
        return {
            "_source":
            ["message", "test_item", "detected_message", "stacktrace"],
            "size": 10000,
            "query": {
                "bool": {
                    "filter": [
                        {
                            "range": {
                                "log_level": {
                                    "gte": utils.ERROR_LOGGING_LEVEL
                                }
                            }
                        },
                        {
                            "exists": {
                                "field": "issue_type"
                            }
                        },
                        {
                            "term": {
                                "is_merged": False
                            }
                        },
                    ],
                    # Never return logs of the test item being searched for.
                    "must_not": {
                        "term": {
                            "test_item": {
                                "value": search_req.itemId,
                                "boost": 1.0
                            }
                        }
                    },
                    "must": [{
                        # Issue type must start with "TI" in either case.
                        "bool": {
                            "should": [
                                {
                                    "wildcard": {
                                        "issue_type": "TI*"
                                    }
                                },
                                {
                                    "wildcard": {
                                        "issue_type": "ti*"
                                    }
                                },
                            ]
                        }
                    }, {
                        "terms": {
                            "launch_id": search_req.filteredLaunchIds
                        }
                    }, {
                        "more_like_this": {
                            "fields": ["message"],
                            "like": message,
                            "min_doc_freq": 1,
                            "min_term_freq": 1,
                            "minimum_should_match": "5<90%",
                            "max_query_terms":
                            self.search_cfg["MaxQueryTerms"],
                            "boost": 1.0
                        }
                    }],
                    "should": [
                        {
                            "term": {
                                "is_auto_analyzed": {
                                    "value": "false",
                                    "boost": 1.0
                                }
                            }
                        },
                    ]
                }
            }
        }

    def search_logs(self, search_req):
        """Get all logs similar to given logs.

        Each non-blank message in ``search_req.logMessages`` is normalised,
        used to query the project index, and the hits are scored with a
        ``SimilarityCalculator``. Hits whose similarity reaches
        ``search_cfg["SearchLogsMinSimilarity"]`` are collected.

        Args:
            search_req: request object; ``projectId``, ``logMessages``,
                ``logLines``, ``itemId`` and ``filteredLaunchIds`` are read.

        Returns:
            A list of ``SearchLogInfo`` (one per distinct log id / test item
            pair), or an empty list when the project index does not exist.
        """
        similar_log_ids = set()
        logger.info("Started searching by request %s", search_req.json())
        logger.info("ES Url %s",
                    utils.remove_credentials_from_url(self.es_client.host))
        t_start = time()
        if not self.es_client.index_exists(str(search_req.projectId)):
            return []
        # Normalised messages already queried, so duplicates are skipped.
        searched_logs = set()
        # ES document id -> test item of that document.
        test_item_info = {}

        for message in search_req.logMessages:
            if not message.strip():
                continue

            queried_log = self.log_preparation._create_log_template()
            queried_log = self.log_preparation._fill_log_fields(
                queried_log, Log(logId=0, message=message),
                search_req.logLines)

            msg_words = " ".join(
                utils.split_words(queried_log["_source"]["message"]))
            if not msg_words.strip() or msg_words in searched_logs:
                continue
            searched_logs.add(msg_words)
            query = self.build_search_query(search_req,
                                            queried_log["_source"]["message"])
            res = self.es_client.es_client.search(index=str(
                search_req.projectId),
                                                  body=query)
            for es_res in res["hits"]["hits"]:
                test_item_info[es_res["_id"]] = es_res["_source"]["test_item"]

            # A new calculator is created for every queried message.
            _similarity_calculator = similarity_calculator.SimilarityCalculator(
                {
                    "max_query_terms": self.search_cfg["MaxQueryTerms"],
                    "min_word_length": self.search_cfg["MinWordLength"],
                    "min_should_match": "90%",
                    "number_of_log_lines": search_req.logLines
                },
                weighted_similarity_calculator=self.
                weighted_log_similarity_calculator)
            _similarity_calculator.find_similarity([(queried_log, res)],
                                                   ["message"])

            for group_id, similarity_obj in _similarity_calculator.similarity_dict[
                    "message"].items():
                # The first element of the key is the id of the found log.
                log_id, _ = group_id
                similarity_percent = similarity_obj["similarity"]
                logger.debug(
                    "Log with id %s has %.3f similarity with the queried log '%s'",
                    log_id, similarity_percent,
                    queried_log["_source"]["message"])
                if similarity_percent >= self.search_cfg[
                        "SearchLogsMinSimilarity"]:
                    similar_log_ids.add((utils.extract_real_id(log_id),
                                         int(test_item_info[log_id])))

        logger.info(
            "Finished searching by request %s with %d results. It took %.2f sec.",
            search_req.json(), len(similar_log_ids),
            time() - t_start)
        return [
            SearchLogInfo(logId=log_info[0], testItemId=log_info[1])
            for log_info in similar_log_ids
        ]
Example #7
0
def create_es_client():
    """Build an ``EsClient`` and attach a boosting decision maker to it.

    The client is created from ``APP_CONFIG["esHost"]`` and ``SEARCH_CONFIG``;
    the decision maker is loaded from ``APP_CONFIG["boostModelFolder"]``.
    """
    client = EsClient(APP_CONFIG["esHost"], SEARCH_CONFIG)
    booster = boosting_decision_maker.BoostingDecisionMaker(
        APP_CONFIG["boostModelFolder"])
    client.set_boosting_decision_maker(booster)
    return client
 def __init__(self, app_config=None, search_cfg=None):
     """Store the configuration and create the ES client and log preparer.

     Args:
         app_config: application settings handed to ``EsClient``. A new
             empty dict is used when omitted.
         search_cfg: search settings handed to ``EsClient``. A new empty
             dict is used when omitted.
     """
     # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
     # signature is created once and would be shared (and kept on ``self``)
     # by every instance constructed without arguments.
     self.app_config = {} if app_config is None else app_config
     self.search_cfg = {} if search_cfg is None else search_cfg
     self.es_client = EsClient(app_config=self.app_config,
                               search_cfg=self.search_cfg)
     self.log_preparation = LogPreparation()
class ClusterService:
    """Groups the logs of a launch into clusters of similar messages."""

    def __init__(self, app_config=None, search_cfg=None):
        """Store the configuration and create the ES client and log preparer.

        Args:
            app_config: application settings handed to ``EsClient``. A new
                empty dict is used when omitted.
            search_cfg: search settings handed to ``EsClient``. A new empty
                dict is used when omitted.
        """
        # ``None`` sentinels instead of ``{}`` defaults: a dict literal in the
        # signature is created once and would be shared (and kept on ``self``)
        # by every instance constructed without arguments.
        self.app_config = {} if app_config is None else app_config
        self.search_cfg = {} if search_cfg is None else search_cfg
        self.es_client = EsClient(app_config=self.app_config,
                                  search_cfg=self.search_cfg)
        self.log_preparation = LogPreparation()

    def build_search_similar_items_query(self, launch_id, test_item, message):
        """Build the Elasticsearch query body for logs similar to ``message``.

        Args:
            launch_id: launch the found logs must belong to.
            test_item: test item whose logs are excluded from the result.
            message: text used as the ``more_like_this`` sample.

        Returns:
            A dict usable as the ``body`` of an Elasticsearch search call.
        """
        return {
            "_source": [
                "whole_message", "test_item", "detected_message", "stacktrace",
                "launch_id", "cluster_id"
            ],
            "size":
            10000,
            "query": {
                "bool": {
                    "filter": [
                        {
                            "range": {
                                "log_level": {
                                    "gte": utils.ERROR_LOGGING_LEVEL
                                }
                            }
                        },
                        {
                            "exists": {
                                "field": "issue_type"
                            }
                        },
                        {
                            "term": {
                                "is_merged": False
                            }
                        },
                    ],
                    "must_not": {
                        "term": {
                            "test_item": {
                                "value": test_item,
                                "boost": 1.0
                            }
                        }
                    },
                    "must": [{
                        "term": {
                            "launch_id": launch_id
                        }
                    }, {
                        "more_like_this": {
                            "fields": ["whole_message"],
                            "like": message,
                            "min_doc_freq": 1,
                            "min_term_freq": 1,
                            "minimum_should_match": "5<98%",
                            "max_query_terms":
                            self.search_cfg["MaxQueryTerms"],
                            "boost": 1.0
                        }
                    }]
                }
            }
        }

    def find_similar_items_from_es(self, groups, log_dict, log_messages,
                                   log_ids, number_of_lines):
        """Extend every group with similar logs already stored in Elasticsearch.

        For each group its first log is used to query the index; hits that
        are not already in ``log_ids`` are clustered together with that log,
        and the hits landing in the same cluster as the first log are
        returned.

        Args:
            groups: mapping of group key to a list of indices into
                ``log_dict`` / ``log_messages``.
            log_dict: mapping of index to an ES-style log document.
            log_messages: prepared messages, indexable by the same indices.
            log_ids: set of integer log ids already handled. It is updated
                in place with the ids of the logs found here.
            number_of_lines: passed to
                ``utils.prepare_message_for_clustering`` for every hit.

        Returns:
            A dict mapping each group key to a list of ``ClusterResult`` for
            the additional logs (empty when nothing similar was found).
        """
        new_clusters = {}
        _clusterizer = clusterizer.Clusterizer()
        for global_group in groups:
            first_item_ind = groups[global_group][0]
            query = self.build_search_similar_items_query(
                log_dict[first_item_ind]["_source"]["launch_id"],
                log_dict[first_item_ind]["_source"]["test_item"],
                log_messages[first_item_ind])
            search_results = self.es_client.es_client.search(
                index=log_dict[first_item_ind]["_index"], body=query)
            # Position 0 always holds the group's own first log; hits follow.
            log_messages_part = [log_messages[first_item_ind]]
            log_dict_part = {0: log_dict[first_item_ind]}
            ind = 1
            for res in search_results["hits"]["hits"]:
                if int(res["_id"]) in log_ids:
                    continue
                log_dict_part[ind] = res
                log_message = utils.prepare_message_for_clustering(
                    res["_source"]["whole_message"], number_of_lines)
                # A blank message does not advance ``ind``, so its slot in
                # ``log_dict_part`` is overwritten by the next kept hit.
                if not log_message.strip():
                    continue
                log_messages_part.append(log_message)
                ind += 1
            groups_part = _clusterizer.find_clusters(log_messages_part)
            new_group = []
            for group in groups_part:
                # Only the cluster holding the original log (position 0)
                # together with at least one hit is of interest.
                if 0 in groups_part[group] and len(groups_part[group]) > 1:
                    cluster_id = ""
                    # The last non-blank cluster id among the members wins.
                    for member_ind in groups_part[group]:
                        member_cluster_id = log_dict_part[member_ind][
                            "_source"]["cluster_id"].strip()
                        if member_cluster_id:
                            cluster_id = member_cluster_id
                    if not cluster_id.strip():
                        cluster_id = str(uuid.uuid1())
                    for member_ind in groups_part[group]:
                        if member_ind == 0:
                            continue
                        member = log_dict_part[member_ind]
                        log_ids.add(int(member["_id"]))
                        new_group.append(
                            ClusterResult(
                                logId=member["_id"],
                                testItemId=member["_source"]["test_item"],
                                project=member["_index"],
                                launchId=member["_source"]["launch_id"],
                                clusterId=cluster_id))
                    break
            new_clusters[global_group] = new_group
        return new_clusters

    def gather_cluster_results(self, groups, additional_results, log_dict):
        """Turn groups (plus ES additions) into a flat list of results.

        Args:
            groups: mapping of group key to a list of indices into
                ``log_dict``.
            additional_results: mapping of group key to a list of
                ``ClusterResult`` found in Elasticsearch for that group.
            log_dict: mapping of index to an ES-style log document.

        Returns:
            A tuple ``(results, cluster_num)``: the ``ClusterResult`` list
            for all logs and the number of groups holding more than one item.
            Logs of single-item groups get an empty ``clusterId``.
        """
        results_to_return = []
        cluster_num = 0
        for group in groups:
            cnt_items = len(groups[group])
            cluster_id = ""
            if group in additional_results:
                cnt_items += len(additional_results[group])
                # Reuse the cluster id of the first additional result, if any.
                for item in additional_results[group]:
                    cluster_id = item.clusterId
                    break
            if cnt_items > 1:
                cluster_num += 1
                if not cluster_id:
                    cluster_id = str(uuid.uuid1())
            for ind in groups[group]:
                results_to_return.append(
                    ClusterResult(
                        logId=log_dict[ind]["_id"],
                        testItemId=log_dict[ind]["_source"]["test_item"],
                        project=log_dict[ind]["_index"],
                        launchId=log_dict[ind]["_source"]["launch_id"],
                        clusterId=cluster_id))
            if cnt_items > 1 and group in additional_results:
                results_to_return.extend(additional_results[group])
        return results_to_return, cluster_num

    @utils.ignore_warnings
    def find_clusters(self, launch_info):
        """Cluster the logs of a launch and store the cluster ids in ES.

        Args:
            launch_info: object exposing ``launch`` (with ``project``,
                ``launchId``, ``launchName``), ``numberOfLogLines`` and
                ``for_update``. When ``for_update`` is truthy, similar logs
                already stored in Elasticsearch are added to the clusters.

        Returns:
            The list of ``ClusterResult`` objects, or an empty list when the
            project index does not exist.
        """
        logger.info("Started clusterizing logs")
        if not self.es_client.index_exists(str(launch_info.launch.project)):
            logger.info("Project %d doesn't exist", launch_info.launch.project)
            logger.info("Finished clustering log with 0 clusters.")
            return []
        t_start = time()
        _clusterizer = clusterizer.Clusterizer()
        log_messages, log_dict = self.log_preparation.prepare_logs_for_clustering(
            launch_info.launch, launch_info.numberOfLogLines)
        log_ids = {int(log["_id"]) for log in log_dict.values()}
        groups = _clusterizer.find_clusters(log_messages)
        additional_results = {}
        if launch_info.for_update:
            additional_results = self.find_similar_items_from_es(
                groups, log_dict, log_messages, log_ids,
                launch_info.numberOfLogLines)

        results_to_return, cluster_num = self.gather_cluster_results(
            groups, additional_results, log_dict)
        if results_to_return:
            # One partial-update action per log, setting only ``cluster_id``.
            bodies = [{
                "_op_type": "update",
                "_id": result.logId,
                "_index": result.project,
                "doc": {
                    "cluster_id": result.clusterId
                }
            } for result in results_to_return]
            self.es_client._bulk_index(bodies)

        results_to_share = {
            launch_info.launch.launchId: {
                "not_found": int(cluster_num == 0),
                "items_to_process": len(log_ids),
                "processed_time": time() - t_start,
                "found_clusters": cluster_num,
                "launch_id": launch_info.launch.launchId,
                "launch_name": launch_info.launch.launchName,
                "project_id": launch_info.launch.project,
                "method": "find_clusters",
                "gather_date": datetime.now().strftime("%Y-%m-%d"),
                "gather_datetime":
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "module_version": [self.app_config["appVersion"]],
                "model_info": []
            }
        }
        # Statistics are only published when an AMQP url is configured.
        if "amqpUrl" in self.app_config and self.app_config["amqpUrl"].strip():
            AmqpClient(self.app_config["amqpUrl"]).send_to_inner_queue(
                self.app_config["exchangeName"], "stats_info",
                json.dumps(results_to_share))

        logger.debug("Stats info %s", results_to_share)
        logger.info("Processed the launch. It took %.2f sec.",
                    time() - t_start)
        logger.info("Finished clustering for the launch with %d clusters.",
                    cluster_num)
        return results_to_return
Example #10
0
def create_es_client():
    """Build an ``EsClient`` from the module-level ``APP_CONFIG`` and ``SEARCH_CONFIG``."""
    client = EsClient(APP_CONFIG, SEARCH_CONFIG)
    return client
Example #11
0
def init_amqp(_amqp_client):
    """Declare the exchange and create one consumer thread per queue.

    When ``APP_CONFIG["instanceTaskType"]`` is ``"train"`` only the
    ``train_models`` queue is consumed; otherwise all request queues are.

    Args:
        _amqp_client: client whose ``connection`` is used to open the channel
            on which the exchange is declared.

    Returns:
        The list of objects returned by ``create_thread``, or ``None`` when
        declaring the exchange failed.
    """
    with _amqp_client.connection.channel() as channel:
        try:
            declare_exchange(channel, APP_CONFIG)
        except Exception as err:
            logger.error("Failed to declare amqp objects")
            logger.error(err)
            return
    threads = []
    es_client = EsClient(APP_CONFIG, SEARCH_CONFIG)

    def _listen(queue_name, on_message):
        # Every queue gets its own AmqpClient and its own thread.
        receiver = AmqpClient(APP_CONFIG["amqpUrl"]).receive
        receive_args = (APP_CONFIG["exchangeName"], queue_name, True, False,
                        on_message)
        threads.append(create_thread(receiver, receive_args))

    # NOTE: services are instantiated inside the lambdas on purpose, i.e. a
    # new service object is built for every incoming message.
    if APP_CONFIG["instanceTaskType"] == "train":
        _listen(
            "train_models",
            lambda channel, method, props, body:
            amqp_handler.handle_inner_amqp_request(
                channel, method, props, body,
                RetrainingService(APP_CONFIG, SEARCH_CONFIG).train_models))
        return threads

    _listen(
        "index",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            es_client.index_logs,
            prepare_response_data=amqp_handler.prepare_index_response_data))
    _listen(
        "analyze",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            AutoAnalyzerService(APP_CONFIG, SEARCH_CONFIG).analyze_logs,
            prepare_response_data=amqp_handler.prepare_analyze_response_data))
    _listen(
        "delete",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            DeleteIndexService(APP_CONFIG, SEARCH_CONFIG).delete_index,
            prepare_data_func=amqp_handler.prepare_delete_index,
            prepare_response_data=amqp_handler.output_result))
    _listen(
        "clean",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            es_client.delete_logs,
            prepare_data_func=amqp_handler.prepare_clean_index,
            prepare_response_data=amqp_handler.output_result))
    _listen(
        "search",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            SearchService(APP_CONFIG, SEARCH_CONFIG).search_logs,
            prepare_data_func=amqp_handler.prepare_search_logs,
            prepare_response_data=amqp_handler.prepare_analyze_response_data))
    _listen(
        "suggest",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            SuggestService(APP_CONFIG, SEARCH_CONFIG).suggest_items,
            prepare_data_func=amqp_handler.prepare_test_item_info,
            prepare_response_data=amqp_handler.prepare_analyze_response_data))
    _listen(
        "cluster",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            ClusterService(APP_CONFIG, SEARCH_CONFIG).find_clusters,
            prepare_data_func=amqp_handler.prepare_launch_info,
            prepare_response_data=amqp_handler.prepare_analyze_response_data))
    _listen(
        "stats_info",
        lambda channel, method, props, body:
        amqp_handler.handle_inner_amqp_request(
            channel, method, props, body,
            es_client.send_stats_info))
    _listen(
        "namespace_finder",
        lambda channel, method, props, body:
        amqp_handler.handle_amqp_request(
            channel, method, props, body,
            NamespaceFinderService(
                APP_CONFIG, SEARCH_CONFIG).update_chosen_namespaces))

    return threads
Example #12
0
    SEARCH_CONFIG["SuggestBoostModelFolder"] = model_settings["SUGGEST_BOOST_MODEL_FOLDER"]
    SEARCH_CONFIG["SimilarityWeightsFolder"] = model_settings["SIMILARITY_WEIGHTS_FOLDER"]
    SEARCH_CONFIG["GlobalDefectTypeModelFolder"] = model_settings["GLOBAL_DEFECT_TYPE_MODEL_FOLDER"]


# Logging is configured from the ``logging.conf`` file next to this module.
log_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logging.conf')
logging.config.fileConfig(log_file_path)
# ``logging.disable(level)`` silences records at ``level`` and below:
# "debug" silences nothing, "info" silences DEBUG, any other value silences
# INFO and below.
logging.disable(
    {"debug": logging.NOTSET, "info": logging.DEBUG}.get(
        APP_CONFIG["logLevel"].lower(), logging.INFO))
logger = logging.getLogger("analyzerApp")
APP_CONFIG["appVersion"] = read_version()
# The client is created before the model settings are read; keep this order.
es_client = EsClient(APP_CONFIG, SEARCH_CONFIG)
read_model_settings()

application = create_application()
CORS(application)
threads = []


@application.route('/', methods=['GET'])
def get_health_status():
    status = ""
    if not es_client.is_healthy(APP_CONFIG["esHost"]):
        status += "Elasticsearch is not healthy;"
    if status:
        logger.error("Analyzer health check status failed: %s", status)
        return Response(json.dumps({"status": status}), status=503, mimetype='application/json')