Exemple #1
0
def generate_history_regression_data(dbconn, project_id, filepath):
    """Export the last six months of completed test rounds of a project to CSV.

    The export runs when the ``generate_history_regression`` entry of
    ``Config.load_env()`` is truthy, or when ``filepath`` does not exist yet.
    Otherwise the existing file is left untouched.

    When the query returns no rows nothing is written: no header can be
    derived without a row, and leaving an empty file behind would both
    suppress regeneration on the next call and break CSV readers.

    Args:
        dbconn: Object exposing ``get_all_results_from_database(sql)``. Each
            returned row is used as a mapping (``keys()`` / ``values()``).
        project_id: Project identifier; coerced with ``int()`` before it is
            interpolated into the SQL text.
        filepath: Destination path of the CSV file.

    Returns:
        None. Progress is reported on stdout.
    """
    generate_flag = Config.load_env()["generate_history_regression"]
    if not os.path.exists(filepath):
        # A missing file always forces generation, whatever the config says.
        generate_flag = True

    if not generate_flag:
        print("----NOT generate history regression data----\n")
        return

    print("----generate history regression data----")
    # query history data of 6 month for reference
    six_month_regression_sql = "select * from test_rounds where project_id=%d and DATE_SUB(CURDATE(), INTERVAL 6 MONTH) <= date(start_time) and end_time is not NULL;" % int(
        project_id)
    history_regression = dbconn.get_all_results_from_database(
        six_month_regression_sql)
    row_count = len(history_regression)

    if row_count > 0:
        # Explicit UTF-8 so the file content does not depend on the locale
        # of the machine that happens to generate it.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(",".join(history_regression[0].keys()) + "\n")
            for row in history_regression:
                # Line breaks and commas inside a value would corrupt the
                # hand-written CSV layout, so they are flattened to spaces.
                new_row = [
                    str(x).replace("\r", " ").replace("\n",
                                                      " ").replace(",", " ")
                    for x in row.values()
                ]
                f.write(",".join(new_row) + "\n")

    print("----there are %d rows in database when query the sql----\n" %
          row_count)
 def generate_regression_history_data(db_conn, project_id, file_path):
     """Refresh the CSV holding twelve months of regression history.

     The export runs when the ``generate_regression_history`` config value is
     truthy or when ``file_path`` does not exist; otherwise only a notice is
     printed.

     Args:
         db_conn: Object exposing ``get_all_results_from_database(sql)``.
         project_id: Project identifier; coerced with ``int()`` before it is
             interpolated into the SQL text.
         file_path: Destination handed to
             ``FileHelper.save_db_query_result_to_csv``.

     Returns:
         None. Progress is reported on stdout.
     """
     refresh_requested = Config.load_env("generate_regression_history")
     file_missing = not os.path.exists(file_path)
     if not (file_missing or refresh_requested):
         print("NOT generate history regression data\n")
         return

     print("generate history regression data")
     # Reference window: rounds started within the last 12 months that have
     # an end_time recorded.
     sql_template = "select * from test_rounds where project_id=%d and DATE_SUB(CURDATE(), INTERVAL 12 MONTH) <= date(start_time) and end_time is not NULL;"
     history_rows = db_conn.get_all_results_from_database(sql_template % int(project_id))
     FileHelper.save_db_query_result_to_csv(history_rows, file_path)
     print("there are %d rows in database when query the history\n" % len(history_rows))
Exemple #3
0
    # generate regression history
    generate_regression_history_data(regression_db,
                                     current_test_round["project_id"],
                                     regression_history_file)

    # decide normal test round or not
    regression_history = pd.read_csv(regression_history_file)
    to_drop = ["counter", "sprint", "exist_regression_report"]
    regression_history.drop(columns=to_drop, inplace=True)
    normal_round = None
    if current_test_round["test_suite_id"] not in regression_history[
            "test_suite_id"]:
        print("Test round with new test suite, no history record")
        # check pass rate line for new test suite
        if current_test_round["pass_rate"] < Config.load_env("pass_rate_line"):
            print(
                "NOT normal Test Round !!! need to check error messages first")
            normal_round = False
        else:
            print("Normal Test Round..")
            normal_round = True
    else:
        pass_rate_quantile_ten_percent = regression_history.loc[
            regression_history["test_suite_id"] ==
            current_test_round["test_suite_id"]].pass_rate.quantile(.1)
        average_pass_rate = regression_history.loc[
            regression_history["test_suite_id"] ==
            current_test_round["test_suite_id"]].pass_rate.mean()
        print("10% quantile is:", "%.2f%%" % pass_rate_quantile_ten_percent)
        print("current pass rate is:",
    def run(self):
        """Prejudge the script results of the test round ``self.test_round_id``.

        The round is loaded from the ``local_regression`` database and its
        results are classified either by ``MLPrejudgeHelper`` (``knn`` or
        ``logistic``, selected by the ``algorithm`` config value) or by
        ``SimplePrejudgeHelper``.

        The ML path is taken only when all of the following hold:
          * the round has at least one error,
          * the project has more triage-history rows than the
            ``triage_trigger_ml`` config value,
          * more than 5% of those rows have triage type "Product Error",
          * the ``apply_to_project`` config value is empty or lists the
            project name.
        In every other case the simple prejudge is used.

        Returns:
            dict with the keys ``id``, ``message`` (built by
            ``summary_prejudged_errors``), ``scripts``, ``type`` ("knn",
            "logistic" or "simple") and ``time`` (stringified timestamp).

        Raises:
            ValueError: if the ML path is selected but the configured
                ``algorithm`` is neither "knn" nor "logistic".
        """
        start_time = datetime.now()
        response = {"id": self.test_round_id, "message": ""}
        data_folder = os.path.join(os.getcwd(), "data")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(data_folder, exist_ok=True)
        regression_db = MysqlConnection().connect("local_regression")

        # test round
        current_test_round = regression_db.get_first_result_from_database(
            "select * from test_rounds where id=%d;" % int(self.test_round_id))
        print("specified test round information:\n", current_test_round)
        project_name = regression_db.get_first_result_from_database(
            "select name from projects where id=%d" % int(current_test_round["project_id"]))["name"]
        triage_history_file = os.path.join(data_folder, "triage_history.csv")

        # NOTE: a regression-history based "normal round" check (10% pass-rate
        # quantile and average-offset comparison per test suite) used to run
        # here and is currently disabled, together with the ErrorAnalyzer
        # based response message that depended on it. If it is re-enabled,
        # mind that `value not in series` tests the pandas Series *index*,
        # not its values.

        # generate error data
        round_errors = self.generate_test_round_errors_data(regression_db)
        round_all_results = self.generate_test_round_results_data(regression_db)
        script_not_case = bool(self.automation_script_result_id and not self.automation_case_result_id)

        use_ml = False
        # len() is kept on purpose instead of truthiness: the container type
        # of round_errors is not visible here and may not support bool().
        if len(round_errors) > 0:
            if not os.path.exists(triage_history_file):
                print("not exist triage history file")
                # NOTE(review): the exit status is ignored and "python" is
                # resolved through PATH; if the script fails, the read_csv
                # call below is what raises.
                os.system("python generate_triage_history.py")
            else:
                print("exist triage history file")
            init_triage_history = pd.read_csv(triage_history_file, index_col=0)
            init_triage_history = init_triage_history[init_triage_history["project"] == project_name]
            triage_count = len(init_triage_history)
            has_triage = triage_count > Config.load_env("triage_trigger_ml")
            bug_amount = len(init_triage_history[init_triage_history["triage_type"] == "Product Error"])
            apply_to_project = Config.load_env("apply_to_project")
            include_projects = [p.strip() for p in apply_to_project.split(",")] if apply_to_project else []
            # An empty project list means "apply to every project".
            apply_ml = not include_projects or project_name in include_projects
            use_ml = has_triage and bug_amount > (triage_count * 0.05) and apply_ml

        if use_ml:
            print("go to ml prejudge")
            algorithm = Config.load_env("algorithm")
            if algorithm == "knn":
                init_test_round_results = self.generate_test_round_results_data_ml(regression_db)
                response["scripts"] = MLPrejudgeHelper.prejudge_all(init_triage_history, init_test_round_results, script_not_case_flag=script_not_case, algorithm="knn")
                response["type"] = "knn"
            elif algorithm == "logistic":
                project_parameter_file = os.path.join(data_folder, "parameter_%s.csv" % project_name)
                project_parameter = pd.read_csv(project_parameter_file)
                init_test_round_results = self.generate_test_round_results_data_ml(regression_db)
                project_triaged_bug_file = os.path.join(data_folder, "triaged_bug_%s.csv" % project_name)
                response["scripts"] = MLPrejudgeHelper.prejudge_all(project_parameter, init_test_round_results, script_not_case_flag=script_not_case, algorithm="logistic", logistic_bug_file=project_triaged_bug_file)
                response["type"] = "logistic"
            else:
                # ValueError is still an Exception, so existing handlers keep
                # working; the offending value is included for diagnosis.
                raise ValueError(f"unknown algorithm: {algorithm!r}")
        else:
            # Reached both when the round has no errors and when the ML
            # preconditions are not met.
            print("go to simple prejudge")
            response["scripts"] = SimplePrejudgeHelper.prejudge_all(round_all_results, script_not_case_flag=script_not_case)
            response["type"] = "simple"

        response["message"] = self.summary_prejudged_errors(response["scripts"])
        response["time"] = str(datetime.now())
        end_time = datetime.now()
        print(f"duration: {end_time - start_time}")
        return response
Exemple #5
0
        print("current pass rate is:",
              "%.2f%%" % current_test_round["pass_rate"])
        # where returns the whole dataframe, replacing rows that don't match the condition as NaN by default
        # print(history.where(history["test_suite_id"] == current_test_round["test_suite_id"]).count())
        # print(history.where(history["test_suite_id"] == current_test_round["test_suite_id"]).pass_rate.mean())

        pass_rate_quantile_ten_percent = history.loc[
            history["test_suite_id"] ==
            current_test_round["test_suite_id"]].pass_rate.quantile(.1)
        average_pass_rate = history.loc[
            history["test_suite_id"] ==
            current_test_round["test_suite_id"]].pass_rate.mean()
        if current_test_round[
                "pass_rate"] <= pass_rate_quantile_ten_percent or (
                    average_pass_rate - current_test_round["pass_rate"]
                ) > Config.load_env()["pass_rate_offset"] * 100:
            print("Unnormal Test Round !!! need to check error messages first")
            normal_round = False
        else:
            print("Normal Test Round..")
            normal_round = True

    normal_round = False  # debug, will be removed
    if normal_round:
        #todo
        print("todo...")
    else:
        test_round_errors_file = os.path.join(os.getcwd(), "data",
                                              "test_round_errors.csv")
        # generate_error_result = generate_test_round_errors_data(regression_db, test_round_id, test_round_errors_file)
        generate_error_result = True  # debug, will be removed