def generate_history_regression_data(dbconn, project_id, filepath):
    """Export up to 6 months of finished test rounds for a project to a CSV file.

    The export runs when the "generate_history_regression" config flag is set,
    or unconditionally when ``filepath`` does not exist yet.

    :param dbconn: DB connection exposing ``get_all_results_from_database(sql)``
                   that returns a list of dict rows.
    :param project_id: project whose test rounds are exported (coerced to int).
    :param filepath: destination CSV path, overwritten if present.
    """
    # Short-circuit: a missing file always forces regeneration, so the config
    # lookup is only performed when the file already exists.
    generate_flag = not os.path.exists(filepath) or Config.load_env()["generate_history_regression"]
    if not generate_flag:
        print("----NOT generate history regression data----\n")
        return
    print("----generate history regression data----")
    # query history data of 6 month for reference; int() guards the %d substitution
    six_month_regression_sql = (
        "select * from test_rounds where project_id=%d and "
        "DATE_SUB(CURDATE(), INTERVAL 6 MONTH) <= date(start_time) and "
        "end_time is not NULL;" % int(project_id))
    history_regression = dbconn.get_all_results_from_database(six_month_regression_sql)
    if not history_regression:
        # BUGFIX: the original indexed history_regression[0] unconditionally and
        # crashed with IndexError when the query returned no rows.
        print("----there are 0 rows in database when query the sql----\n")
        return
    with open(filepath, "w") as f:
        # Header row from the first dict's keys.
        f.write(",".join(history_regression[0].keys()) + "\n")
        for row in history_regression:
            # CSV is written by hand, so strip the delimiter and line breaks
            # from every value to keep one record per line.
            new_row = [
                str(x).replace("\r", " ").replace("\n", " ").replace(",", " ")
                for x in row.values()
            ]
            f.write(",".join(new_row) + "\n")
    print("----there are %d rows in database when query the sql----\n"
          % len(history_regression))
def generate_regression_history_data(db_conn, project_id, file_path):
    """Dump the last 12 months of finished test rounds for a project to CSV.

    Generation happens when the "generate_regression_history" config flag is
    truthy; a missing ``file_path`` forces generation regardless of the flag.

    :param db_conn: DB connection exposing ``get_all_results_from_database(sql)``.
    :param project_id: project whose rounds are queried (coerced to int).
    :param file_path: CSV destination, written via FileHelper.
    """
    should_generate = Config.load_env("generate_regression_history")
    if not os.path.exists(file_path):
        should_generate = True
    # Guard clause: nothing to do when generation is disabled and the file exists.
    if not should_generate:
        print("NOT generate history regression data\n")
        return
    print("generate history regression data")
    # select history data of 12 month for reference
    query = (
        "select * from test_rounds where project_id=%d and "
        "DATE_SUB(CURDATE(), INTERVAL 12 MONTH) <= date(start_time) and "
        "end_time is not NULL;" % int(project_id))
    rows = db_conn.get_all_results_from_database(query)
    FileHelper.save_db_query_result_to_csv(rows, file_path)
    print("there are %d rows in database when query the history\n" % len(rows))
# generate regression history generate_regression_history_data(regression_db, current_test_round["project_id"], regression_history_file) # decide normal test round or not regression_history = pd.read_csv(regression_history_file) to_drop = ["counter", "sprint", "exist_regression_report"] regression_history.drop(columns=to_drop, inplace=True) normal_round = None if current_test_round["test_suite_id"] not in regression_history[ "test_suite_id"]: print("Test round with new test suite, no history record") # check pass rate line for new test suite if current_test_round["pass_rate"] < Config.load_env("pass_rate_line"): print( "NOT normal Test Round !!! need to check error messages first") normal_round = False else: print("Normal Test Round..") normal_round = True else: pass_rate_quantile_ten_percent = regression_history.loc[ regression_history["test_suite_id"] == current_test_round["test_suite_id"]].pass_rate.quantile(.1) average_pass_rate = regression_history.loc[ regression_history["test_suite_id"] == current_test_round["test_suite_id"]].pass_rate.mean() print("10% quantile is:", "%.2f%%" % pass_rate_quantile_ten_percent) print("current pass rate is:",
def run(self):
    """Prejudge the failures of one test round and return a result payload.

    Loads the test round and project from the regression DB, then routes the
    round's error data either to an ML-based prejudge (knn / logistic, when
    enough triage history exists for the project) or to a simple rule-based
    prejudge, and summarizes the outcome.

    :returns: dict with keys "id", "message", "scripts", "type", "time".
    :raises Exception: when the configured algorithm is neither "knn"
                       nor "logistic".
    """
    start_time = datetime.now()
    response = {"id": self.test_round_id, "message": ""}
    data_folder = os.path.join(os.getcwd(), "data")
    if not os.path.exists(data_folder):
        os.mkdir(data_folder)
    regression_db = MysqlConnection().connect("local_regression")
    # test round
    current_test_round = regression_db.get_first_result_from_database(
        "select * from test_rounds where id=%d;" % int(self.test_round_id))
    print("specified test round information:\n", current_test_round)
    project_name = regression_db.get_first_result_from_database(
        "select name from projects where id=%d"
        % int(current_test_round["project_id"]))["name"]
    triage_history_file = os.path.join(os.getcwd(), "data", "triage_history.csv")

    # generate error data
    round_errors = self.generate_test_round_errors_data(regression_db)
    round_all_results = self.generate_test_round_results_data(regression_db)
    # True when a script-level (not case-level) result id was supplied.
    script_not_case = bool(self.automation_script_result_id
                           and not self.automation_case_result_id)

    def _simple_prejudge():
        # Shared fallback path: rule-based prejudge over all round results.
        print("go to simple prejudge")
        response["scripts"] = SimplePrejudgeHelper.prejudge_all(
            round_all_results, script_not_case_flag=script_not_case)
        response["type"] = "simple"

    if len(round_errors) > 0:
        if not os.path.exists(triage_history_file):
            print("not exist triage history file")
            # NOTE(review): shells out to regenerate the history; consider
            # subprocess.run([...]) instead of os.system.
            os.system("python generate_triage_history.py")
        else:
            print("exist triage history file")
        init_triage_history = pd.read_csv(triage_history_file, index_col=0)
        init_triage_history = init_triage_history[
            init_triage_history["project"] == project_name]
        # Enough triaged rows to trust an ML model?
        has_triage = len(init_triage_history) > Config.load_env("triage_trigger_ml")
        bug_amount = len(init_triage_history[
            init_triage_history["triage_type"] == "Product Error"])
        # Empty "apply_to_project" config means ML applies to every project.
        include_projects = ([p.strip() for p in
                             Config.load_env("apply_to_project").split(",")]
                            if Config.load_env("apply_to_project") else [])
        apply_ml = len(include_projects) == 0 or project_name in include_projects
        # different logic with has_triage flag: require some real bugs (>5% of
        # history) before trusting the classifier.
        if has_triage and bug_amount > (len(init_triage_history) * 0.05) and apply_ml:
            print("go to ml prejudge")
            algorithm = Config.load_env("algorithm")
            if algorithm == "knn":
                init_test_round_results = \
                    self.generate_test_round_results_data_ml(regression_db)
                response["scripts"] = MLPrejudgeHelper.prejudge_all(
                    init_triage_history, init_test_round_results,
                    script_not_case_flag=script_not_case, algorithm="knn")
                response["type"] = "knn"
            elif algorithm == "logistic":
                # Logistic path reads pre-fitted parameters and the project's
                # triaged-bug file from the data folder.
                project_parameter_file = os.path.join(
                    os.getcwd(), "data", "parameter_%s.csv" % project_name)
                project_parameter = pd.read_csv(project_parameter_file)
                init_test_round_results = \
                    self.generate_test_round_results_data_ml(regression_db)
                project_triaged_bug_file = os.path.join(
                    os.getcwd(), "data", "triaged_bug_%s.csv" % project_name)
                response["scripts"] = MLPrejudgeHelper.prejudge_all(
                    project_parameter, init_test_round_results,
                    script_not_case_flag=script_not_case, algorithm="logistic",
                    logistic_bug_file=project_triaged_bug_file)
                response["type"] = "logistic"
            else:
                raise Exception("unknown algorithm")
        else:
            _simple_prejudge()
    else:
        _simple_prejudge()
    response["message"] = self.summary_prejudged_errors(response["scripts"])
    response["time"] = str(datetime.now())
    end_time = datetime.now()
    print(f"duration: {end_time - start_time}")
    return response
print("current pass rate is:", "%.2f%%" % current_test_round["pass_rate"]) # where returns the whole dataframe, replacing rows that don't match the condition as NaN by default # print(history.where(history["test_suite_id"] == current_test_round["test_suite_id"]).count()) # print(history.where(history["test_suite_id"] == current_test_round["test_suite_id"]).pass_rate.mean()) pass_rate_quantile_ten_percent = history.loc[ history["test_suite_id"] == current_test_round["test_suite_id"]].pass_rate.quantile(.1) average_pass_rate = history.loc[ history["test_suite_id"] == current_test_round["test_suite_id"]].pass_rate.mean() if current_test_round[ "pass_rate"] <= pass_rate_quantile_ten_percent or ( average_pass_rate - current_test_round["pass_rate"] ) > Config.load_env()["pass_rate_offset"] * 100: print("Unnormal Test Round !!! need to check error messages first") normal_round = False else: print("Normal Test Round..") normal_round = True normal_round = False # debug, will be removed if normal_round: #todo print("todo...") else: test_round_errors_file = os.path.join(os.getcwd(), "data", "test_round_errors.csv") # generate_error_result = generate_test_round_errors_data(regression_db, test_round_id, test_round_errors_file) generate_error_result = True # debug, will be removed