def predict(ver, predict_ver, alike_metrics):
    predictor_rep = PredictorRepository(predict_ver, ver)
    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    ens_analyzer = AUCAnalyzer(predict_ver, 'ENS', TARGET)
    for i in tqdm(range(ITER)):
        # NML MODEL
        predictor = predictor_rep.get_predictor('ENS', PRED_TYPE)
        if predictor is None:
            print(' predictor was not found, type: ' + PRED_TYPE)
            return
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_sample(training_m.product_df, training_m.fault)
        X_resampled, y_resampled = training_m.product_df.as_matrix(), \
            training_m.fault.as_matrix()
        nml_model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_not_modified_df()
        nml_value, _ = predictor.predict_ensemble_test_data(
            nml_model, ev_data, dv_data, None)

        # RFN MODEL
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        rfn_model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_modified_df()
        mrg_value, _ = predictor.predict_ensemble_test_data(
            rfn_model, ev_data, dv_data, None)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            # dump the per-iteration report before accumulating it
            report_df[REPORT_COLUMNS].to_csv('df.csv')
            ens_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            ens_analyzer.calculate()
            ens_analyzer.analyze_predict_result()

    # export report
    ens_df = ens_analyzer.calculate_average(ITER)
    ens_analyzer.export(target_sw=TARGET, df=ens_df, predictor_type=PRED_TYPE)
    ens_df = ens_analyzer.calculate_num_report_averge(ITER)
    ens_analyzer.export_count_report(target_sw=TARGET, df=ens_df,
                                     predictor_type=PRED_TYPE)

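# A minimal sketch, assuming pandas >= 1.0: the as_matrix() calls above were
# deprecated in pandas 0.23 and removed in 1.0; DataFrame.to_numpy() (or
# .values) is the replacement. The DataFrame/Series contents below are
# hypothetical placeholders, not project data.
import pandas as pd


def _as_matrix_replacement_sketch():
    df = pd.DataFrame({'loc': [120, 45], 'complexity': [7, 2]})  # hypothetical metrics
    fault = pd.Series([1, 0])                                    # hypothetical fault labels
    # Equivalent of the removed df.as_matrix() / fault.as_matrix().
    X, y = df.to_numpy(), fault.to_numpy()
    return X, y
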
def predict(ver, predict_ver, alike_metrics):
    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    ens_analyzer = Analyzer(predict_ver, 'ENS')
    predictor_rep = PredictorRepository(predict_ver, ver)
    for i in tqdm(range(ITER)):
        # NML MODEL
        predictor = predictor_rep.get_predictor('ENS', PRED_TYPE)
        if predictor is None:
            print(' predictor was not found, type: ' + PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.product_df, training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_not_modified_df()
        nml_value, _ = predictor.predict_ensemble_proba(
            model, ev_data, dv_data, None)

        # DST MODEL
        predictor2 = predictor_rep.get_predictor('ENS', PRED_TYPE2)
        if predictor2 is None:
            print(' predictor was not found, type: ' + PRED_TYPE2)
            return
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        model = predictor2.train_model(X_resampled, y_resampled)
        ev_data, dv_data = evaluate_m.get_modified_df()
        mrg_value, _ = predictor2.predict_ensemble_proba(
            model, ev_data, dv_data, None)
        predictor2.set_is_new_df(evaluate_m.isNew)
        predictor2.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor2.export_report(predict_ver)
        if report_df is not None:
            ens_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            ens_analyzer.calculate()

    # export report
    ens_df = ens_analyzer.calculate_average(ITER)
    predictor_type_name = "{0}{1}".format(PRED_TYPE, PRED_TYPE2)
    ens_analyzer.export(target_sw=TARGET, df=ens_df, predictor_type=PRED_TYPE)
    ens_analyzer.export_accum_df(target_sw=TARGET)

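# A minimal sketch, assuming imbalanced-learn >= 0.4: RandomOverSampler's
# `ratio` argument was renamed `sampling_strategy`, and `fit_sample` became
# `fit_resample` (the old names were later removed). The toy arrays below are
# hypothetical placeholders, not project data.
import random

import numpy as np
from imblearn.over_sampling import RandomOverSampler


def _oversample_sketch():
    X = np.array([[1.0], [2.0], [3.0], [4.0]])
    y = np.array([0, 0, 0, 1])  # imbalanced fault labels
    sm = RandomOverSampler(sampling_strategy='auto',
                           random_state=random.randint(1, 100))
    # Equivalent of the older sm.fit_sample(X, y) used above.
    X_res, y_res = sm.fit_resample(X, y)
    return X_res, y_res
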
def predict(self, box_plotting=False, result_exporting=False):
    self.model2.set_param_dict(self.PRED_TYPE)
    self.__get_logger()
    # Take two (sw_name, version) pairs and collect every metric for each of them.
    training_m = Metrics(self.model1.final_version, self.METRICS_DIR, self.model1)
    evaluate_m = Metrics(self.model2.final_version, self.METRICS_DIR, self.model2)
    # Obtain the alike_metrics.
    self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    # Build a metrics set filtered by alike_metrics and compare the prediction
    # accuracy of the filtered model against the unfiltered one.
    ver, predict_ver = self.model1.final_version, self.model2.final_version
    # pre_model = mc.retrieve_model(self.model.sw_name, self.model.final_version)
    predictor_rep = PredictorRepository(predict_ver, self.model1)
    # training_m = Metrics(ver, self.METRICS_DIR, pre_model)
    # evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
    # self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    msg = "Sw: {}:{}-{}:{}, alike_metrics: {}"\
        .format(self.model1.sw_name, self.model1.final_version,
                self.model2.sw_name, self.model2.final_version, self.alike_metrics)
    self.report_logger.info(msg)
    print(msg)
    if predict_ver is None or self.TARGET is None:
        self.error_logger.error('could not create AUCAnalyzer instance. '
                                'predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
        return
    # nml_analyzer = AUCAnalyzer(predict_ver, 'NML', self.TARGET)
    rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', self.TARGET)
    dst_analyzer = AUCAnalyzer(predict_ver, 'DST', self.TARGET)
    for i in range(self.ITER):
        # RFN MODEL
        predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
        assert predictor is not None, \
            ' predictor was not found, type: ' + self.PRED_TYPE
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        X_resampled, y_resampled = training_m.mrg_df.as_matrix(), \
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_test_data(
            model, evaluate_m.mrg_df, evaluate_m.fault, self.TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            rfn_analyzer.calculate()
            rfn_analyzer.analyze_predict_result()

        # DISTRIBUTION MODEL
        predictor = predictor_rep.get_predictor('DST', self.PRED_TYPE)
        assert predictor is not None, \
            ' predictor was not found, type: ' + self.PRED_TYPE
        alike_df = training_m.get_specific_df(self.alike_metrics)
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        X_resampled, y_resampled = alike_df.as_matrix(), \
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(self.alike_metrics)
        itg_value, importance = predictor.predict_test_data(
            model, alike_df, evaluate_m.fault, self.TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            dst_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            dst_analyzer.calculate()
            dst_analyzer.analyze_predict_result()
        # print('feature importance: {}'.format(importance))

    # conduct Mann-Whitney U test
    self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=0)
    self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=2)
    self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=3)
    # draw boxplot graph
    if box_plotting:
        # self.draw_result_boxplot(rfn_analyzer, itg_analyzer)
        pass

    # export report
    # nml_df = nml_analyzer.calculate_average(self.ITER)
    rfn_df = rfn_analyzer.calculate_average(self.ITER)
    itg_df = dst_analyzer.calculate_average(self.ITER)
    df = pd.concat([rfn_df, itg_df], ignore_index=True)
    rfn_analyzer.export(target_sw=self.TARGET, df=df,
                        predictor_type=self.PRED_TYPE)  # any analyzer instance will do

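# The body of conduct_mh_test() is not shown in this file; below is a minimal
# sketch of how the per-iteration scores of two analyzers could be compared
# with a Mann-Whitney U test via scipy. The argument names are hypothetical
# stand-ins, not the project's actual API.
from scipy.stats import mannwhitneyu


def _mann_whitney_sketch(rfn_scores, dst_scores):
    # Two-sided test on two independent samples (e.g. AUC values per iteration).
    stat, p_value = mannwhitneyu(rfn_scores, dst_scores, alternative='two-sided')
    return stat, p_value
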
def predict(ver, predict_ver, alike_metrics):
    predictor_rep = PredictorRepository(predict_ver, ver)
    # if TARGET == 'Derby':
    #     # Apache-Derby
    #     training_m = Metrics_Origin(ver, METRICS_DIR)
    #     evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    # else:
    #     # NO SERVICE
    #     return
    training_m = Metrics_Origin(ver, METRICS_DIR)
    evaluate_m = Metrics_Origin(predict_ver, METRICS_DIR)
    # nml_analyzer = AUCAnalyzer(predict_ver, 'NML', TARGET)
    # rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', TARGET)
    # itg_analyzer = AUCAnalyzer(predict_ver, 'ITG', TARGET)
    nml_analyzer = Analyzer(predict_ver, 'NML')
    rfn_analyzer = Analyzer(predict_ver, 'RFN')
    itg_analyzer = Analyzer(predict_ver, 'ITG')
    for i in tqdm(range(ITER)):
        # NML MODEL
        predictor = predictor_rep.get_predictor('NML', PRED_TYPE)
        if predictor is None:
            print(' predictor was not found, type: ' + PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.product_df, training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_proba(
            model, evaluate_m.product_df, evaluate_m.fault, TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            nml_analyzer.calculate()

        # RFN MODEL
        predictor = predictor_rep.get_predictor('RFN', PRED_TYPE)
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_proba(
            model, evaluate_m.mrg_df, evaluate_m.fault, TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            rfn_analyzer.calculate()

        # INTELLIGENCE MODEL
        predictor = predictor_rep.get_predictor('ITG', PRED_TYPE)
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        alike_df = training_m.get_specific_df(alike_metrics)
        X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(alike_metrics)
        rfn_value, importance = predictor.predict_proba(
            model, alike_df, evaluate_m.fault, TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[REPORT_COLUMNS])
            itg_analyzer.calculate()

    # export report
    nml_df = nml_analyzer.calculate_average(ITER)
    rfn_df = rfn_analyzer.calculate_average(ITER)
    itg_df = itg_analyzer.calculate_average(ITER)
    df = pd.concat([nml_df, rfn_df, itg_df], ignore_index=True)
    nml_analyzer.export_accum_df(target_sw=TARGET)
    rfn_analyzer.export_accum_df(target_sw=TARGET)
    itg_analyzer.export_accum_df(target_sw=TARGET)
    nml_analyzer.export(target_sw=TARGET, df=df,
                        predictor_type=PRED_TYPE)  # any analyzer instance will do

def predict_prob(self, box_plotting=True, result_exporting=True):
    self.model.set_param_dict(self.PRED_TYPE)
    self.__get_logger()
    ver, predict_ver = self.model.previous_version, self.model.final_version
    pre_model = mc.retrieve_model(self.model.sw_name, self.model.final_version)
    predictor_rep = PredictorRepository(pre_model, self.model)
    training_m = Metrics(ver, self.METRICS_DIR, pre_model)
    evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
    self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    msg = "Sw: {}, version: {}, alike_metrics: {}"\
        .format(self.model.sw_name, predict_ver, self.alike_metrics)
    self.report_logger.info(msg)
    if predict_ver is None or self.TARGET is None:
        self.error_logger.error('could not create AUCAnalyzer instance. '
                                'predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
        return
    nml_analyzer = Analyzer(predict_ver, 'NML')
    rfn_analyzer = Analyzer(predict_ver, 'RFN')
    itg_analyzer = Analyzer(predict_ver, 'ITG')
    # for i in tqdm(range(self.ITER)):
    for i in range(self.ITER):
        # NML MODEL
        predictor = predictor_rep.get_predictor('NML', self.PRED_TYPE)
        if predictor is None:
            print(' predictor was not found, type: ' + self.PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.product_df, training_m.fault)
        # X_resampled, y_resampled = training_m.product_df.as_matrix(),\
        #     training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_proba(
            model, evaluate_m.product_df, evaluate_m.fault, self.TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            nml_analyzer.calculate()
            # nml_analyzer.analyze_predict_result()

        # RFN MODEL
        predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
        assert predictor is not None, \
            ' predictor was not found, type: ' + self.PRED_TYPE
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        # X_resampled, y_resampled = training_m.mrg_df.as_matrix(),\
        #     training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_proba(
            model, evaluate_m.mrg_df, evaluate_m.fault, self.TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            rfn_analyzer.calculate()
            # rfn_analyzer.analyze_predict_result()

        # INTELLIGENCE MODEL
        predictor = predictor_rep.get_predictor('ITG', self.PRED_TYPE)
        assert predictor is not None, \
            ' predictor was not found, type: ' + self.PRED_TYPE
        alike_df = training_m.get_specific_df(self.alike_metrics)
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        # X_resampled, y_resampled = alike_df.as_matrix(),\
        #     training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(self.alike_metrics)
        rfn_value, importance = predictor.predict_proba(
            model, alike_df, evaluate_m.fault, self.TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            itg_analyzer.calculate()

def predict(self, box_plotting=True, result_exporting=True):
    self.model.set_param_dict(self.PRED_TYPE)
    self.__get_logger()
    ver, predict_ver = self.model.previous_version, self.model.final_version
    pre_model = mc.retrieve_model(self.model.sw_name, self.model.final_version)
    predictor_rep = PredictorRepository(pre_model, self.model)
    training_m = Metrics(ver, self.METRICS_DIR, pre_model)
    evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
    self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    msg = "Sw: {}, version: {}, alike_metrics: {}"\
        .format(self.model.sw_name, predict_ver, self.alike_metrics)
    self.report_logger.info(msg)
    if predict_ver is None or self.TARGET is None:
        self.error_logger.error('could not create AUCAnalyzer instance. '
                                'predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
        return
    nml_analyzer = AUCAnalyzer(predict_ver, 'NML', self.TARGET)
    rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', self.TARGET)
    itg_analyzer = AUCAnalyzer(predict_ver, 'ITG', self.TARGET)
    # nml_analyzer = Analyzer(predict_ver, 'NML')
    # rfn_analyzer = Analyzer(predict_ver, 'RFN')
    # itg_analyzer = Analyzer(predict_ver, 'ITG')
    # acum_nml_report = pd.DataFrame([])
    # acum_rfn_report = pd.DataFrame([])
    # acum_intel_report = pd.DataFrame([])
    # for i in tqdm(range(self.ITER)):
    for i in range(self.ITER):
        # NML MODEL
        predictor = predictor_rep.get_predictor('NML', self.PRED_TYPE)
        if predictor is None:
            print(' predictor was not found, type: ' + self.PRED_TYPE)
            return
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_sample(training_m.product_df, training_m.fault)
        X_resampled, y_resampled = training_m.product_df.as_matrix(), \
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_test_data(
            model, evaluate_m.product_df, evaluate_m.fault, self.TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            nml_analyzer.calculate()
            nml_analyzer.analyze_predict_result()

        # RFN MODEL
        predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
        assert predictor is not None, \
            ' predictor was not found, type: ' + self.PRED_TYPE
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        X_resampled, y_resampled = training_m.mrg_df.as_matrix(), \
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_test_data(
            model, evaluate_m.mrg_df, evaluate_m.fault,
            self.TARGET + "-ex1rfn.csv", threshold=0.6)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            rfn_analyzer.calculate()
            rfn_analyzer.analyze_predict_result()

        # INTELLIGENCE MODEL
        predictor = predictor_rep.get_predictor('ITG', self.PRED_TYPE)
        assert predictor is not None, \
            ' predictor was not found, type: ' + self.PRED_TYPE
        alike_df = training_m.get_specific_df(self.alike_metrics)
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        # X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        X_resampled, y_resampled = alike_df.as_matrix(), \
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(self.alike_metrics)
        itg_value, importance = predictor.predict_test_data(
            model, alike_df, evaluate_m.fault,
            self.TARGET + "-ex1itg.csv", threshold=0.6)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            itg_analyzer.calculate()
            itg_analyzer.analyze_predict_result()
        print('feature importance: {}'.format(importance))

    # conduct Mann-Whitney U test
    self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=0)
    self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=2)
    self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=3)
    # draw boxplot graph
    if box_plotting:
        self.draw_result_boxplot(rfn_analyzer, itg_analyzer)

    # export report
    nml_df = nml_analyzer.calculate_average(self.ITER)
    rfn_df = rfn_analyzer.calculate_average(self.ITER)
    itg_df = itg_analyzer.calculate_average(self.ITER)
    df = pd.concat([nml_df, rfn_df, itg_df], ignore_index=True)
    nml_analyzer.export(target_sw=self.TARGET, df=df,
                        predictor_type=self.PRED_TYPE)  # any analyzer instance will do
    nml_df = nml_analyzer.calculate_num_report_averge(self.ITER)
    rfn_df = rfn_analyzer.calculate_num_report_averge(self.ITER)
    itg_df = itg_analyzer.calculate_num_report_averge(self.ITER)
    if result_exporting:
        nml_analyzer.export_count_report(target_sw=self.TARGET, df=nml_df,
                                         predictor_type=self.PRED_TYPE)
        rfn_analyzer.export_count_report(target_sw=self.TARGET, df=rfn_df,
                                         predictor_type=self.PRED_TYPE)
        itg_analyzer.export_count_report(target_sw=self.TARGET, df=itg_df,
                                         predictor_type=self.PRED_TYPE)

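# The RFN/ITG calls above pass threshold=0.6; a minimal sketch of what such a
# probability threshold typically does when turning predicted probabilities
# into labels. The array below is a hypothetical example, not project data,
# and the helper name is not part of the project's API.
import numpy as np


def _apply_threshold_sketch(probabilities, threshold=0.6):
    # Mark an instance as fault-prone only when its predicted probability
    # meets or exceeds the threshold.
    return (np.asarray(probabilities) >= threshold).astype(int)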