def exp(v1, v2):
    """Compare metric distributions of two versions, then run prediction.

    Loads the metrics of *v1* and *v2* from the module-level METRICS_DIR,
    prints the metrics whose distributions are alike across the two
    versions, and forwards them to ``predict``.
    """
    metrics_a = Metrics_Origin(v1, METRICS_DIR)
    metrics_b = Metrics_Origin(v2, METRICS_DIR)
    print(v1 + '-' + v2)
    shared_metrics = st.compare_two_versions(metrics_a, metrics_b)
    print(shared_metrics)
    predict(v1, v2, shared_metrics)
def exp(model, metrics_dir=None):
    """Run experiment Ex01 for *model* on its previous vs. current version.

    Parameters
    ----------
    model : project model object
        Must expose ``dir_name``, ``curr_version`` and ``pre_version``.
    metrics_dir : str or None
        Directory holding the metrics files.  When ``None`` (the default),
        falls back to the hard-coded Dropbox layout used in this study.

    NOTE(review): this module defines a second ``exp`` elsewhere with a
    different signature; the later definition shadows the earlier one.
    """
    # BUGFIX: the passed-in metrics_dir used to be unconditionally
    # overwritten by the hard-coded path, making the argument dead.
    # Honor the caller's directory when one is supplied.
    if metrics_dir is None:
        metrics_dir = '/Users/{}/Dropbox/STUDY/{}/Derby/all'\
            .format(ENV, model.dir_name)
    ex01 = Ex01(model, metrics_dir)
    ex01.METRICS_DIR = metrics_dir
    v1 = model.curr_version
    v2 = model.pre_version
    version1 = Metrics_Origin(v1, metrics_dir)
    version2 = Metrics_Origin(v2, metrics_dir)
    print(v1 + '-' + v2)
    alike_metrics = st.compare_two_versions(version1, version2)
    print(alike_metrics)
    ex01.predict(v1, v2, alike_metrics)
def invest_distribution(self, model1, model2):
    """Inspect which metrics have alike distributions between two models.

    Loads the metrics of each model's final version from
    ``self.METRICS_DIR``, prints the list of alike metrics, the filtered
    metric columns of both versions, and a one-line summary.
    """
    training_m = Metrics(model1.final_version, self.METRICS_DIR, model1)
    evaluate_m = Metrics(model2.final_version, self.METRICS_DIR, model2)
    alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    print(alike_metrics)
    print(type(alike_metrics))
    print(training_m.mrg_df[alike_metrics])
    print(evaluate_m.mrg_df[alike_metrics])
    # BUGFIX: a leftover debug `raise Exception` here made the summary
    # below unreachable; removed so the method completes normally.
    msg = "Sw: {}:{}-{}:{}, alike_metrics: {}"\
        .format(model1.sw_name, model1.final_version,
                model2.sw_name, model2.final_version, alike_metrics)
    print(msg)
def predict(self, box_plotting=False, result_exporting=False):
    """Train and evaluate RFN and DST predictors across two models.

    Uses ``self.model1`` (training side) and ``self.model2`` (evaluation
    side), runs ``self.ITER`` train/evaluate iterations for each predictor
    type, applies a Mann-Whitney U test to the results, and exports an
    averaged report.

    NOTE(review): indentation was reconstructed from a collapsed source;
    statement nesting (notably inside the ``if report_df is not None``
    guards) should be confirmed against version control.
    """
    self.model2.set_param_dict(self.PRED_TYPE)
    self.__get_logger()
    # Take two (sw_name, version) pairs and fetch all metrics for each.
    training_m = Metrics(self.model1.final_version, self.METRICS_DIR, self.model1)
    evaluate_m = Metrics(self.model2.final_version, self.METRICS_DIR, self.model2)
    # Obtain the metrics whose distributions are alike across versions.
    self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    # Build a metrics set filtered down to alike_metrics, then compare
    # prediction accuracy of the unfiltered vs. filtered models.
    ver, predict_ver = self.model1.final_version, self.model2.final_version
    # pre_model = mc.retrieve_model(self.model.sw_name, self.model.final_version)
    predictor_rep = PredictorRepository(predict_ver, self.model1)
    # training_m = Metrics(ver, self.METRICS_DIR, pre_model)
    # evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
    # self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    msg = "Sw: {}:{}-{}:{}, alike_metrics: {}"\
        .format(self.model1.sw_name, self.model1.final_version,
                self.model2.sw_name, self.model2.final_version,
                self.alike_metrics)
    self.report_logger.info(msg)
    print(msg)
    if predict_ver is None or self.TARGET is None:
        self.error_logger.error('could not create AUCAnalyzer instance.\
            predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
        return
    # nml_analyzer = AUCAnalyzer(predict_ver, 'NML', self.TARGET)
    rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', self.TARGET)
    dst_analyzer = AUCAnalyzer(predict_ver, 'DST', self.TARGET)
    for i in range(self.ITER):
        # RFN MODEL
        predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
        assert predictor is not None,\
            print(' predictor has not found, type: ' + self.PRED_TYPE)
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1,100))
        # X_resampled, y_resampled = sm.fit_sample( training_m.mrg_df, training_m.fault )
        # Train on the raw (unresampled) merged metrics.
        X_resampled, y_resampled = training_m.mrg_df.as_matrix(),\
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_test_data(
            model, evaluate_m.mrg_df, evaluate_m.fault,
            self.TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            rfn_analyzer.calculate()
            rfn_analyzer.analyze_predict_result()
        # DSTRIBUTION MODEL — trained only on the alike_metrics subset.
        predictor = predictor_rep.get_predictor('DST', self.PRED_TYPE)
        assert predictor is not None,\
            print(' predictor has not found, type: ' + self.PRED_TYPE)
        alike_df = training_m.get_specific_df(self.alike_metrics)
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1,100))
        # X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        X_resampled, y_resampled = alike_df.as_matrix(),\
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(self.alike_metrics)
        itg_value, importance = predictor.predict_test_data(
            model, alike_df, evaluate_m.fault, self.TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            dst_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            dst_analyzer.calculate()
            dst_analyzer.analyze_predict_result()
    # print('feature inportance: {}'.format(importance))
    # Conduct Mann-Whitney U test on selected result columns.
    self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=0)
    self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=2)
    self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=3)
    # Draw boxplot graph (currently disabled).
    if box_plotting:
        # self.draw_result_boxplot(rfn_analyzer, itg_analyzer)
        pass
    # Export the averaged report.
    # nml_df = nml_analyzer.calculate_average(self.ITER)
    rfn_df = rfn_analyzer.calculate_average(self.ITER)
    itg_df = dst_analyzer.calculate_average(self.ITER)
    df = pd.concat([rfn_df, itg_df], ignore_index=True)
    rfn_analyzer.export(target_sw=self.TARGET, df=df,
                        predictor_type=self.PRED_TYPE)  # any analyzer instance works here
def predict_prob(self, box_plotting=True, result_exporting=True):
    """Train NML/RFN/ITG predictors and collect probability predictions.

    Compares the previous and final versions of ``self.model``; training
    data is oversampled with RandomOverSampler each iteration.  Results
    are accumulated in per-predictor ``Analyzer`` instances; unlike the
    ``predict`` variants, nothing is exported here.

    NOTE(review): indentation was reconstructed from a collapsed source;
    statement nesting (notably inside the ``if report_df is not None``
    guards) should be confirmed against version control.
    """
    self.model.set_param_dict(self.PRED_TYPE)
    self.__get_logger()
    ver, predict_ver = self.model.previous_version, self.model.final_version
    pre_model = mc.retrieve_model(self.model.sw_name, self.model.final_version)
    predictor_rep = PredictorRepository(pre_model, self.model)
    training_m = Metrics(ver, self.METRICS_DIR, pre_model)
    evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
    self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    msg = "Sw: {}, version: {}, alike_metrics: {}"\
        .format(self.model.sw_name, predict_ver, self.alike_metrics)
    self.report_logger.info(msg)
    if predict_ver is None or self.TARGET is None:
        self.error_logger.error('could not create AUCAnalyzer instance.\
            predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
        return
    nml_analyzer = Analyzer(predict_ver, 'NML')
    rfn_analyzer = Analyzer(predict_ver, 'RFN')
    itg_analyzer = Analyzer(predict_ver, 'ITG')
    # for i in tqdm(range(self.ITER)):
    for i in range(self.ITER):
        # NML MODEL — all product metrics, oversampled training set.
        predictor = predictor_rep.get_predictor('NML', self.PRED_TYPE)
        if predictor is None:
            print(' predictor has not found, type: ' + self.PRED_TYPE)
            return
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.product_df, training_m.fault)
        # X_resampled, y_resampled = training_m.product_df.as_matrix(),\
        #     training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_proba(
            model, evaluate_m.product_df, evaluate_m.fault,
            self.TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            nml_analyzer.calculate()
            # nml_analyzer.analyze_predict_result()
        # RFN MODEL — merged metrics, oversampled training set.
        predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
        assert predictor is not None,\
            print(' predictor has not found, type: ' + self.PRED_TYPE)
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df, training_m.fault)
        # X_resampled, y_resampled = training_m.mrg_df.as_matrix(),\
        #     training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_proba(
            model, evaluate_m.mrg_df, evaluate_m.fault,
            self.TARGET + "-ex1rfn.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            rfn_analyzer.calculate()
            # rfn_analyzer.analyze_predict_result()
        # INTELLIGENCE MODEL — restricted to the alike_metrics subset.
        predictor = predictor_rep.get_predictor('ITG', self.PRED_TYPE)
        assert predictor is not None,\
            print(' predictor has not found, type: ' + self.PRED_TYPE)
        alike_df = training_m.get_specific_df(self.alike_metrics)
        sm = RandomOverSampler(ratio='auto', random_state=random.randint(1, 100))
        X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        # X_resampled, y_resampled = alike_df.as_matrix(),\
        #     training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(self.alike_metrics)
        rfn_value, importance = predictor.predict_proba(
            model, alike_df, evaluate_m.fault, self.TARGET + "-ex1itg.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            itg_analyzer.calculate()
def predict(self, box_plotting=True, result_exporting=True):
    """Train and evaluate NML/RFN/ITG predictors, analyze and export results.

    Compares the previous and final versions of ``self.model``; trains on
    raw (unresampled) matrices, runs ``self.ITER`` iterations per
    predictor, applies a Mann-Whitney U test, draws a boxplot, and writes
    averaged and count reports.

    NOTE(review): indentation was reconstructed from a collapsed source;
    statement nesting (notably inside the ``if report_df is not None``
    guards) should be confirmed against version control.
    """
    self.model.set_param_dict(self.PRED_TYPE)
    self.__get_logger()
    ver, predict_ver = self.model.previous_version, self.model.final_version
    pre_model = mc.retrieve_model(self.model.sw_name, self.model.final_version)
    predictor_rep = PredictorRepository(pre_model, self.model)
    training_m = Metrics(ver, self.METRICS_DIR, pre_model)
    evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
    self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
    msg = "Sw: {}, version: {}, alike_metrics: {}"\
        .format(self.model.sw_name, predict_ver, self.alike_metrics)
    self.report_logger.info(msg)
    if predict_ver is None or self.TARGET is None:
        self.error_logger.error('could not create AUCAnalyzer instance.\
            predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
        return
    nml_analyzer = AUCAnalyzer(predict_ver, 'NML', self.TARGET)
    rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', self.TARGET)
    itg_analyzer = AUCAnalyzer(predict_ver, 'ITG', self.TARGET)
    # nml_analyzer = Analyzer(predict_ver, 'NML')
    # rfn_analyzer = Analyzer(predict_ver, 'RFN')
    # itg_analyzer = Analyzer(predict_ver, 'ITG')
    # acum_nml_report= pd.DataFrme([])
    # acum_rfn_report= pd.DataFrme([])
    # acum_intel_report= pd.DataFrme([])
    # for i in tqdm(range(self.ITER)):
    for i in range(self.ITER):
        # NML MODEL — all product metrics, no resampling.
        predictor = predictor_rep.get_predictor('NML', self.PRED_TYPE)
        if predictor is None:
            print(' predictor has not found, type: ' + self.PRED_TYPE)
            return
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1,100))
        # X_resampled, y_resampled = sm.fit_sample( training_m.product_df, training_m.fault )
        X_resampled, y_resampled = training_m.product_df.as_matrix(),\
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        nml_value, importance = predictor.predict_test_data(
            model, evaluate_m.product_df, evaluate_m.fault,
            self.TARGET + "-ex1nml.csv")
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            nml_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            nml_analyzer.calculate()
            nml_analyzer.analyze_predict_result()
        # RFN MODEL — merged metrics, fixed classification threshold.
        predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
        assert predictor is not None,\
            print(' predictor has not found, type: ' + self.PRED_TYPE)
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1,100))
        # X_resampled, y_resampled = sm.fit_sample( training_m.mrg_df, training_m.fault )
        X_resampled, y_resampled = training_m.mrg_df.as_matrix(),\
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        rfn_value, importance = predictor.predict_test_data(
            model, evaluate_m.mrg_df, evaluate_m.fault,
            self.TARGET + "-ex1rfn.csv", threshold=0.6)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            rfn_analyzer.calculate()
            rfn_analyzer.analyze_predict_result()
        # INTELLIGENCE MODEL — restricted to the alike_metrics subset.
        predictor = predictor_rep.get_predictor('ITG', self.PRED_TYPE)
        assert predictor is not None,\
            print(' predictor has not found, type: ' + self.PRED_TYPE)
        alike_df = training_m.get_specific_df(self.alike_metrics)
        # sm = RandomOverSampler(ratio='auto', random_state=random.randint(1,100))
        # X_resampled, y_resampled = sm.fit_sample(alike_df, training_m.fault)
        X_resampled, y_resampled = alike_df.as_matrix(),\
            training_m.fault.as_matrix()
        model = predictor.train_model(X_resampled, y_resampled)
        alike_df = evaluate_m.get_specific_df(self.alike_metrics)
        itg_value, importance = predictor.predict_test_data(
            model, alike_df, evaluate_m.fault,
            self.TARGET + "-ex1itg.csv", threshold=0.6)
        predictor.set_is_new_df(evaluate_m.isNew)
        predictor.set_is_modified_df(evaluate_m.isModified)
        report_df = predictor.export_report(predict_ver)
        if report_df is not None:
            itg_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
            itg_analyzer.calculate()
            itg_analyzer.analyze_predict_result()
    print('feature inportance: {}'.format(importance))
    # Conduct Mann-Whitney U test on selected result columns.
    self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=0)
    self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=2)
    self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=3)
    # Draw boxplot graph.
    if box_plotting:
        self.draw_result_boxplot(rfn_analyzer, itg_analyzer)
    # Export the averaged report.
    nml_df = nml_analyzer.calculate_average(self.ITER)
    rfn_df = rfn_analyzer.calculate_average(self.ITER)
    itg_df = itg_analyzer.calculate_average(self.ITER)
    df = pd.concat([nml_df, rfn_df, itg_df], ignore_index=True)
    nml_analyzer.export(target_sw=self.TARGET, df=df,
                        predictor_type=self.PRED_TYPE)  # any analyzer instance works here
    nml_df = nml_analyzer.calculate_num_report_averge(self.ITER)
    rfn_df = rfn_analyzer.calculate_num_report_averge(self.ITER)
    itg_df = itg_analyzer.calculate_num_report_averge(self.ITER)
    if result_exporting:
        nml_analyzer.export_count_report(target_sw=self.TARGET, df=nml_df,
                                         predictor_type=self.PRED_TYPE)
        rfn_analyzer.export_count_report(target_sw=self.TARGET, df=rfn_df,
                                         predictor_type=self.PRED_TYPE)
        itg_analyzer.export_count_report(target_sw=self.TARGET, df=itg_df,
                                         predictor_type=self.PRED_TYPE)