Example #1
0
def exp(v1, v2):
    """Compare the metrics of versions *v1* and *v2*, print the metrics the
    two versions have in common ("alike" metrics), then run prediction."""
    origin_a = Metrics_Origin(v1, METRICS_DIR)
    origin_b = Metrics_Origin(v2, METRICS_DIR)
    print('{}-{}'.format(v1, v2))
    # Metrics whose distributions are similar across the two versions.
    alike_metrics = st.compare_two_versions(origin_a, origin_b)
    print(alike_metrics)
    predict(v1, v2, alike_metrics)
Example #2
0
def exp(model, metrics_dir):
    """Run the Ex01 experiment for *model*: compare the metrics of its
    previous and current versions and predict using the alike metrics.

    NOTE(review): the ``metrics_dir`` argument is effectively ignored — the
    original code immediately overwrote it with the hard-coded Derby path
    below. The parameter is kept unchanged for interface compatibility;
    confirm whether any caller expects its value to be honoured.
    """
    # Bind the hard-coded path to a distinct local instead of clobbering the
    # parameter (same value, same behavior — just no shadowing).
    derby_dir = '/Users/{}/Dropbox/STUDY/{}/Derby/all'\
        .format(ENV, model.dir_name)
    ex01 = Ex01(model, derby_dir)

    ex01.METRICS_DIR = derby_dir

    v1 = model.curr_version
    v2 = model.pre_version
    version1 = Metrics_Origin(v1, derby_dir)
    version2 = Metrics_Origin(v2, derby_dir)
    print(v1 + '-' + v2)
    alike_metrics = st.compare_two_versions(version1, version2)
    print(alike_metrics)
    ex01.predict(v1, v2, alike_metrics)
Example #3
0
 def invest_distribution(self, model1, model2):
     """Inspect how the alike metrics are distributed in two models' final
     versions, printing the filtered metric frames for manual review.

     BUGFIX: removed a leftover bare ``raise Exception`` debug stop that made
     the summary message below unreachable dead code.
     """
     training_m = Metrics(model1.final_version, self.METRICS_DIR, model1)
     evaluate_m = Metrics(model2.final_version, self.METRICS_DIR, model2)
     # Metrics whose distributions are similar across the two versions.
     alike_metrics = st.compare_two_versions(training_m, evaluate_m)
     print(alike_metrics)
     print(type(alike_metrics))
     print(training_m.mrg_df[alike_metrics])
     print(evaluate_m.mrg_df[alike_metrics])
     msg = "Sw: {}:{}-{}:{}, alike_metrics: {}"\
         .format(model1.sw_name, model1.final_version, model2.sw_name, model2.final_version, alike_metrics)
     print(msg)
Example #4
0
    def predict(self, box_plotting=False, result_exporting=False):
        """Train and evaluate an RFN (full merged-metrics) model and a DST
        (distribution-filtered, alike-metrics-only) model ``self.ITER`` times,
        compare them with Mann-Whitney U tests and export an averaged report.

        :param box_plotting: reserved; the box-plot branch is currently a no-op.
        :param result_exporting: unused here; kept for interface compatibility.
        """
        self.model2.set_param_dict(self.PRED_TYPE)
        self.__get_logger()
        # Fetch every metric for both (sw_name, version) pairs.
        training_m = Metrics(self.model1.final_version, self.METRICS_DIR,
                             self.model1)
        evaluate_m = Metrics(self.model2.final_version, self.METRICS_DIR,
                             self.model2)

        # Metrics whose distributions are alike between the two versions; the
        # DST model filters its training data down to these so filtered vs
        # unfiltered prediction accuracy can be compared.
        self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)

        predict_ver = self.model2.final_version
        predictor_rep = PredictorRepository(predict_ver, self.model1)
        msg = "Sw: {}:{}-{}:{}, alike_metrics: {}"\
            .format(self.model1.sw_name, self.model1.final_version, self.model2.sw_name, self.model2.final_version, self.alike_metrics)
        self.report_logger.info(msg)
        print(msg)

        if predict_ver is None or self.TARGET is None:
            self.error_logger.error('could not create AUCAnalyzer instance.\
            predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
            return
        rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', self.TARGET)
        dst_analyzer = AUCAnalyzer(predict_ver, 'DST', self.TARGET)

        for i in range(self.ITER):

            # RFN MODEL: train on the full merged metrics frame.
            predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
            # BUGFIX: the assert message used to be print(...), which returns
            # None — the message was lost. Attach the string itself instead.
            assert predictor is not None,\
                ' predictor has not found, type: ' + self.PRED_TYPE
            # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0;
            # migrate to .to_numpy() when the pandas pin allows.
            X_resampled, y_resampled = training_m.mrg_df.as_matrix(),\
                training_m.fault.as_matrix()
            model = predictor.train_model(X_resampled, y_resampled)
            rfn_value, importance = predictor.predict_test_data(
                model, evaluate_m.mrg_df, evaluate_m.fault,
                self.TARGET + "-ex1rfn.csv")
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                rfn_analyzer.calculate()
                rfn_analyzer.analyze_predict_result()

            # DISTRIBUTION MODEL: train only on the alike metrics.
            predictor = predictor_rep.get_predictor('DST', self.PRED_TYPE)
            assert predictor is not None,\
                ' predictor has not found, type: ' + self.PRED_TYPE
            alike_df = training_m.get_specific_df(self.alike_metrics)
            X_resampled, y_resampled = alike_df.as_matrix(),\
                training_m.fault.as_matrix()
            model = predictor.train_model(X_resampled, y_resampled)
            alike_df = evaluate_m.get_specific_df(self.alike_metrics)
            itg_value, importance = predictor.predict_test_data(
                model, alike_df, evaluate_m.fault, self.TARGET + "-ex1itg.csv")
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                dst_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                dst_analyzer.calculate()
                dst_analyzer.analyze_predict_result()

        # Mann-Whitney U tests over selected result columns.
        self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=0)
        self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=2)
        self.conduct_mh_test(rfn_analyzer, dst_analyzer, index=3)

        # Box-plot drawing is not implemented for this experiment yet.
        if box_plotting:
            pass

        # Export the averaged report (any analyzer instance may export it).
        rfn_df = rfn_analyzer.calculate_average(self.ITER)
        itg_df = dst_analyzer.calculate_average(self.ITER)
        df = pd.concat([rfn_df, itg_df], ignore_index=True)
        rfn_analyzer.export(target_sw=self.TARGET,
                            df=df,
                            predictor_type=self.PRED_TYPE)
Example #5
0
    def predict_prob(self, box_plotting=True, result_exporting=True):
        """Train NML (product metrics), RFN (merged metrics) and ITG (alike
        metrics only) models ``self.ITER`` times, collecting probability-based
        prediction reports into one analyzer per model type.

        :param box_plotting: unused here; kept for interface compatibility.
        :param result_exporting: unused here; kept for interface compatibility.
        """
        self.model.set_param_dict(self.PRED_TYPE)
        self.__get_logger()
        ver, predict_ver = self.model.previous_version, self.model.final_version
        pre_model = mc.retrieve_model(self.model.sw_name,
                                      self.model.final_version)
        predictor_rep = PredictorRepository(pre_model, self.model)
        training_m = Metrics(ver, self.METRICS_DIR, pre_model)
        evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
        # Metrics whose distributions are alike between the two versions.
        self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
        msg = "Sw: {}, version: {}, alike_metrics: {}"\
            .format(self.model.sw_name, predict_ver, self.alike_metrics)
        self.report_logger.info(msg)

        if predict_ver is None or self.TARGET is None:
            self.error_logger.error('could not create AUCAnalyzer instance.\
            predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
            return
        nml_analyzer = Analyzer(predict_ver, 'NML')
        rfn_analyzer = Analyzer(predict_ver, 'RFN')
        itg_analyzer = Analyzer(predict_ver, 'ITG')

        for i in range(self.ITER):
            # NML MODEL: raw product metrics, oversampled to balance classes.
            predictor = predictor_rep.get_predictor('NML', self.PRED_TYPE)
            if predictor is None:
                print(' predictor has not found, type: ' + self.PRED_TYPE)
                return
            # NOTE(review): RandomOverSampler's ``ratio`` argument was renamed
            # to ``sampling_strategy`` in imbalanced-learn 0.4 — verify pin.
            sm = RandomOverSampler(ratio='auto',
                                   random_state=random.randint(1, 100))
            X_resampled, y_resampled = sm.fit_sample(training_m.product_df,
                                                     training_m.fault)
            model = predictor.train_model(X_resampled, y_resampled)
            nml_value, importance = predictor.predict_proba(
                model, evaluate_m.product_df, evaluate_m.fault,
                self.TARGET + "-ex1nml.csv")
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                nml_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                nml_analyzer.calculate()

            # RFN MODEL: full merged metrics frame.
            predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
            # BUGFIX: the assert message used to be print(...), which returns
            # None — the message was lost. Attach the string itself instead.
            assert predictor is not None,\
                ' predictor has not found, type: ' + self.PRED_TYPE
            sm = RandomOverSampler(ratio='auto',
                                   random_state=random.randint(1, 100))
            X_resampled, y_resampled = sm.fit_sample(training_m.mrg_df,
                                                     training_m.fault)
            model = predictor.train_model(X_resampled, y_resampled)
            rfn_value, importance = predictor.predict_proba(
                model, evaluate_m.mrg_df, evaluate_m.fault,
                self.TARGET + "-ex1rfn.csv")
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                rfn_analyzer.calculate()

            # INTELLIGENCE MODEL: alike metrics only.
            predictor = predictor_rep.get_predictor('ITG', self.PRED_TYPE)
            assert predictor is not None,\
                ' predictor has not found, type: ' + self.PRED_TYPE
            alike_df = training_m.get_specific_df(self.alike_metrics)
            sm = RandomOverSampler(ratio='auto',
                                   random_state=random.randint(1, 100))
            X_resampled, y_resampled = sm.fit_sample(alike_df,
                                                     training_m.fault)
            model = predictor.train_model(X_resampled, y_resampled)
            alike_df = evaluate_m.get_specific_df(self.alike_metrics)
            rfn_value, importance = predictor.predict_proba(
                model, alike_df, evaluate_m.fault, self.TARGET + "-ex1itg.csv")
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                itg_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                itg_analyzer.calculate()
Example #6
0
    def predict(self, box_plotting=True, result_exporting=True):
        """Train NML (product metrics), RFN (merged metrics) and ITG (alike
        metrics only) models ``self.ITER`` times, analyze AUC-style results,
        run Mann-Whitney U tests between RFN and ITG, and export reports.

        :param box_plotting: draw a box plot of RFN vs ITG results.
        :param result_exporting: also export per-model count reports.
        """
        self.model.set_param_dict(self.PRED_TYPE)
        self.__get_logger()
        ver, predict_ver = self.model.previous_version, self.model.final_version
        pre_model = mc.retrieve_model(self.model.sw_name,
                                      self.model.final_version)
        predictor_rep = PredictorRepository(pre_model, self.model)
        training_m = Metrics(ver, self.METRICS_DIR, pre_model)
        evaluate_m = Metrics(predict_ver, self.METRICS_DIR, self.model)
        # Metrics whose distributions are alike between the two versions.
        self.alike_metrics = st.compare_two_versions(training_m, evaluate_m)
        msg = "Sw: {}, version: {}, alike_metrics: {}"\
            .format(self.model.sw_name, predict_ver, self.alike_metrics)
        self.report_logger.info(msg)

        if predict_ver is None or self.TARGET is None:
            self.error_logger.error('could not create AUCAnalyzer instance.\
            predict_ver: {}, target: {}'.format(predict_ver, self.TARGET))
            return
        nml_analyzer = AUCAnalyzer(predict_ver, 'NML', self.TARGET)
        rfn_analyzer = AUCAnalyzer(predict_ver, 'RFN', self.TARGET)
        itg_analyzer = AUCAnalyzer(predict_ver, 'ITG', self.TARGET)

        for i in range(self.ITER):
            # NML MODEL: raw product metrics.
            predictor = predictor_rep.get_predictor('NML', self.PRED_TYPE)
            if predictor is None:
                print(' predictor has not found, type: ' + self.PRED_TYPE)
                return
            # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0;
            # migrate to .to_numpy() when the pandas pin allows.
            X_resampled, y_resampled = training_m.product_df.as_matrix(),\
                training_m.fault.as_matrix()
            model = predictor.train_model(X_resampled, y_resampled)
            nml_value, importance = predictor.predict_test_data(
                model, evaluate_m.product_df, evaluate_m.fault,
                self.TARGET + "-ex1nml.csv")
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                nml_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                nml_analyzer.calculate()
                nml_analyzer.analyze_predict_result()

            # RFN MODEL: full merged metrics frame.
            predictor = predictor_rep.get_predictor('RFN', self.PRED_TYPE)
            # BUGFIX: the assert message used to be print(...), which returns
            # None — the message was lost. Attach the string itself instead.
            assert predictor is not None,\
                ' predictor has not found, type: ' + self.PRED_TYPE
            X_resampled, y_resampled = training_m.mrg_df.as_matrix(),\
                training_m.fault.as_matrix()
            model = predictor.train_model(X_resampled, y_resampled)
            rfn_value, importance = predictor.predict_test_data(
                model,
                evaluate_m.mrg_df,
                evaluate_m.fault,
                self.TARGET + "-ex1rfn.csv",
                threshold=0.6)
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                rfn_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                rfn_analyzer.calculate()
                rfn_analyzer.analyze_predict_result()

            # INTELLIGENCE MODEL: alike metrics only.
            predictor = predictor_rep.get_predictor('ITG', self.PRED_TYPE)
            assert predictor is not None,\
                ' predictor has not found, type: ' + self.PRED_TYPE
            alike_df = training_m.get_specific_df(self.alike_metrics)
            X_resampled, y_resampled = alike_df.as_matrix(),\
                training_m.fault.as_matrix()
            model = predictor.train_model(X_resampled, y_resampled)
            alike_df = evaluate_m.get_specific_df(self.alike_metrics)
            itg_value, importance = predictor.predict_test_data(
                model,
                alike_df,
                evaluate_m.fault,
                self.TARGET + "-ex1itg.csv",
                threshold=0.6)
            predictor.set_is_new_df(evaluate_m.isNew)
            predictor.set_is_modified_df(evaluate_m.isModified)
            report_df = predictor.export_report(predict_ver)
            if report_df is not None:
                itg_analyzer.set_report_df(report_df[self.REPORT_COLUMNS])
                itg_analyzer.calculate()
                itg_analyzer.analyze_predict_result()

        # BUGFIX: corrected the misspelled log message ("inportance").
        # NOTE(review): ``importance`` is from the last loop iteration and is
        # undefined when self.ITER == 0 — confirm ITER is always >= 1.
        print('feature importance: {}'.format(importance))

        # Mann-Whitney U tests over selected result columns.
        self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=0)
        self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=2)
        self.conduct_mh_test(rfn_analyzer, itg_analyzer, index=3)

        # Draw the box-plot graph comparing RFN and ITG.
        if box_plotting:
            self.draw_result_boxplot(rfn_analyzer, itg_analyzer)

        # Export the averaged report (any analyzer instance may export it).
        nml_df = nml_analyzer.calculate_average(self.ITER)
        rfn_df = rfn_analyzer.calculate_average(self.ITER)
        itg_df = itg_analyzer.calculate_average(self.ITER)
        df = pd.concat([nml_df, rfn_df, itg_df], ignore_index=True)
        nml_analyzer.export(target_sw=self.TARGET,
                            df=df,
                            predictor_type=self.PRED_TYPE)

        nml_df = nml_analyzer.calculate_num_report_averge(self.ITER)
        rfn_df = rfn_analyzer.calculate_num_report_averge(self.ITER)
        itg_df = itg_analyzer.calculate_num_report_averge(self.ITER)

        if result_exporting:
            nml_analyzer.export_count_report(target_sw=self.TARGET,
                                             df=nml_df,
                                             predictor_type=self.PRED_TYPE)
            rfn_analyzer.export_count_report(target_sw=self.TARGET,
                                             df=rfn_df,
                                             predictor_type=self.PRED_TYPE)
            itg_analyzer.export_count_report(target_sw=self.TARGET,
                                             df=itg_df,
                                             predictor_type=self.PRED_TYPE)