def main():
    """Run the classification pipeline on ``data.csv`` from ``./dataset/``.

    The loaded frame is passed to ``feature_eng`` and ``label_eng``; their
    results feed ``cv_generator`` and, together with the model returned by
    ``model_XGBoost``, are handed to ``frame_classification``.
    """
    raw_frame = csv_to_df("./dataset/", "data.csv")

    features = feature_eng(raw_frame)
    labels = label_eng(raw_frame)
    folds = cv_generator(features, labels)

    # model_LR() and model_SVM() were the alternatives tried at this spot.
    classifier = model_XGBoost()

    frame_classification(folds, classifier, features, labels)
예제 #2
0
def main():
    """Run parameter optimisation for the ``"Rs"`` column and print the result.

    Loads ``data.csv`` from ``./dataset/``, extracts features/labels and the
    feature mean/std via ``op_feature_ext``, reads the samples in
    ``opt_data.csv`` through ``op_sample_reader``, then prints the sample
    array followed by whatever ``para_optimize`` returns.
    """
    target_column = "Rs"
    dataset_dir = "./dataset/"

    training_df = csv_to_df(dataset_dir, "data.csv")
    feature, label, feature_mean, feature_std = op_feature_ext(
        training_df, target_column)
    running_model = model_XGBoost()

    # NOTE(review): another op_sample_reader in this file takes an extra
    # pred_model parameter -- confirm which signature this call targets.
    # Only the first of the three returned values is used here.
    input_sample, _, _ = op_sample_reader(
        dataset_dir, "opt_data.csv", target_column, feature_mean, feature_std)

    op_result = para_optimize(running_model, feature, label, feature_mean,
                              feature_std, input_sample)

    print(input_sample, op_result, sep="\n")
예제 #3
0
def op_sample_reader(sample_path, sample_name, col_name, pred_model,
                     feature_mean, feature_std):
    """Read an optimisation sample file and return its normalised features.

    Args:
        sample_path: Forwarded to ``csv_to_df`` as its first argument.
        sample_name: Forwarded to ``csv_to_df`` as its second argument.
        col_name: Column removed from the frame before features are taken.
        pred_model: Forwarded to ``csv_to_df`` as its third argument.
        feature_mean: Subtracted from the raw feature array.
        feature_std: Divisor applied after the mean is subtracted.

    Returns:
        A 3-tuple ``(normalised_features, loaded_frame, removed_column)``.
        ``loaded_frame`` is the frame exactly as returned by ``csv_to_df``
        (it still contains ``col_name``); ``removed_column`` is
        ``loaded_frame[col_name]``.
    """
    sample_df = csv_to_df(sample_path, sample_name, pred_model)
    target_values = sample_df[col_name]

    without_target = sample_df.drop(columns=[col_name])
    # The trailing column of what remains is excluded from the features
    # (presumably the label column -- TODO confirm against csv_to_df).
    trimmed = without_target.values[:, :-1]

    normalised = (trimmed - feature_mean) / feature_std
    return normalised, sample_df, target_values
def pred_sample_reader(sample_path, sample_name, pred_model, feature_mean,
                       feature_std):
    """Read a prediction sample file and return its normalised feature array.

    The ``"label"`` column is dropped, four product columns built from
    ``Ap``, ``Rs`` and ``Ae`` are appended, and the resulting values are
    standardised with the supplied mean and std.

    Args:
        sample_path: Forwarded to ``csv_to_df`` as its first argument.
        sample_name: Forwarded to ``csv_to_df`` as its second argument.
        pred_model: Forwarded to ``csv_to_df`` as its third argument.
        feature_mean: Subtracted from the feature array.
        feature_std: Divisor applied after the mean is subtracted.

    Returns:
        ``(normalised_features, loaded_frame)`` where ``loaded_frame`` is the
        frame as returned by ``csv_to_df`` (still containing ``"label"``).
    """
    sample_df = csv_to_df(sample_path, sample_name, pred_model)
    features_df = sample_df.drop(columns=["label"])

    ap = features_df['Ap']
    rs = features_df['Rs']
    ae = features_df['Ae']

    # Interaction terms are appended in this fixed order, after the
    # original columns, so the column layout of the array is unchanged.
    features_df['ApxRs'] = ap * rs
    features_df['AexRs'] = ae * rs
    features_df['AexAp'] = ae * ap
    features_df['ApxRsxAe'] = ap * rs * ae

    normalised = (features_df.values - feature_mean) / feature_std
    return normalised, sample_df
def main():
    """Print ``(column, gain)`` pairs for every column except the last one.

    ``data.csv`` is loaded from ``./dataset/`` and ``calcu_each_gain`` is
    called once per column with that column and the whole frame.
    """
    raw_df = csv_to_df("./dataset/", "data.csv")

    columns_entropy = []
    # The final column is skipped (presumably the label -- TODO confirm).
    for column in raw_df.columns[:-1]:
        gain = calcu_each_gain(raw_df[column], raw_df)
        columns_entropy.append((column, gain))

    print(columns_entropy)
예제 #6
0
def main(args):
    """Run the stages selected on ``args`` against the training data.

    Attributes read from ``args``:
        run_infogain, run_qualitypredict, run_parameteroptimize: a stage runs
            only when its flag is exactly ``True``.
        file_path, file_name: location of the training data.
        pred_model: ``"XGBoost"`` selects ``model_XGBoost`` together with
            ``frame_classification``; any other value selects ``model_GBDT``
            together with ``frame_regression``.
        pred_file_name: sample file for the quality-prediction stage.
        output_dir: passed to the training frame; ``output_dir + "model_1"``
            is the model location given to ``qual_pred``.
        opt_parameter, opt_file_name: column and sample file for the
            parameter-optimisation stage.

    Progress and results are written with ``print``; nothing is returned.
    """
    print(" ****** Running function list ****** ")
    # Announce the enabled stages, checking the flags in a fixed order.
    stage_titles = (
        ("run_infogain", "Priority of parameters influencing product quality"),
        ("run_qualitypredict", "Product quality predict"),
        ("run_parameteroptimize", "Process parameter optimize"),
    )
    for flag_name, title in stage_titles:
        if getattr(args, flag_name) is True:
            print(title)

    print()
    data_location = args.file_path + args.file_name
    print(" ****** Loading trianing data from %s ******" % data_location)
    raw_df = csv_to_df(args.file_path, args.file_name, args.pred_model)
    print(" *** Head of original dataset *** ")
    print(raw_df.head())
    feature_matrix, ori_feature_mean, ori_feature_std = feature_eng(raw_df)
    label_matrix = label_eng(raw_df)

    # --- Stage 1: information gain of every column except the last ---
    if args.run_infogain is True:
        print(" ****** Priority of parameters influencing product quality ****** ")
        gain_per_column = [
            (col, info_gain.calcu_each_gain(raw_df[col], raw_df))
            for col in raw_df.iloc[:, :-1]
        ]
        print(" *** Information Gain of process parameter *** ")
        print(gain_per_column)

    # --- Stage 2: train a model, then predict on the prediction samples ---
    if args.run_qualitypredict is True:
        print(" ****** Product quality predict ****** ")

        use_xgboost = args.pred_model == "XGBoost"
        if use_xgboost:
            cv_labels = label_matrix
        else:
            # Only the fold generator sees these 0/1 labels (1 when the
            # value is >= 1.62); training below still gets label_matrix.
            cv_labels = np.array(
                [1 if ele >= 1.62 else 0 for ele in label_matrix])
        generator = quality_predict.cv_generator(feature_matrix, cv_labels)

        print(" *** Training process *** ")
        if use_xgboost:
            running_model = quality_predict.model_XGBoost()
            run_training = quality_predict.frame_classification
        else:
            running_model = quality_predict.model_GBDT()
            run_training = quality_predict.frame_regression
        run_training(generator, running_model, feature_matrix, label_matrix,
                     args.output_dir)

        # Prediction samples are normalised with the training mean/std.
        pred_norm_feature, pre_pred_df = quality_predict.pred_sample_reader(
            args.file_path,
            args.pred_file_name,
            args.pred_model,
            ori_feature_mean,
            ori_feature_std,
        )

        pred_model_dir = args.output_dir + "model_1"
        pred_result = quality_predict.qual_pred(
            args.pred_model, pred_model_dir, pred_norm_feature)

        print(" *** Product quality predict *** ")
        print("Origin process parameter")
        print(pre_pred_df)
        print("Quality result prediction ")
        print(pred_result)

    # --- Stage 3: optimise the column named by args.opt_parameter ---
    if args.run_parameteroptimize is True:
        print(" ****** Process parameter optimize ****** ")
        feature, label, feature_mean, feature_std = op_feature_ext(
            raw_df, args.opt_parameter)

        build_model = (quality_predict.model_XGBoost
                       if args.pred_model == "XGBoost"
                       else quality_predict.model_GBDT)
        running_model = build_model()

        # The third returned value is not used in this stage.
        input_sample, ori_fea, _ = op_sample_reader(
            args.file_path,
            args.opt_file_name,
            args.opt_parameter,
            args.pred_model,
            feature_mean,
            feature_std,
        )
        op_result = para_optimize(
            running_model, feature, label, args.pred_model, input_sample)

        print(" *** Process parameter optimize *** ")
        print("Origin process parameter")
        print(ori_fea)
        print("Optimized %s" % args.opt_parameter)
        print(op_result)