def main():
    """Run the classification workflow on ``./dataset/data.csv``.

    Reads the CSV through ``csv_to_df``, derives the feature and label
    matrices with ``feature_eng`` / ``label_eng``, builds the fold generator
    with ``cv_generator`` and hands everything to ``frame_classification``
    together with the model returned by ``model_XGBoost``.
    """
    raw_frame = csv_to_df("./dataset/", "data.csv")

    features = feature_eng(raw_frame)
    labels = label_eng(raw_frame)
    folds = cv_generator(features, labels)

    # Alternative estimators that were tried and left switched off:
    # classifier = model_LR()
    # classifier = model_SVM()
    classifier = model_XGBoost()

    frame_classification(folds, classifier, features, labels)
def main():
    """Optimise the ``"Rs"`` parameter for the samples in ``opt_data.csv``.

    Training features/labels and their normalisation statistics come from
    ``op_feature_ext`` applied to ``./dataset/data.csv``; the samples to
    optimise are read with ``op_sample_reader`` and passed to
    ``para_optimize`` along with the model from ``model_XGBoost``. The
    normalised input samples and the optimisation result are printed.
    """
    data_dir = "./dataset/"
    target_col = "Rs"

    training_frame = csv_to_df(data_dir, "data.csv")
    feature, label, feature_mean, feature_std = op_feature_ext(
        training_frame, target_col
    )
    running_model = model_XGBoost()

    # NOTE(review): this call passes five positional arguments; an
    # op_sample_reader variant that also takes ``pred_model`` exists --
    # confirm which signature is in scope for this entry point.
    sample_bundle = op_sample_reader(
        data_dir, "opt_data.csv", target_col, feature_mean, feature_std
    )
    # Only the normalised samples are used below; the other two items are
    # unpacked to keep the three-element contract explicit.
    input_sample, _feature_df, _origin_col = sample_bundle

    op_result = para_optimize(
        running_model, feature, label, feature_mean, feature_std, input_sample
    )

    print(input_sample)
    print(op_result)
def op_sample_reader(sample_path, sample_name, col_name, pred_model,
                     feature_mean, feature_std):
    """Read the samples to optimise and normalise their features.

    Args:
        sample_path: Forwarded to ``csv_to_df`` as its first argument.
        sample_name: Forwarded to ``csv_to_df`` as its second argument.
        col_name: Column holding the parameter to optimise; it is removed
            from the feature set and returned separately.
        pred_model: Forwarded to ``csv_to_df`` as its third argument.
        feature_mean: Value subtracted from the raw features.
        feature_std: Value the centred features are divided by.

    Returns:
        A 3-tuple ``(normalised_features, frame, column)`` where ``frame`` is
        the frame exactly as returned by ``csv_to_df`` (still containing
        ``col_name``) and ``column`` is ``frame[col_name]``.
    """
    sample_frame = csv_to_df(sample_path, sample_name, pred_model)
    target_column = sample_frame[col_name]

    without_target = sample_frame.drop(columns=[col_name])
    # The trailing column of what is left is not treated as a feature.
    feature_block = without_target.values[:, :-1]

    centred = feature_block - feature_mean
    normalised = centred / feature_std
    return normalised, sample_frame, target_column
def pred_sample_reader(sample_path, sample_name, pred_model, feature_mean,
                       feature_std):
    """Read the samples to predict and build their normalised features.

    The ``"label"`` column is removed, four interaction columns are appended
    (``ApxRs``, ``AexRs``, ``AexAp``, ``ApxRsxAe`` -- products of the
    ``Ap``, ``Rs`` and ``Ae`` columns), and the resulting values are
    normalised as ``(x - feature_mean) / feature_std``.

    Args:
        sample_path: Forwarded to ``csv_to_df`` as its first argument.
        sample_name: Forwarded to ``csv_to_df`` as its second argument.
        pred_model: Forwarded to ``csv_to_df`` as its third argument.
        feature_mean: Value subtracted from the feature values.
        feature_std: Value the centred features are divided by.

    Returns:
        ``(normalised_features, frame)`` where ``frame`` is the frame exactly
        as returned by ``csv_to_df``.
    """
    sample_frame = csv_to_df(sample_path, sample_name, pred_model)
    engineered = sample_frame.drop(columns=["label"])

    ap = engineered['Ap']
    rs = engineered['Rs']
    ae = engineered['Ae']

    # Column insertion order matters: it fixes the layout of the matrix
    # that is normalised below.
    engineered['ApxRs'] = ap * rs
    engineered['AexRs'] = ae * rs
    engineered['AexAp'] = ae * ap
    engineered['ApxRsxAe'] = ap * rs * ae

    normalised = (engineered.values - feature_mean) / feature_std
    return normalised, sample_frame
def main():
    """Print ``(column, gain)`` pairs for ``./dataset/data.csv``.

    Every column except the last one is scored with ``calcu_each_gain``,
    which receives the column and the whole frame.
    """
    raw_df = csv_to_df("./dataset/", "data.csv")

    # All column labels but the last one.
    scored_columns = raw_df.columns[:-1]

    columns_entropy = []
    for col in scored_columns:
        gain = calcu_each_gain(raw_df[col], raw_df)
        columns_entropy.append((col, gain))

    print(columns_entropy)
def _print_enabled_stages(args):
    """Print the banner listing every stage switched on in ``args``."""
    print(" ****** Running function list ****** ")
    # Identity checks are kept on purpose: only a literal ``True`` enables
    # a stage, exactly as before.
    if args.run_infogain is True:
        print("Priority of parameters influencing product quality")
    if args.run_qualitypredict is True:
        print("Product quality predict")
    if args.run_parameteroptimize is True:
        print("Process parameter optimize")


def _run_info_gain_stage(raw_df):
    """Print the information gain of every column except the last one."""
    print(" ****** Priority of parameters influencing product quality ****** ")
    columns_entropy = [
        (col, info_gain.calcu_each_gain(raw_df[col], raw_df))
        for col in raw_df.iloc[:, :-1]
    ]
    print(" *** Information Gain of process parameter *** ")
    print(columns_entropy)


def _run_quality_predict_stage(args, feature_matrix, label_matrix,
                               feature_mean, feature_std):
    """Train the selected model, then predict quality for the new samples."""
    print(" ****** Product quality predict ****** ")
    use_xgboost = args.pred_model == "XGBoost"

    if use_xgboost:
        generator = quality_predict.cv_generator(feature_matrix, label_matrix)
    else:
        # The fold generator gets 0/1 labels on this path while training
        # below still uses the original labels.
        # NOTE(review): 1.62 is presumably the pass/fail quality cutoff and
        # the binarisation presumably exists for fold splitting -- confirm.
        label_cutoff = 1.62
        label_matrix_int = np.array(
            [1 if ele >= label_cutoff else 0 for ele in label_matrix]
        )
        generator = quality_predict.cv_generator(feature_matrix,
                                                 label_matrix_int)

    print(" *** Training process *** ")
    if use_xgboost:
        running_model = quality_predict.model_XGBoost()
        quality_predict.frame_classification(
            generator, running_model, feature_matrix, label_matrix,
            args.output_dir)
    else:
        running_model = quality_predict.model_GBDT()
        quality_predict.frame_regression(
            generator, running_model, feature_matrix, label_matrix,
            args.output_dir)

    pred_norm_feature, pre_pred_df = quality_predict.pred_sample_reader(
        args.file_path, args.pred_file_name, args.pred_model,
        feature_mean, feature_std)
    # Plain concatenation is kept so the path stays identical to the one
    # used before (``output_dir`` is expected to carry its own separator).
    pred_model_dir = args.output_dir + "model_1"
    pred_result = quality_predict.qual_pred(
        args.pred_model, pred_model_dir, pred_norm_feature)

    print(" *** Product quality predict *** ")
    print("Origin process parameter")
    print(pre_pred_df)
    print("Quality result prediction ")
    print(pred_result)


def _run_parameter_optimize_stage(args, raw_df):
    """Optimise ``args.opt_parameter`` for the samples in the opt file."""
    print(" ****** Process parameter optimize ****** ")
    feature, label, feature_mean, feature_std = op_feature_ext(
        raw_df, args.opt_parameter)

    if args.pred_model == "XGBoost":
        running_model = quality_predict.model_XGBoost()
    else:
        running_model = quality_predict.model_GBDT()

    # The third returned item (the original parameter column) is not used.
    input_sample, ori_fea, _ori_col = op_sample_reader(
        args.file_path, args.opt_file_name, args.opt_parameter,
        args.pred_model, feature_mean, feature_std)
    op_result = para_optimize(
        running_model, feature, label, args.pred_model, input_sample)

    print(" *** Process parameter optimize *** ")
    print("Origin process parameter")
    print(ori_fea)
    print("Optimized %s" % (args.opt_parameter))
    print(op_result)


def main(args):
    """Run the stages selected in ``args`` on the training dataset.

    The training CSV is always loaded and turned into feature/label
    matrices; each of the three stages (information gain, quality
    prediction, parameter optimisation) then runs only when its flag is
    exactly ``True``.

    Args:
        args: Object exposing the attributes ``run_infogain``,
            ``run_qualitypredict``, ``run_parameteroptimize``,
            ``file_path``, ``file_name``, ``pred_model``, ``output_dir``,
            ``pred_file_name``, ``opt_parameter`` and ``opt_file_name``.
            ``pred_model == "XGBoost"`` selects the XGBoost/classification
            path; any other value selects the GBDT/regression path.
    """
    _print_enabled_stages(args)

    # The "trianing" spelling is left untouched so the emitted log line
    # stays byte-identical for anything that matches on it.
    print(" ****** Loading trianing data from %s ******"
          % (args.file_path + args.file_name))
    raw_df = csv_to_df(args.file_path, args.file_name, args.pred_model)
    print(" *** Head of original dataset *** ")
    print(raw_df.head())

    feature_matrix, ori_feature_mean, ori_feature_std = feature_eng(raw_df)
    label_matrix = label_eng(raw_df)

    if args.run_infogain is True:
        _run_info_gain_stage(raw_df)

    if args.run_qualitypredict is True:
        _run_quality_predict_stage(args, feature_matrix, label_matrix,
                                   ori_feature_mean, ori_feature_std)

    if args.run_parameteroptimize is True:
        _run_parameter_optimize_stage(args, raw_df)