def reconstruct(sess, config):
    batch_size = config["batch_size"]
    dataset_filename = config["dataset"]
    if "dataset_test" in config:
        dataset_filename = config["dataset_test"]
    all_data, info = load_data(config, filename=dataset_filename)
    graph_index_list = []
    for i in range(all_data.num):
        graph_index_list.append([i, i])
    info.graph_index_list = graph_index_list
    info.pos_weight = get_pos_weight(all_data)
    info.norm = get_norm(all_data)
    print("pos_weight=", info.pos_weight)
    print("norm=", info.norm)
    model = CoreModel(sess, config, info, construct_feed_callback=construct_feed)
    model.build(importlib.import_module(config["model.py"]), is_train=False)
    vars_to_train = tf.trainable_variables()
    for v in vars_to_train:
        print(v)
    # Restore a trained model (no variable initialization needed: restore overwrites all variables)
    saver = tf.train.Saver()
    print("[LOAD]", config["load_model"])
    saver.restore(sess, config["load_model"])
    # Reconstruction
    start_t = time.time()
    cost, acc, pred_data = model.pred_and_eval(all_data)
    recons_data = pred_data
    if "reconstruction_test" in config:
        filename = config["reconstruction_test"]
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(recons_data, filename)
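# A minimal usage sketch for reconstruct(). The config keys mirror the ones
# the function reads above; every file path below is a hypothetical example.
def example_reconstruct():
    import tensorflow as tf
    config = {
        "batch_size": 32,
        "model.py": "model",                         # module name passed to importlib
        "dataset": "data/dataset.jbl",
        "dataset_test": "data/dataset_test.jbl",     # optional; overrides "dataset"
        "load_model": "model/model.ckpt",            # checkpoint restored via tf.train.Saver
        "reconstruction_test": "result/recons.jbl",  # optional; reconstruction dump path
    }
    with tf.Session() as sess:
        reconstruct(sess, config)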
def infer(sess, graph, config):
    batch_size = config["batch_size"]
    dataset_filename = config["dataset"]
    if "dataset_test" in config:
        dataset_filename = config["dataset_test"]
    all_data, info = load_data(config, filename=dataset_filename)
    model = CoreModel(sess, config, info)
    model.build(importlib.import_module(config["model.py"]), is_train=False)
    # Restore a trained model (no variable initialization needed: restore overwrites all variables)
    saver = tf.train.Saver()
    print("[LOAD]", config["load_model"])
    saver.restore(sess, config["load_model"])
    # Test
    start_t = time.time()
    test_cost, test_metrics, prediction_data = model.pred_and_eval(all_data)
    infer_time = time.time() - start_t
    print("final cost =", test_cost)
    print("accuracy   =", test_metrics["accuracy"])
    print("infer time:{0}[sec]".format(infer_time))
    if config["save_info_test"] is not None:
        result = {}
        result["test_cost"] = test_cost
        result["test_accuracy"] = test_metrics
        result["infer_time"] = infer_time
        save_path = config["save_info_test"]
        print("[SAVE]", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)
    if config["prediction_data"] is None:
        print("[ERROR] prediction_data is required")
        quit()
    obj = {}
    obj["prediction_data"] = prediction_data
    obj["labels"] = all_data.labels
    os.makedirs(os.path.dirname(config["prediction_data"]), exist_ok=True)
    joblib.dump(obj, config["prediction_data"])
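# NumPyArangeEncoder is passed to json.dump() above because numpy scalars and
# arrays are not JSON-serializable by default. A minimal sketch of the kind of
# encoder assumed here (the actual implementation may differ):
class NumPyArangeEncoderSketch(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)      # numpy ints -> Python int
        if isinstance(obj, np.floating):
            return float(obj)    # numpy floats -> Python float
        if isinstance(obj, np.ndarray):
            return obj.tolist()  # arrays -> nested lists
        return json.JSONEncoder.default(self, obj)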
def train(sess, graph, config):
    batch_size = config["batch_size"]
    learning_rate = config["learning_rate"]
    if config["validation_dataset"] is None:
        _, train_data, valid_data, info = load_and_split_data(
            config, filename=config["dataset"],
            valid_data_rate=config["validation_data_rate"])
    else:
        print("[INFO] training")
        train_data, info = load_data(config, filename=config["dataset"])
        print("[INFO] validation")
        valid_data, valid_info = load_data(config, filename=config["validation_dataset"])
        # Merge dataset statistics so the model is built large enough for both sets
        info["graph_node_num"] = max(info["graph_node_num"], valid_info["graph_node_num"])
        info["graph_num"] = info["graph_num"] + valid_info["graph_num"]
    model = CoreModel(sess, config, info)
    model.build(importlib.import_module(config["model.py"]))
    if config["profile"]:
        vars_to_train = tf.trainable_variables()
        print(vars_to_train)
        writer = tf.summary.FileWriter('logs', sess.graph)
    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("training time:{0}[sec]".format(train_time))
    if valid_data.num > 0:
        # Validation
        start_t = time.time()
        validation_cost, validation_metrics, prediction_data = model.pred_and_eval(valid_data)
        infer_time = time.time() - start_t
        print("final cost =", validation_cost)
        print("accuracy   =", validation_metrics["accuracy"])
        print("validation time:{0}[sec]".format(infer_time))
        # Saving
        if config["save_info_valid"] is not None:
            result = {}
            result["validation_cost"] = validation_cost
            result["validation_accuracy"] = validation_metrics
            result["train_time"] = train_time
            result["infer_time"] = infer_time
            save_path = config["save_info_valid"]
            print("[SAVE]", save_path)
            with open(save_path, "w") as fp:
                json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)
    if config["export_model"]:
        try:
            print("[SAVE]", config["export_model"])
            graph_def = graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), ['output'])
            tf.train.write_graph(graph_def, '.', config["export_model"], as_text=False)
        except Exception:
            print("[ERROR] 'output' node not found")
    if config["save_result_valid"] is not None:
        filename = config["save_result_valid"]
        save_prediction(filename, prediction_data)
    if config["make_plot"]:
        plot_cost(config, valid_data, model)
        plot_auc(config, valid_data.labels, np.array(prediction_data))
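# The frozen graph written by export_model above can be loaded back with
# standard TF1 APIs; a sketch (the .pb path is a hypothetical example):
def example_load_frozen_graph(pb_path="exported_model.pb"):
    import tensorflow as tf
    with tf.gfile.GFile(pb_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as g:
        tf.import_graph_def(graph_def, name="")
        # 'output' is the node name frozen by convert_variables_to_constants
        return g, g.get_tensor_by_name("output:0")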
def train_cv(sess, graph, config):
    from sklearn.model_selection import KFold
    from gcn_modules.make_plots import make_auc_plot, make_cost_acc_plot
    import sklearn
    from sklearn.metrics import roc_curve, auc, accuracy_score, precision_recall_fscore_support

    batch_size = config["batch_size"]
    learning_rate = config["learning_rate"]
    all_data, info = load_data(config, filename=config["dataset"])
    model = CoreModel(sess, config, info)
    model.build(importlib.import_module(config["model.py"]))
    # Cross-validated training
    kf = KFold(n_splits=config["k-fold_num"], shuffle=True, random_state=123)
    kf_count = 1
    fold_data_list = []
    if all_data["labels"] is not None:
        split_base = all_data["labels"]
    else:
        split_base = all_data["label_list"][0]
    score_metrics = []
    if config["task"] == "regression":
        metric_name = "mse"
    else:
        metric_name = "accuracy"
    for train_valid_list, test_list in kf.split(split_base):
        print("starting fold:{0}".format(kf_count))
        train_valid_data, test_data = split_data(
            all_data,
            indices_for_train_data=train_valid_list,
            indices_for_valid_data=test_list)
        train_data, valid_data = split_data(
            train_valid_data, valid_data_rate=config["validation_data_rate"])
        # Training
        start_t = time.time()
        model.fit(train_data, valid_data, k_fold_num=kf_count)
        train_time = time.time() - start_t
        print("training time:{0}[sec]".format(train_time))
        # Test
        start_t = time.time()
        test_cost, test_metrics, prediction_data = model.pred_and_eval(test_data)
        infer_time = time.time() - start_t
        print("final cost =", test_cost)
        print("%s =%f" % (metric_name, test_metrics[metric_name]))
        score_metrics.append(test_metrics[metric_name])
        print("infer time:{0}[sec]".format(infer_time))
        if config["export_model"]:
            try:
                name, ext = os.path.splitext(config["export_model"])
                filename = name + "." + str(kf_count) + ext
                print("[SAVE]", filename)
                graph_def = graph_util.convert_variables_to_constants(
                    sess, graph.as_graph_def(), ['output'])
                tf.train.write_graph(graph_def, '.', filename, as_text=False)
            except Exception:
                print("[ERROR] 'output' node not found")
        # Save per-fold data
        fold_data = dotdict({})
        fold_data.prediction_data = prediction_data
        if all_data["labels"] is not None:
            fold_data.test_labels = test_data.labels
        else:
            fold_data.test_labels = test_data.label_list
        fold_data.test_data_idx = test_list
        if config["task"] == "regression":
            fold_data.training_mse = [el["training_mse"] for el in model.training_metrics_list]
            fold_data.validation_mse = [el["validation_mse"] for el in model.validation_metrics_list]
        else:
            fold_data.training_acc = [el["training_accuracy"] for el in model.training_metrics_list]
            fold_data.validation_acc = [el["validation_accuracy"] for el in model.validation_metrics_list]
        fold_data.test_acc = test_metrics[metric_name]
        fold_data.training_cost = model.training_cost_list
        fold_data.validation_cost = model.validation_cost_list
        fold_data.test_cost = test_cost
        fold_data.train_time = train_time
        fold_data.infer_time = infer_time
        fold_data_list.append(fold_data)
        kf_count += 1
    print("cv %s(mean) =%f" % (metric_name, np.mean(score_metrics)))
    print("cv %s(std.) =%f" % (metric_name, np.std(score_metrics)))
    if "save_info_cv" in config and config["save_info_cv"] is not None:
        save_path = config["save_info_cv"]
        print("[SAVE]", save_path)
        _, ext = os.path.splitext(save_path)
        if ext == ".json":
            with open(save_path, "w") as fp:
                json.dump(fold_data_list, fp, indent=4, cls=NumPyArangeEncoder)
        else:
            joblib.dump(fold_data_list, save_path, compress=True)
    #
    if "save_result_cv" in config and config["save_result_cv"] is not None:
        result_cv = []
        for j, fold_data in enumerate(fold_data_list):
            pred_score = np.array(fold_data.prediction_data)
            if len(pred_score.shape) == 3:
                # multi-label/multi-task: #data x #task x #class
                # (this program supports only two classes, so keep class 1)
                pred_score = pred_score[:, :, 1]
            true_label = np.array(fold_data.test_labels)
            if len(pred_score.shape) == 1:
                pred_score = pred_score[:, np.newaxis]
            if len(true_label.shape) == 1:
                true_label = true_label[:, np.newaxis]
            v = []
            for i in range(info.label_dim):
                el = {}
                if config["task"] == "regression":
                    el["r2"] = sklearn.metrics.r2_score(true_label[:, i], pred_score[:, i])
                    el["mse"] = sklearn.metrics.mean_squared_error(true_label[:, i], pred_score[:, i])
                else:
                    pred = np.zeros(pred_score.shape)
                    pred[pred_score > 0.5] = 1
                    fpr, tpr, _ = roc_curve(true_label[:, i], pred_score[:, i], pos_label=1)
                    roc_auc = auc(fpr, tpr)
                    acc = accuracy_score(true_label[:, i], pred[:, i])
                    scores = precision_recall_fscore_support(
                        true_label[:, i], pred[:, i], average='binary')
                    el["auc"] = roc_auc
                    el["acc"] = acc
                    el["pre"] = scores[0]
                    el["rec"] = scores[1]
                    el["f"] = scores[2]
                    el["sup"] = scores[3]
                v.append(el)
            result_cv.append(v)
        save_path = config["save_result_cv"]
        print("[SAVE]", save_path)
        with open(save_path, "w") as fp:
            json.dump(result_cv, fp, indent=4, cls=NumPyArangeEncoder)
    #
    for i, fold_data in enumerate(fold_data_list):
        prefix = "fold" + str(i) + "_"
        result_path = config["plot_path"]
        os.makedirs(result_path, exist_ok=True)
        if config["make_plot"]:
            if config["task"] == "regression":
                # Plot cost and MSE curves
                make_cost_acc_plot(fold_data.training_cost, fold_data.validation_cost,
                                   fold_data.training_mse, fold_data.validation_mse,
                                   result_path + prefix)
                pred_score = np.array(fold_data.prediction_data)
                plot_r2(config, fold_data.test_labels, pred_score, prefix=prefix)
            else:
                # Plot cost and accuracy curves
                make_cost_acc_plot(fold_data.training_cost, fold_data.validation_cost,
                                   fold_data.training_acc, fold_data.validation_acc,
                                   result_path + prefix)
                # Plot ROC/AUC
                pred_score = np.array(fold_data.prediction_data)
                plot_auc(config, fold_data.test_labels, pred_score, prefix=prefix)
def train(sess, config):
    batch_size = config["batch_size"]
    learning_rate = config["learning_rate"]
    all_data, train_data, valid_data, info = load_and_split_data(
        config, filename=config["dataset"],
        valid_data_rate=config["validation_data_rate"])
    # Build and train model
    graph_index_list = []
    for i in range(all_data.num):
        graph_index_list.append([i, i])
    info.graph_index_list = graph_index_list
    info.pos_weight = get_pos_weight(train_data)
    info.norm = get_norm(train_data)
    print("pos_weight=", info.pos_weight)
    print("norm=", info.norm)
    model = CoreModel(sess, config, info, construct_feed_callback=construct_feed)
    model.build(importlib.import_module(config["model.py"]))
    vars_to_train = tf.trainable_variables()
    for v in vars_to_train:
        print(v)
    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("training time:{0}[sec]".format(train_time))
    # Validation
    start_t = time.time()
    validation_cost, validation_accuracy, validation_prediction_data = model.pred_and_eval(valid_data)
    training_cost, training_accuracy, training_prediction_data = model.pred_and_eval(train_data)
    infer_time = time.time() - start_t
    print("final cost(training  ) =", training_cost)
    print("accuracy  (training  ) =", training_accuracy["accuracy"])
    print("final cost(validation) =", validation_cost)
    print("accuracy  (validation) =", validation_accuracy["accuracy"])
    print("infer time:{0}[sec]".format(infer_time))
    # Saving
    if config["save_info_valid"] is not None:
        result = {}
        result["validation_cost"] = validation_cost
        result["validation_accuracy"] = validation_accuracy["accuracy"]
        result["train_time"] = train_time
        result["infer_time"] = infer_time
        save_path = config["save_info_valid"]
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        print("[SAVE]", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)
    if config["save_info_train"] is not None:
        result = {}
        result["training_cost"] = training_cost
        result["training_accuracy"] = training_accuracy["accuracy"]
        result["train_time"] = train_time
        save_path = config["save_info_train"]
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        print("[SAVE]", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)
    if "reconstruction_valid" in config:
        filename = config["reconstruction_valid"]
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(validation_prediction_data, filename)
    if "reconstruction_train" in config:
        filename = config["reconstruction_train"]
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(training_prediction_data, filename)
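# get_pos_weight()/get_norm() are assumed to follow the usual graph
# autoencoder reconstruction weighting: pos_weight balances the sparse
# positive adjacency entries against the many negatives, and norm rescales
# the weighted cross-entropy. A sketch; "data.adjs" as a dense 0/1 adjacency
# array is a hypothetical assumption about the data object:
def get_pos_weight_sketch(data):
    adj = np.asarray(data.adjs)
    n_pos = adj.sum()
    n_all = float(np.prod(adj.shape))
    return (n_all - n_pos) / n_pos  # negatives per positive entry

def get_norm_sketch(data):
    adj = np.asarray(data.adjs)
    n_pos = adj.sum()
    n_all = float(np.prod(adj.shape))
    return n_all / (2.0 * (n_all - n_pos))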