Exemple #1
0
def train(sess,graph,config):
    """Train a model, evaluate it on the validation data and save the results.

    Args:
        sess: Session handed to ``CoreModel`` and used when exporting the graph.
        graph: Graph whose ``as_graph_def()`` is frozen when
            ``config["export_model"]`` is set.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_dataset``, ``validation_data_rate``, ``model.py``,
            ``profile``, ``save_info_valid``, ``export_model``,
            ``save_result_valid`` and ``make_plot``.

    When the validation set is empty (``valid_data.num == 0``) no evaluation
    is run: the validation entries of the saved info are ``None`` and the
    prediction file and AUC plot are skipped.
    """
    if config["validation_dataset"] is None:
        # Single dataset: split it into training and validation parts.
        _, train_data, valid_data, info = load_and_split_data(
            config,
            filename=config["dataset"],
            valid_data_rate=config["validation_data_rate"])
    else:
        # Separate files: merge the size information of both datasets.
        print("[INFO] training")
        train_data, info = load_data(config, filename=config["dataset"])
        print("[INFO] validation")
        valid_data, valid_info = load_data(config, filename=config["validation_dataset"])
        info["graph_node_num"] = max(info["graph_node_num"], valid_info["graph_node_num"])
        info["graph_num"] = info["graph_num"] + valid_info["graph_num"]

    model = CoreModel(sess, config, info)
    model.build(importlib.import_module(config["model.py"]))
    if config["profile"]:
        print(tf.trainable_variables())
        # Kept bound so the writer stays alive for the rest of the run.
        writer = tf.summary.FileWriter('logs', sess.graph)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("traing time:{0}".format(train_time) + "[sec]")

    # Validation results default to None so the saving steps below never
    # touch unbound names when there is nothing to validate on.
    validation_cost = None
    validation_metrics = None
    prediction_data = None
    infer_time = None
    if valid_data.num > 0:
        start_t = time.time()
        validation_cost, validation_metrics, prediction_data = model.pred_and_eval(valid_data)
        infer_time = time.time() - start_t
        print("final cost =", validation_cost)
        print("accuracy   =", validation_metrics["accuracy"])
        print("validation time:{0}".format(infer_time) + "[sec]")

    # Saving
    if config["save_info_valid"] is not None:
        result = {
            "validation_cost": validation_cost,
            "validation_accuracy": validation_metrics,
            "train_time": train_time,
            "infer_time": infer_time,
        }
        save_path = config["save_info_valid"]
        print("[SAVE] ", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["export_model"]:
        # Best-effort export: a failure is reported but does not abort the run.
        try:
            print("[SAVE]", config["export_model"])
            graph_def = graph_util.convert_variables_to_constants(sess, graph.as_graph_def(), ['output'])
            tf.train.write_graph(graph_def, '.', config["export_model"], as_text=False)
        except Exception as err:
            print('[ERROR] output has been not found')
            print("[ERROR]", err)

    if config["save_result_valid"] is not None:
        if prediction_data is None:
            print("[WARN] no validation data; skipped", config["save_result_valid"])
        else:
            save_prediction(config["save_result_valid"], prediction_data)

    if config["make_plot"]:
        plot_cost(config, valid_data, model)
        if prediction_data is not None:
            plot_auc(config, valid_data.labels, np.array(prediction_data))
Exemple #2
0
def _train(layers):
    """Fit a ``GCN(layers)`` model and return its validation accuracy.

    The configuration comes from ``_get_config()``; the dataset named by
    ``config["dataset"]`` is split using ``config["validation_data_rate"]``.
    A fresh ``tf.Session`` is opened for the run and closed before returning.
    """
    cfg = _get_config()
    split = load_and_split_data(
        cfg,
        filename=cfg["dataset"],
        valid_data_rate=cfg["validation_data_rate"])
    # The first element of the split (the unsplit data) is not needed here.
    training_set, validation_set, dataset_info = split[1], split[2], split[3]

    with tf.Session() as session:
        core = CoreModel(session, cfg, dataset_info)
        core.build(GCN(layers), True, False, None)
        core.fit(training_set, validation_set)
        evaluation = core.pred_and_eval(validation_set)
        metrics = evaluation[1]
    return metrics["accuracy"]
Exemple #3
0
def reconstruct(sess, config):
    """Restore a trained model and save its predictions for a dataset.

    Args:
        sess: Session handed to ``CoreModel`` and used to restore the
            checkpoint named by ``config["load_model"]``.
        config: Mapping of run options. Keys read here: ``model.py``,
            ``dataset``, optional ``dataset_test`` (overrides ``dataset``),
            ``load_model`` and optional ``reconstruction_test`` (output path
            for the predictions, written with ``joblib.dump``).
    """
    model_module = importlib.import_module(config["model.py"])
    dataset_filename = config["dataset"]
    if "dataset_test" in config:
        dataset_filename = config["dataset_test"]
    all_data, info = load_data(config, filename=dataset_filename)

    # One [i, i] pair per sample in the loaded data.
    info.graph_index_list = [[i, i] for i in range(all_data.num)]
    info.pos_weight = get_pos_weight(all_data)
    info.norm = get_norm(all_data)
    print("pos_weight=", info.pos_weight)
    # Fixed: this line used to print pos_weight a second time.
    print("norm=", info.norm)

    model = CoreModel(sess,
                      config,
                      info,
                      construct_feed_callback=construct_feed)
    model.build(model_module, is_train=False)

    for v in tf.trainable_variables():
        print(v)

    # Variables are restored from the checkpoint instead of being initialized.
    saver = tf.train.Saver()
    print("[load]", config["load_model"])
    saver.restore(sess, config["load_model"])

    _, _, recons_data = model.pred_and_eval(all_data)

    if "reconstruction_test" in config:
        filename = config["reconstruction_test"]
        out_dir = os.path.dirname(filename)
        if out_dir:
            # os.makedirs("") raises, so skip it for bare file names.
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(recons_data, filename)
Exemple #4
0
def infer(sess,graph,config):
    """Restore a trained model, evaluate it on a dataset and save predictions.

    Args:
        sess: Session handed to ``CoreModel`` and used to restore the
            checkpoint named by ``config["load_model"]``.
        graph: Unused here; kept so the signature matches the other entry
            points.
        config: Mapping of run options. Keys read here: ``model.py``,
            ``dataset``, optional ``dataset_test`` (overrides ``dataset``),
            ``load_model``, ``save_info_test`` and ``prediction_data``.

    Raises:
        SystemExit: with status 1 when ``config["prediction_data"]`` is None.
    """
    model_module = importlib.import_module(config["model.py"])
    dataset_filename = config["dataset"]
    if "dataset_test" in config:
        dataset_filename = config["dataset_test"]
    all_data, info = load_data(config, filename=dataset_filename)

    model = CoreModel(sess, config, info)
    model.build(model_module, is_train=False)

    # Variables are restored from the checkpoint instead of being initialized.
    saver = tf.train.Saver()
    print("[LOAD]", config["load_model"])
    saver.restore(sess, config["load_model"])

    # Evaluation
    start_t = time.time()
    test_cost, test_metrics, prediction_data = model.pred_and_eval(all_data)
    infer_time = time.time() - start_t
    print("final cost =", test_cost)
    print("accuracy   =", test_metrics["accuracy"])
    print("infer time:{0}".format(infer_time) + "[sec]")

    if config["save_info_test"] is not None:
        result = {
            "test_cost": test_cost,
            "test_accuracy": test_metrics,
            "infer_time": infer_time,
        }
        save_path = config["save_info_test"]
        print("[SAVE] ", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["prediction_data"] is None:
        print("[ERROR] prediction_data is required")
        # quit() is the interactive-shell helper and exits with status 0;
        # signal the failure to the caller explicitly instead.
        raise SystemExit(1)

    obj = {
        "prediction_data": prediction_data,
        "labels": all_data.labels,
    }
    out_dir = os.path.dirname(config["prediction_data"])
    if out_dir:
        # os.makedirs("") raises, so skip it for bare file names.
        os.makedirs(out_dir, exist_ok=True)
    joblib.dump(obj, config["prediction_data"])
Exemple #5
0
def train_cv(sess,graph,config):
    """Run k-fold cross-validation and save per-fold results and plots.

    For every fold the data is split into train/validation/test parts, the
    model is fitted and then evaluated on the test part. Per-fold records are
    collected and optionally written to disk.

    Args:
        sess: Session handed to ``CoreModel`` and used when exporting graphs.
        graph: Graph whose ``as_graph_def()`` is frozen when
            ``config["export_model"]`` is set.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``model.py``, ``k-fold_num``, ``task``, ``validation_data_rate``,
            ``export_model``, optional ``save_info_cv`` and
            ``save_result_cv``, ``plot_path`` and ``make_plot``.

    NOTE(review): the same ``model`` object is fitted for every fold; whether
    ``fit(..., k_fold_num=...)`` re-initializes the weights is not visible
    here and should be confirmed.
    """
    from sklearn.model_selection import KFold
    from sklearn.metrics import (roc_curve, auc, accuracy_score,
                                 precision_recall_fscore_support,
                                 r2_score, mean_squared_error)
    from kgcn.make_plots import make_cost_acc_plot

    all_data, info = load_data(config, filename=config["dataset"])
    model = CoreModel(sess, config, info)
    model.build(importlib.import_module(config["model.py"]))

    kf = KFold(n_splits=config["k-fold_num"], shuffle=True, random_state=123)

    is_regression = config["task"] == "regression"
    metric_name = "mse" if is_regression else "accuracy"
    has_labels = all_data["labels"] is not None
    # KFold only needs something with one entry per sample to split on.
    split_base = all_data["labels"] if has_labels else all_data["label_list"][0]

    fold_data_list = []
    score_metrics = []
    for kf_count, (train_valid_list, test_list) in enumerate(kf.split(split_base), start=1):
        print("starting fold:{0}".format(kf_count))
        train_valid_data, test_data = split_data(
            all_data,
            indices_for_train_data=train_valid_list,
            indices_for_valid_data=test_list)
        train_data, valid_data = split_data(
            train_valid_data,
            valid_data_rate=config["validation_data_rate"])

        # Training
        start_t = time.time()
        model.fit(train_data, valid_data, k_fold_num=kf_count)
        train_time = time.time() - start_t
        print("traing time:{0}".format(train_time) + "[sec]")

        # Test
        start_t = time.time()
        test_cost, test_metrics, prediction_data = model.pred_and_eval(test_data)
        infer_time = time.time() - start_t
        print("final cost =", test_cost)
        print("%s   =%f" % (metric_name, test_metrics[metric_name]))
        score_metrics.append(test_metrics[metric_name])
        print("infer time:{0}".format(infer_time) + "[sec]")

        if config["export_model"]:
            # Best-effort export: a failure is reported but does not abort
            # the remaining folds.
            try:
                name, ext = os.path.splitext(config["export_model"])
                filename = name + "." + str(kf_count) + ext
                print("[SAVE]", filename)
                graph_def = graph_util.convert_variables_to_constants(sess, graph.as_graph_def(), ['output'])
                tf.train.write_graph(graph_def, '.', filename, as_text=False)
            except Exception as err:
                print('[ERROR] output has been not found')
                print("[ERROR]", err)

        # Record everything about this fold that is saved or plotted later.
        fold_data = dotdict({})
        fold_data.prediction_data = prediction_data
        if has_labels:
            fold_data.test_labels = test_data.labels
        else:
            fold_data.test_labels = test_data.label_list
        fold_data.test_data_idx = test_list
        if is_regression:
            fold_data.training_mse = [el["training_mse"] for el in model.training_metrics_list]
            fold_data.validation_mse = [el["validation_mse"] for el in model.validation_metrics_list]
        else:
            fold_data.training_acc = [el["training_accuracy"] for el in model.training_metrics_list]
            fold_data.validation_acc = [el["validation_accuracy"] for el in model.validation_metrics_list]
        # The key is named test_acc for both tasks; for regression it holds
        # the mse.
        fold_data.test_acc = test_metrics[metric_name]
        fold_data.training_cost = model.training_cost_list
        fold_data.validation_cost = model.validation_cost_list
        fold_data.test_cost = test_cost
        fold_data.train_time = train_time
        fold_data.infer_time = infer_time
        fold_data_list.append(fold_data)

    print("cv %s(mean)   =%f" % (metric_name, np.mean(score_metrics)))
    print("cv %s(std.)   =%f" % (metric_name, np.std(score_metrics)))

    if "save_info_cv" in config and config["save_info_cv"] is not None:
        save_path = config["save_info_cv"]
        print("[SAVE] ", save_path)
        _, ext = os.path.splitext(save_path)
        if ext == ".json":
            # Fixed: the file was never opened before being written to.
            with open(save_path, "w") as fp:
                json.dump(fold_data_list, fp, indent=4, cls=NumPyArangeEncoder)
        else:
            joblib.dump(fold_data_list, save_path, compress=True)

    if "save_result_cv" in config and config["save_result_cv"] is not None:
        result_cv = []
        for fold_data in fold_data_list:
            pred_score = np.array(fold_data.prediction_data)
            if pred_score.ndim == 3:  # multi-label-multi-task
                # #data x #task x #class
                # => this program supports only 2 labels
                pred_score = pred_score[:, :, 1]
            true_label = np.array(fold_data.test_labels)
            # Make both arrays 2-D so each label column can be indexed.
            if pred_score.ndim == 1:
                pred_score = pred_score[:, np.newaxis]
            if true_label.ndim == 1:
                true_label = true_label[:, np.newaxis]
            if not is_regression:
                # Threshold the scores once per fold, not once per label.
                pred = np.zeros(pred_score.shape)
                pred[pred_score > 0.5] = 1
            v = []
            for i in range(info.label_dim):
                el = {}
                if is_regression:
                    el["r2"] = r2_score(true_label[:, i], pred_score[:, i])
                    el["mse"] = mean_squared_error(true_label[:, i], pred_score[:, i])
                else:
                    fpr, tpr, _ = roc_curve(true_label[:, i], pred_score[:, i], pos_label=1)
                    scores = precision_recall_fscore_support(
                        true_label[:, i], pred[:, i], average='binary')
                    el["auc"] = auc(fpr, tpr)
                    el["acc"] = accuracy_score(true_label[:, i], pred[:, i])
                    el["pre"] = scores[0]
                    el["rec"] = scores[1]
                    el["f"] = scores[2]
                    el["sup"] = scores[3]
                v.append(el)
            result_cv.append(v)
        save_path = config["save_result_cv"]
        print("[SAVE] ", save_path)
        with open(save_path, "w") as fp:
            json.dump(result_cv, fp, indent=4, cls=NumPyArangeEncoder)

    # The plot directory is created even when plotting is disabled, as before.
    result_path = config["plot_path"]
    os.makedirs(result_path, exist_ok=True)
    if config["make_plot"]:
        for i, fold_data in enumerate(fold_data_list):
            prefix = "fold" + str(i) + "_"
            pred_score = np.array(fold_data.prediction_data)
            if is_regression:
                # plot cost
                make_cost_acc_plot(fold_data.training_cost,
                                   fold_data.validation_cost,
                                   fold_data.training_mse,
                                   fold_data.validation_mse,
                                   result_path + prefix)
                plot_r2(config, fold_data.test_labels, pred_score, prefix=prefix)
            else:
                # plot cost
                make_cost_acc_plot(fold_data.training_cost,
                                   fold_data.validation_cost,
                                   fold_data.training_acc,
                                   fold_data.validation_acc,
                                   result_path + prefix)
                # plot AUC
                plot_auc(config, fold_data.test_labels, pred_score, prefix=prefix)
Exemple #6
0
def train(sess, config):
    """Train a model, evaluate it on both splits and save results.

    Args:
        sess: Session handed to ``CoreModel``.
        config: Mapping of run options. Keys read here: ``dataset``,
            ``validation_data_rate``, ``model.py``, ``save_info_valid``,
            ``save_info_train`` and the optional output paths
            ``reconstruction_valid`` and ``reconstruction_train``.
    """
    all_data, train_data, valid_data, info = load_and_split_data(
        config,
        filename=config["dataset"],
        valid_data_rate=config["validation_data_rate"])

    # One [i, i] pair per sample in the full dataset.
    info.graph_index_list = [[i, i] for i in range(all_data.num)]
    info.pos_weight = get_pos_weight(train_data)
    info.norm = get_norm(train_data)
    print("pos_weight=", info.pos_weight)
    # Fixed: this line used to print pos_weight a second time.
    print("norm=", info.norm)

    model = CoreModel(sess,
                      config,
                      info,
                      construct_feed_callback=construct_feed)
    model.build(importlib.import_module(config["model.py"]))

    for v in tf.trainable_variables():
        print(v)

    # Training
    start_t = time.time()
    model.fit(train_data, valid_data)
    train_time = time.time() - start_t
    print("traing time:{0}".format(train_time) + "[sec]")

    # Evaluation; infer_time covers both the validation and the training pass.
    start_t = time.time()
    validation_cost, validation_accuracy, validation_prediction_data = model.pred_and_eval(
        valid_data)
    training_cost, training_accuracy, training_prediction_data = model.pred_and_eval(
        train_data)
    infer_time = time.time() - start_t
    print("final cost(training  ) =", training_cost)
    print("accuracy  (training  ) =", training_accuracy["accuracy"])
    print("final cost(validation) =", validation_cost)
    print("accuracy  (validation) =", validation_accuracy["accuracy"])
    print("infer time:{0}".format(infer_time) + "[sec]")

    # Saving
    if config["save_info_valid"] is not None:
        result = {
            "validation_cost": validation_cost,
            "validation_accuracy": validation_accuracy["accuracy"],
            "train_time": train_time,
            "infer_time": infer_time,
        }
        save_path = config["save_info_valid"]
        out_dir = os.path.dirname(save_path)
        if out_dir:
            # os.makedirs("") raises, so skip it for bare file names.
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE] ", save_path)
        with open(save_path, "w") as fp:
            # Same encoder as the training-info dump below, for consistency.
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if config["save_info_train"] is not None:
        # NOTE(review): the "test_*" keys hold training-set metrics; the key
        # names are kept so existing readers of this file keep working.
        result = {
            "test_cost": training_cost,
            "test_accuracy": training_accuracy["accuracy"],
            "train_time": train_time,
        }
        save_path = config["save_info_train"]
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE] ", save_path)
        with open(save_path, "w") as fp:
            json.dump(result, fp, indent=4, cls=NumPyArangeEncoder)

    if "reconstruction_valid" in config:
        filename = config["reconstruction_valid"]
        out_dir = os.path.dirname(filename)
        print(out_dir)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(validation_prediction_data, filename)
    if "reconstruction_train" in config:
        filename = config["reconstruction_train"]
        out_dir = os.path.dirname(filename)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        print("[SAVE]", filename)
        joblib.dump(training_prediction_data, filename)