def test_predict_proba(self):
    """Check the shape of the array returned by ``GraphAttNet.prediction_prob``.

    A one-layer GraphAttNet (3 attention heads, node and attention dropout
    rates set to 0.2) is created, ``train_lG`` is called once on every graph
    of the pickle, and ``prediction_prob`` is then called on a single graph.
    The result must have one row per node of that graph and 5 columns.
    """
    pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
    gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)
    gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]

    node_dim = gcn_graph[0].X.shape[1]
    nb_class = gcn_graph[0].Y.shape[1]

    gcn_model = GraphAttNet(node_dim, nb_class, num_layers=1, learning_rate=0.01,
                            node_indim=-1, nb_attention=3)
    gcn_model.dropout_rate_node = 0.2
    gcn_model.dropout_rate_attention = 0.2
    gcn_model.create_model()

    with tf.Session() as session:
        session.run([gcn_model.init])
        # Train on the full graph list, then predict on one of the selected graphs.
        gcn_model.train_lG(session, gcn_graph)
        g_proba = gcn_model.prediction_prob(session, gcn_graph_train[1])

        print(g_proba.shape)
        print(type(g_proba))
        print(gcn_graph_train[1].X.shape)

        # assertEqual reports both shapes on failure, unlike assertTrue(a == b).
        # NOTE(review): 5 is presumably the class count of this pickle
        # (i.e. nb_class) -- confirm before replacing the literal.
        self.assertEqual(g_proba.shape, (gcn_graph_train[1].X.shape[0], 5))
def test_graphattnet_train_dropout(self):
    """Print GraphAttNet accuracies before and after one ``train_lG`` call.

    The model is a one-layer GraphAttNet with node and attention dropout
    rates set to 0.2. Accuracies are measured with ``test_lG`` on three
    graphs picked from the pickle; training uses the whole list. Nothing is
    asserted, the numbers are only printed.
    """
    graphs = GCNDataset.load_transkribus_pickle('abp_CV_fold_1_tlXlY_trn.pkl')
    eval_graphs = [graphs[idx] for idx in (8, 18, 29)]

    node_dim = graphs[0].X.shape[1]
    edge_dim = graphs[0].E.shape[1] - 2.0  # computed but not passed to GraphAttNet
    nb_class = graphs[0].Y.shape[1]

    model = GraphAttNet(
        node_dim,
        nb_class,
        num_layers=1,
        learning_rate=0.01,
        node_indim=-1,
        nb_attention=3,
    )
    model.dropout_rate_node = 0.2
    model.dropout_rate_attention = 0.2
    model.create_model()

    def report_accuracy(sess):
        # Evaluate on the selected graphs and print both returned accuracies.
        g_acc, node_acc = model.test_lG(sess, eval_graphs)
        print('Mean Accuracy', g_acc, node_acc)

    with tf.Session() as session:
        session.run([model.init])
        report_accuracy(session)
        model.train_lG(session, graphs)
        report_accuracy(session)
def test_train_ensemble_NN_model(self):
    """Train an EnsembleGraphNN made of an EdgeConvNet and a GraphAttNet.

    The ensemble is trained for 500 ``train_lG`` calls on three graphs, then
    the test checks that ``predict_lG`` returns one prediction per input
    graph. The accuracies of the ensemble and of each base model are printed
    for inspection only.
    """
    # TODO: make a proper synthetic dataset for test purposes.
    pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
    gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)
    gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]

    node_dim = gcn_graph[0].X.shape[1]
    edge_dim = gcn_graph[0].E.shape[1] - 2.0
    nb_class = gcn_graph[0].Y.shape[1]

    gat_model = GraphAttNet(node_dim, nb_class, num_layers=1, learning_rate=0.01,
                            node_indim=-1, nb_attention=3)
    gat_model.dropout_rate_node = 0.2
    gat_model.dropout_rate_attention = 0.2
    gat_model.create_model()

    nb_layers = 3
    lr = 0.001
    nb_conv = 2
    ecn_model = EdgeConvNet(
        node_dim,
        edge_dim,
        nb_class,
        num_layers=nb_layers,
        learning_rate=lr,
        mu=0.0,
        node_indim=-1,
        nconv_edge=nb_conv,
    )
    ecn_model.create_model()

    # Open question kept from the author: are the models and the graph
    # definition implicitly copied when building the ensemble?
    ensemble = EnsembleGraphNN([ecn_model, gat_model])

    with tf.Session() as session:
        # NOTE(review): only the first model's init op is run; presumably it
        # initialises the variables of both models -- confirm.
        session.run([ensemble.models[0].init])

        # "_" instead of "iter": the index is unused and "iter" shadows a builtin.
        for _ in range(500):
            ensemble.train_lG(session, gcn_graph_train)

        prediction = ensemble.predict_lG(session, gcn_graph_train)
        print(prediction)
        self.assertEqual(len(prediction), len(gcn_graph_train))

        # Print the ensemble result under its own header; it used to be
        # overwritten by the base-model loop before being displayed.
        print('Ensemble Prediction')
        ensemble_accs = ensemble.test_lG(session, gcn_graph_train)
        print(ensemble_accs)

        print('Base Predictions')
        for m in ensemble.models:
            accs = m.test_lG(session, gcn_graph_train)
            print(accs)
def main_fold(foldid, configid, outdir):
    '''
    Simple Fold experiment, loading one fold, train and test

    The output pickle name is derived from the arguments (not from FLAGS), so
    that each (fold, config) pair writes to its own file when this function is
    called in a loop.

    :param foldid: fold identifier; selects the abp_CV_fold_<foldid> train/test pickles
    :param configid: configuration identifier passed to get_config
    :param outdir: directory in which table_F<foldid>_C<configid>.pickle is written
    :return: None
    '''
    fold_prefix = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(foldid)
    pickle_train = fold_prefix + '_tlXlY_trn.pkl'
    pickle_test = fold_prefix + '_tlXlY_tst.pkl'

    train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
    test_graph = GCNDataset.load_transkribus_pickle(pickle_test)

    config = get_config(configid)

    outpicklefname = os.path.join(
        outdir, 'table_F' + str(foldid) + '_C' + str(configid) + '.pickle')

    # Same argument convention as the other call sites: the output file name is
    # the third positional argument and the test graphs go in gcn_graph_test.
    run_model_train_val_test(train_graph, config, outpicklefname,
                             gcn_graph_test=test_graph)
def test_predict(self):
    """Check that ``predict_lG`` agrees with the node accuracy from ``test_lG``.

    An EdgeConvNet (2 layers, 5 edge convolutions, ``stack_instead_add`` and
    ``fast_convolve`` enabled) is trained for 50 ``train_lG`` calls on three
    graphs. The accuracy recomputed from the ``predict_lG`` labels must match
    the node accuracy returned by ``test_lG``. The predictions and the true
    labels are also dumped to ``debug.pickle``.
    """
    pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
    gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)
    gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]

    node_dim = gcn_graph[0].X.shape[1]
    edge_dim = gcn_graph[0].E.shape[1] - 2.0
    nb_class = gcn_graph[0].Y.shape[1]

    gcn_model = EdgeConvNet(node_dim, edge_dim, nb_class, num_layers=2,
                            learning_rate=0.01, mu=0.0, node_indim=-1,
                            nconv_edge=5)
    gcn_model.stack_instead_add = True
    gcn_model.fast_convolve = True
    gcn_model.create_model()

    nb_iter = 50
    with tf.Session() as session:
        session.run([gcn_model.init])

        for _ in range(nb_iter):
            gcn_model.train_lG(session, gcn_graph_train)

        # Reference accuracy as computed by the model itself.
        g_acc, node_acc = gcn_model.test_lG(session, gcn_graph_train)
        print('Mean Accuracy', g_acc, node_acc)

        # Recompute the node accuracy by hand from the predicted labels.
        lY_pred = gcn_model.predict_lG(session, gcn_graph_train, verbose=False)

        tp = 0
        nb_node = 0
        Ytrue_l = []
        lY_l = []
        for lY, graph in zip(lY_pred, gcn_graph_train):
            Ytrue = np.argmax(graph.Y, axis=1)
            Ytrue_l.extend(Ytrue)
            lY_l.extend(lY)
            tp += sum(Ytrue == lY)
            nb_node += Ytrue.shape[0]

        print('Final Accuracy', tp / nb_node)
        print('Accuracy_score', sklearn.metrics.accuracy_score(Ytrue_l, lY_l))
        print(sklearn.metrics.classification_report(Ytrue_l, lY_l))
        self.assertAlmostEqual(tp / nb_node, node_acc)

        Z = [
            lY_pred,
            [np.argmax(graph.Y, axis=1) for graph in gcn_graph_train],
        ]
        # Context manager: the file is closed even if pickle.dump raises.
        with open('debug.pickle', 'wb') as f:
            pickle.dump(Z, f, protocol=1, fix_imports=True)
def test_05_load_jl_pickle(self):
    """Load 'abp_CV_fold_1_tlXlY_trn.pkl' and print how many items were loaded.

    Nothing is asserted; the test only fails if loading raises.
    """
    loaded = GCNDataset.load_transkribus_pickle('abp_CV_fold_1_tlXlY_trn.pkl')
    nb_loaded = len(loaded)
    print(nb_loaded, 'loaded graph')
def main(_):
    '''
    Entry point: run the experiment selected by the command-line FLAGS.

    The first matching branch is executed:

    * FLAGS.snake: load the snake train/test pickles, call run_model and print
      the returned test accuracy.
    * FLAGS.das_train: load the DAS table graphs and call
      run_model_train_val_test with a checkpoint path and a validation-score
      pickle path located under models_all/C<configid>.
    * FLAGS.das_predict: rebuild an EdgeConvNet from the config, restore the
      checkpoint chosen from the stored validation scores, run predict_lG on
      the col9142 pickles and dump the predictions to a pickle file.
    * FLAGS.qsub_taskid > -1: run the (fold, config) pair stored at that index
      of the grid returned by _make_grid_qsub.
    * otherwise: run a single fold (FLAGS.fold), or, when FLAGS.fold == -1,
      folds 1 to 4 for every config id listed in FLAGS.grid_configs
      (ids separated by '_').

    :param _: ignored
    :return: -1 when FLAGS.qsub_taskid cannot be resolved to a (fold, config)
             pair, None otherwise
    '''
    if FLAGS.snake is True:
        # Other snake pickles used in earlier runs (same directory):
        # snake_tlXlY_{trn,tst}.pkl, snake_tlXlY_fixed_{trn,tst}.pkl,
        # snake_tlXlY_2_fixed_{trn,tst}.pkl
        pickle_train = '/home/meunier/Snake/snake_tlXlY_edge_trn.pkl'
        pickle_test = '/home/meunier/Snake/snake_tlXlY_edge_tst.pkl'

        train_graph = GCNDataset.load_snake_pickle(pickle_train)
        test_graph = GCNDataset.load_snake_pickle(pickle_test)

        config = get_config(FLAGS.configid)
        acc_test = run_model(train_graph, config, test_graph)
        print('Accuracy Test', acc_test)

    elif FLAGS.das_train is True:
        # Load all the table files and train the model.
        graph_train = []

        debug = True
        if debug:
            pickle_train = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_full_tlXlY_trn.pkl'
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_CRF_Xr.pkl'
            print(pickle_train_ra, pickle_train)
            graph_train = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra, format_reverse='lx')
        else:
            i = 1
            pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                i) + '_tlXlY_trn.pkl'
            pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                i) + '_tlXlY_tst.pkl'

            # Reverse-arc features.
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                i) + '_tlXrlY_trn.pkl'
            pickle_test_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                i) + '_tlXrlY_tst.pkl'

            train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra)
            test_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_test, pickle_test_ra)

            # Train and test graphs of the fold are merged into one training set.
            graph_train.extend(train_graph)
            graph_train.extend(test_graph)

        print('Graph Train Nb', len(graph_train))

        configid = FLAGS.configid
        config = get_config(configid)

        dirp = os.path.join('models_all', 'C' + str(configid))
        mkdir_p(dirp)
        save_model_dir = os.path.join(
            dirp, 'alldas_exp1_C' + str(configid) + '.ckpt')
        outpicklefname = os.path.join(
            dirp, 'alldas_exp1_C' + str(configid) + '.validation_scores.pickle')

        # For a quick test, gcn_graph_test=train_graph can be added here.
        run_model_train_val_test(graph_train, config, outpicklefname,
                                 ratio_train_val=0.1,
                                 save_model_path=save_model_dir)

    elif FLAGS.das_predict is True:
        do_test = False  # internal switch enabling an extra sanity evaluation

        # Dimensions are hard-coded because no labelled graph is loaded here.
        node_dim = 29
        edge_dim = 140
        nb_class = 5

        configid = FLAGS.configid
        config = get_config(configid)

        # TODO: get the best file.
        val_pickle = os.path.join(
            'models_all', 'C' + str(configid),
            "alldas_exp1_C" + str(configid) + '.validation_scores.pickle')
        print('Reading Training Info from:', val_pickle)
        # Context manager so the handle is released once the scores are read.
        with open(val_pickle, 'rb') as f:
            R = pickle.load(f)

        val = R['val_acc']
        print('Validation scores', val)
        epoch_index = np.argmax(val)
        print('Best performance on val set: Epoch', epoch_index)

        gcn_model = gcn_models.EdgeConvNet(
            node_dim,
            edge_dim,
            nb_class,
            num_layers=config['num_layers'],
            learning_rate=config['lr'],
            mu=config['mu'],
            node_indim=config['node_indim'],
            nconv_edge=config['nconv_edge'],
        )
        gcn_model.stack_instead_add = config['stack_instead_add']
        if 'fast_convolve' in config:
            gcn_model.fast_convolve = config['fast_convolve']
        gcn_model.create_model()

        if do_test:
            graph_train = []
            for i in range(1, 5):
                pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                    i) + '_tlXlY_trn.pkl'
                print('loading ', pickle_train)
                train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
                graph_train.extend(train_graph)

        # TODO: load the data for test.
        if FLAGS.das_predict_workflow:
            pickle_predict = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_workflow_X.pkl'
            pickle_predict_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_workflow_Xr.pkl'
        else:
            pickle_predict = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_CRF_X.pkl'
            pickle_predict_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_CRF_Xr.pkl'

        print('loading ', pickle_predict, pickle_predict_ra)
        predict_graph = GCNDataset.load_test_pickle(
            pickle_predict, nb_class, pickle_reverse_arc=pickle_predict_ra)

        with tf.Session() as session:
            session.run(gcn_model.init)

            if do_test:
                gcn_model.restore_model(session, "models/das_exp1_C31.ckpt-99")
                print('Loaded models')
                graphAcc, node_acc = gcn_model.test_lG(session, graph_train)
                print(graphAcc, node_acc)

            # NOTE(review): the factor 10 presumably mirrors the checkpoint
            # saving interval used during training -- confirm.
            model_path = os.path.join(
                'models_all', 'C' + str(configid),
                "alldas_exp1_C" + str(configid) + ".ckpt-" + str(10 * epoch_index))
            print('Model_path', model_path)
            gcn_model.restore_model(session, model_path)
            print('Loaded models')

            start_time = time.time()
            lY_pred = gcn_model.predict_lG(session, predict_graph, verbose=False)
            end_time = time.time()
            print("--- %s seconds ---" % (end_time - start_time))
            print('Number of graphs:', len(lY_pred))

            if FLAGS.das_predict_workflow:
                outpicklefname = 'allmodel_das_predict_C' + str(
                    configid) + '_workflow.pickle'
            else:
                outpicklefname = 'allmodel_das_predict_C' + str(
                    configid) + '.pickle'

            # NOTE(review): a list-of-lists copy of lY_pred used to be built
            # here but was never written; lY_pred itself is what gets pickled.
            # The on-disk format is kept unchanged -- confirm whether consumers
            # expect plain lists instead.
            with open(outpicklefname, 'wb') as g:
                pickle.dump(lY_pred, g, protocol=2, fix_imports=True)

    elif FLAGS.qsub_taskid > -1:
        GRID = _make_grid_qsub(0)

        try:
            fold_id, configid = GRID[FLAGS.qsub_taskid]
        except (IndexError, KeyError, TypeError, ValueError):
            # Only lookup/unpacking failures are reported as an invalid task
            # id; KeyboardInterrupt and SystemExit are no longer swallowed.
            print('Invalid Grid Parameters', FLAGS.qsub_taskid, GRID)
            return -1

        print('Experiement with FOLD', fold_id, ' CONFIG', configid)
        pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
            fold_id) + '_tlXlY_trn.pkl'
        pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
            fold_id) + '_tlXlY_tst.pkl'

        train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
        test_graph = GCNDataset.load_transkribus_pickle(pickle_test)

        config = get_config(configid)

        if not os.path.exists(FLAGS.out_dir):
            print('Creating Dir', FLAGS.out_dir)
            os.mkdir(FLAGS.out_dir)

        outpicklefname = os.path.join(
            FLAGS.out_dir,
            'table_F' + str(fold_id) + '_C' + str(configid) + '.pickle')
        run_model_train_val_test(train_graph, config, outpicklefname,
                                 ratio_train_val=0.1,
                                 gcn_graph_test=test_graph)

    else:
        if FLAGS.fold == -1:
            # Run every fold for each of the requested configurations.
            FOLD_IDS = [1, 2, 3, 4]
            sel_configs_ = FLAGS.grid_configs.split('_')
            sel_configs = [int(x) for x in sel_configs_]
            print('GRID on FOLDS', FOLD_IDS)
            print('Model Configs', sel_configs)
            for cid in sel_configs:
                for fid in FOLD_IDS:
                    print('Running Fold', fid, 'on Config', cid)
                    main_fold(fid, cid, FLAGS.out_dir)
        else:
            pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXlY_trn.pkl'
            pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXlY_tst.pkl'

            # Reverse-arc features.
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXrlY_trn.pkl'
            pickle_test_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXrlY_tst.pkl'

            train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra)
            print('Loaded Trained Graphs:', len(train_graph))
            test_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_test, pickle_test_ra)
            print('Loaded Test Graphs:', len(test_graph))

            config = get_config(FLAGS.configid)

            outpicklefname = os.path.join(
                FLAGS.out_dir,
                'table_F' + str(FLAGS.fold) + '_C' + str(FLAGS.configid) + '.pickle')
            run_model_train_val_test(train_graph, config, outpicklefname,
                                     gcn_graph_test=test_graph)
def main(_):
    '''
    Entry point: train, validate and test one configuration on one dataset.

    Three dataset set-ups are handled:

    * the config has a 'model' key: plain fold pickles, no reverse-arc features;
    * FLAGS.das_predict_workflow is True: full training pickle with reverse
      arcs, tested on the col9142 X / Xr / Y pickles;
    * otherwise: fold pickles loaded together with their reverse-arc pickles.

    The result file is FLAGS.outname inside FLAGS.out_dir, unless
    FLAGS.outname is 'default', in which case a name built from the fold and
    config ids (or the config id alone for the workflow set-up) is used.

    :param _: ignored
    :return: None
    '''
    config = get_config(FLAGS.configid)
    print(config)

    mkdir_p(FLAGS.out_dir)

    # Fold pickles; sufficient on their own for the baseline ('model') configs.
    fold_stem = 'abp_CV_fold_' + str(FLAGS.fold)
    pickle_train = os.path.join(FLAGS.dpath, fold_stem + '_tlXlY_trn.pkl')
    pickle_test = os.path.join(FLAGS.dpath, fold_stem + '_tlXlY_tst.pkl')

    fold_result_name = ('table_F' + str(FLAGS.fold) + '_C'
                        + str(FLAGS.configid) + '.pickle')

    if 'model' in config:
        # Baseline models do not need reverse-arc features.
        train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
        test_graph = GCNDataset.load_transkribus_pickle(pickle_test)
        print('Loaded Test Graphs:', len(test_graph))
        default_name = fold_result_name

    elif FLAGS.das_predict_workflow is True:
        print('Doing Experiment on Predict Workflow ....')
        pickle_train = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_full_tlXlY_trn.pkl'
        pickle_train_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_CRF_Xr.pkl'
        print(pickle_train_ra, pickle_train)
        train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
            pickle_train, pickle_train_ra, format_reverse='lx')

        resources = '../../usecases/ABP/resources/DAS_2018/'
        fX_col9142 = resources + 'abp_DAS_col9142_CRF_X.pkl'
        fXr_col9142 = resources + 'abp_DAS_col9142_CRF_Xr.pkl'
        fY_col9142 = resources + 'DAS_col9142_l_Y_GT.pkl'
        test_graph = GCNDataset.load_transkribus_list_X_Xr_Y(
            fX_col9142, fXr_col9142, fY_col9142)
        default_name = 'col9142_C' + str(FLAGS.configid) + '.pickle'

    else:
        pickle_train_ra = os.path.join(FLAGS.dpath, fold_stem + '_tlXrlY_trn.pkl')
        pickle_test_ra = os.path.join(FLAGS.dpath, fold_stem + '_tlXrlY_tst.pkl')
        train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
            pickle_train, pickle_train_ra, attach_edge_label=True)
        test_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
            pickle_test, pickle_test_ra)
        default_name = fold_result_name

    # A user-supplied name wins over the per-branch default.
    if FLAGS.outname == 'default':
        result_name = default_name
    else:
        result_name = FLAGS.outname
    outpicklefname = os.path.join(FLAGS.out_dir, result_name)

    print('Loaded Trained Graphs:', len(train_graph))
    print('Loaded Test Graphs:', len(test_graph))

    run_model_train_val_test(train_graph, config, outpicklefname,
                             gcn_graph_test=test_graph)