Example #1
    def test_12_merge_graph(self):
        #3 nodes
        Xa = np.array([[1.0, 2.0], [6.3, 1.0], [4.3, -2.0]])
        Ea = np.array([[0, 1, 1, 0.5], [1, 2, 0, 0.2]])

        Xb = np.array([[6.3, 1.0], [1.3, -2.0]])
        Eb = np.array([[0, 1, 1, 0.5]])

        gA = GCNDataset('GA')
        gA.X = Xa
        gA.E = Ea

        gB = GCNDataset('GB')
        gB.X = Xb
        gB.E = Eb

        print('Graph A')
        print(gA.X, gA.E.shape)

        print('Graph B')
        print(gB.X, gB.E.shape)

        gc = GCNDataset.merge_graph(gA, gB)

        print(gc.X)
        print(gc.E)

        #TODO Test on Y too
        self.assertEqual(5, gc.X.shape[0])   # 3 + 2 nodes after the merge
        self.assertEqual(3, gc.E.shape[0])   # 2 + 1 edges after the merge
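
For reference, the assertions above imply that GCNDataset.merge_graph stacks the node feature matrices and shifts the second graph's edge endpoints by the node count of the first graph (3 + 2 = 5 nodes, 2 + 1 = 3 edges). A minimal numpy sketch of that bookkeeping follows; the helper merge_xe is hypothetical and only illustrates the expected behaviour, not the library's actual implementation.

import numpy as np

def merge_xe(Xa, Ea, Xb, Eb):
    # Stack node features; graph B's nodes come after graph A's.
    X = np.vstack([Xa, Xb])
    # Shift graph B's source/target indices (first two edge columns) by A's node count.
    Eb_shifted = Eb.copy()
    Eb_shifted[:, 0:2] += Xa.shape[0]
    E = np.vstack([Ea, Eb_shifted])
    return X, E

Xa = np.array([[1.0, 2.0], [6.3, 1.0], [4.3, -2.0]])
Ea = np.array([[0, 1, 1, 0.5], [1, 2, 0, 0.2]])
Xb = np.array([[6.3, 1.0], [1.3, -2.0]])
Eb = np.array([[0, 1, 1, 0.5]])

X, E = merge_xe(Xa, Ea, Xb, Eb)
assert X.shape == (5, 2) and E.shape == (3, 4)  # matches the unittest expectations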
Example #2
    def convert_lX_lY_to_GCNDataset(self,
                                    lX,
                                    lY,
                                    training=False,
                                    test=False,
                                    predict=False):
        gcn_list = []
        graph_id = 0

        # This has state information here --> move that to DU_Model_ECN ...
        lys = []
        for _, ly in zip(lX, lY):
            lys.extend(list(ly))
        #print (lys)

        if training:
            self.labelBinarizer.fit(lys)

        for lx, ly in zip(lX, lY):
            nf = lx[0]
            edge = lx[1]
            ef = lx[2]
            nb_node = nf.shape[0]

            graph = GCNDataset(str(graph_id))
            graph.X = nf
            if training or test:
                graph.Y = self.labelBinarizer.transform(ly)

            elif predict:
                graph.Y = -np.ones(
                    (nb_node, len(self.labelBinarizer.classes_)), dtype='i')
            else:
                raise Exception(
                    'Invalid usage: one of training, test, predict should be True')
            # Build the adjacency matrix from the edge index columns

            # print(edger)
            A1 = sp.coo_matrix(
                (np.ones(edge.shape[0]), (edge[:, 0], edge[:, 1])),
                shape=(nb_node, nb_node))
            # A2 = sp.coo_matrix((np.ones(edger.shape[0]), (edger[:, 0], edger[:, 1])), shape=(nb_node, nb_node))
            graph.A = A1  # + A2

            # JL: unused??   edge_normalizer = Normalizer()
            # Normalize EA

            E0 = np.hstack([edge, ef])  # check order
            # E1 = np.hstack([edger, efr])  # check order

            graph.E = E0
            #graph.compute_NA()
            graph.compute_NodeEdgeMat()

            gcn_list.append(graph)
            graph_id += 1

        return gcn_list
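
The converter above expects each lX entry to be a (node_features, edges, edge_features) triple, as unpacked in the loop, and each lY entry to be a per-node label vector. The standalone sketch below mirrors its three core steps (label binarization, adjacency construction, edge-matrix assembly) on a small synthetic graph; the shapes are illustrative only and GCNDataset itself is not needed for this check.

import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import LabelBinarizer

# One synthetic graph in the expected layout: lX[i] = (nf, edges, ef), lY[i] = ly
nf = np.array([[0.1, 0.2, 0.3],
               [0.4, 0.5, 0.6],
               [0.7, 0.8, 0.9],
               [1.0, 1.1, 1.2]], dtype='float32')   # 4 nodes, 3 features
edges = np.array([[0, 1], [1, 2], [2, 3]])          # source/target node indices
ef = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype='float32')  # 3 edges, 2 features
ly = np.array([0, 1, 1, 2])                          # one label per node

lb = LabelBinarizer().fit(ly)                        # 'training' branch: fit on all labels
Y = lb.transform(ly)                                 # one-hot node labels, shape (4, 3)

# Adjacency matrix from the two index columns, as in the converter above
A = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                  shape=(nf.shape[0], nf.shape[0]))

# Edge matrix: index columns first, then edge features (E0 = hstack([edge, ef]))
E = np.hstack([edges, ef])
print(Y.shape, A.shape, E.shape)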
Example #3
    def convert_X_to_GCNDataset(self, X):
        """
        Same code as above, dedicated to the predict mode (no need for Y)
        """
        graph_id = 0

        nf = X[0]
        edge = X[1]
        ef = X[2]
        nb_node = nf.shape[0]

        graph = GCNDataset(str(graph_id))
        graph.X = nf
        graph.Y = -np.ones(
            (nb_node, len(self.labelBinarizer.classes_)), dtype='i')

        # print(edger)
        A1 = sp.coo_matrix((np.ones(edge.shape[0]), (edge[:, 0], edge[:, 1])),
                           shape=(nb_node, nb_node))
        # A2 = sp.coo_matrix((np.ones(edger.shape[0]), (edger[:, 0], edger[:, 1])), shape=(nb_node, nb_node))
        graph.A = A1  # + A2

        # JL: unused??   edge_normalizer = Normalizer()
        # Normalize EA

        E0 = np.hstack([edge, ef])  # check order
        # E1 = np.hstack([edger, efr])  # check order

        graph.E = E0
        #graph.compute_NA()
        graph.compute_NodeEdgeMat()

        return graph
Example #4
    def test_predict_proba(self):
        pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
        gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)

        gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]
        node_dim = gcn_graph[0].X.shape[1]
        edge_dim = gcn_graph[0].E.shape[1] - 2  # first two columns are edge indices
        nb_class = gcn_graph[0].Y.shape[1]

        gcn_model = GraphAttNet(node_dim,
                                nb_class,
                                num_layers=1,
                                learning_rate=0.01,
                                node_indim=-1,
                                nb_attention=3)
        gcn_model.dropout_rate_node = 0.2
        gcn_model.dropout_rate_attention = 0.2
        gcn_model.create_model()

        with tf.Session() as session:
            session.run([gcn_model.init])
            # Train on all graphs, then query probabilities for one of them
            gcn_model.train_lG(session, gcn_graph)

            g_proba = gcn_model.prediction_prob(session, gcn_graph_train[1])
            print(g_proba.shape)
            print(type(g_proba))
            print(gcn_graph_train[1].X.shape)
            self.assertTrue(g_proba.shape == (gcn_graph_train[1].X.shape[0],
                                              5))
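
Assuming prediction_prob returns one class distribution per node (which is what the shape assertion above suggests), a natural extra sanity check is that each row sums to one. A small sketch on a dummy array, independent of the trained model:

import numpy as np

# Stand-in for g_proba, expected shape (n_nodes, n_classes)
g_proba = np.array([[0.70, 0.10, 0.10, 0.05, 0.05],
                    [0.20, 0.20, 0.20, 0.20, 0.20]])
assert g_proba.shape[1] == 5
assert np.allclose(g_proba.sum(axis=1), 1.0)  # each row is a probability distribution
print(g_proba.argmax(axis=1))                 # per-node hard predictions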
Example #5
    def test_graphattnet_train_dropout(self):
        pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
        gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)

        gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]
        node_dim = gcn_graph[0].X.shape[1]
        edge_dim = gcn_graph[0].E.shape[1] - 2  # first two columns are edge indices
        nb_class = gcn_graph[0].Y.shape[1]

        gcn_model = GraphAttNet(node_dim,
                                nb_class,
                                num_layers=1,
                                learning_rate=0.01,
                                node_indim=-1,
                                nb_attention=3)
        gcn_model.dropout_rate_node = 0.2
        gcn_model.dropout_rate_attention = 0.2
        gcn_model.create_model()

        with tf.Session() as session:
            session.run([gcn_model.init])
            # Get the Test Prediction
            g_acc, node_acc = gcn_model.test_lG(session, gcn_graph_train)
            print('Mean Accuracy', g_acc, node_acc)
            gcn_model.train_lG(session, gcn_graph)
            g_acc, node_acc = gcn_model.test_lG(session, gcn_graph_train)
            print('Mean Accuracy', g_acc, node_acc)
Example #6
    def test_logit_convolve(self):
        # 3 nodes a, b, c with edges a<->b and c->b (i.e. a -> b <- c)
        X = np.array([[1.0, 2.0], [6.3, 1.0], [4.3, -2.0]])
        Y = np.array([[1, 0], [0, 1.0], [1.0, 0.0]])
        E = np.array([
            [0, 1, 1.0, 1, 0],  # edge a->b
            [1, 0, 1.0, 0, 1],  # edge b->a
            [2, 1, 1.0, 0.0, 1.0]  # edge c->b
        ])

        nb_node = 3
        gA = GCNDataset('GLogitConvolve')
        gA.X = X
        gA.Y = Y
        gA.E = E
        gA.A = sp.coo_matrix((np.ones(E.shape[0]), (E[:, 0], E[:, 1])),
                             shape=(nb_node, nb_node))

        gA.compute_NodeEdgeMat()
        gA.compute_NA()

        # Check in-degree / out-degree
        print(gA.in_degree, gA.out_degree)
        self.assertAlmostEqual(2, gA.in_degree[1])
        print(gA.NA_indegree)

        self.assertAlmostEqual(0.5, gA.NA_indegree[1, 0])
        #self.assertAlmostEqual(2, gA.indegree[1])
        # Now assume P(Y|a)=[1,0], P(Y|b)=[0.5,0.5] and P(Y|c)=[0.8,0.2]
        pY = np.array([[1, 0], [0.5, 0.5], [0.8, 0.2]])
        # Node b has two incoming edges
        # Yt = [0 1; 1 0]

        Yt = np.array([[0.0, 1.0], [1.0, 0.0]])

        pY_Yt = tf.matmul(pY, Yt, transpose_b=True)

        Yt_sum = EdgeConvNet.logitconvolve_fixed(pY, Yt, gA.NA_indegree)

        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            Ytt = session.run(pY_Yt)
            print(Ytt)
            Res = session.run(Yt_sum)
            print(Res)
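
Since Yt above is the 2x2 class-swap (permutation) matrix, tf.matmul(pY, Yt, transpose_b=True) simply swaps the two class columns of pY. This can be checked by hand with numpy, without a TensorFlow session:

import numpy as np

pY = np.array([[1.0, 0.0], [0.5, 0.5], [0.8, 0.2]])
Yt = np.array([[0.0, 1.0], [1.0, 0.0]])

pY_Yt = pY @ Yt.T                     # same product as the tf.matmul above
expected = np.array([[0.0, 1.0], [0.5, 0.5], [0.2, 0.8]])
assert np.allclose(pY_Yt, expected)   # class columns are swapped
print(pY_Yt)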
Example #7
    def test_train_ensemble_NN_model(self):
        # TODO Make a proper synthetic dataset for test purposes
        pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
        gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)

        gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]
        node_dim = gcn_graph[0].X.shape[1]
        edge_dim = gcn_graph[0].E.shape[1] - 2  # first two columns are edge indices
        nb_class = gcn_graph[0].Y.shape[1]

        gat_model = GraphAttNet(node_dim,
                                nb_class,
                                num_layers=1,
                                learning_rate=0.01,
                                node_indim=-1,
                                nb_attention=3)
        gat_model.dropout_rate_node = 0.2
        gat_model.dropout_rate_attention = 0.2
        gat_model.create_model()

        nb_layers = 3
        lr = 0.001
        nb_conv = 2

        ecn_model = EdgeConvNet(
            node_dim,
            edge_dim,
            nb_class,
            num_layers=nb_layers,
            learning_rate=lr,
            mu=0.0,
            node_indim=-1,
            nconv_edge=nb_conv,
        )
        ecn_model.create_model()

        # Check graphs
        # Are we recopying the models and graph definitions implicitly?
        ensemble = EnsembleGraphNN([ecn_model, gat_model])

        with tf.Session() as session:
            session.run([ensemble.models[0].init])

            for _ in range(500):  # plain counter; avoids shadowing the builtin iter
                ensemble.train_lG(session, gcn_graph_train)
            prediction = ensemble.predict_lG(session, gcn_graph_train)
            print(prediction)
            self.assertTrue(len(prediction) == len(gcn_graph_train))

            print('Ensemble Prediction')
            accs = ensemble.test_lG(session, gcn_graph_train)
            print('Base Predictions')
            for m in ensemble.models:
                accs = m.test_lG(session, gcn_graph_train)
                print(accs)

            print(accs)
Example #8
def get_graph_test():
    # For GraphAttNet
    X = np.array([[1.0, 0.5], [0.5, 0.5], [0.0, 1.0]], dtype='float32')
    E = np.array([[0, 1, 1.0], [1, 0, 1.0], [2, 1, 1.0], [1, 2, 1.0]],
                 dtype='float32')
    Y = np.array([[1, 0], [0, 1], [0, 1]], dtype='int32')

    gcn = GCNDataset('UT_test_1')
    gcn.X = X
    gcn.E = E
    gcn.Y = Y
    gcn.compute_NodeEdgeMat()
    return gcn
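
get_graph_test builds X, E and Y but not the adjacency matrix; if a test also needed gcn.A, the same coo_matrix construction used elsewhere in these examples would apply. A standalone sketch for this 3-node graph (indices are cast to int explicitly, since E is stored as float32):

import numpy as np
import scipy.sparse as sp

E = np.array([[0, 1, 1.0], [1, 0, 1.0], [2, 1, 1.0], [1, 2, 1.0]], dtype='float32')
nb_node = 3
# First two edge columns are source/target indices, as in the converters above
A = sp.coo_matrix((np.ones(E.shape[0]),
                   (E[:, 0].astype(int), E[:, 1].astype(int))),
                  shape=(nb_node, nb_node))
print(A.todense())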
def main_fold(foldid, configid, outdir):
    '''
    Simple fold experiment: load one fold, then train and test
    :param foldid:
    :param configid:
    :param outdir:
    :return:
    '''
    pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
        foldid) + '_tlXlY_trn.pkl'
    pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
        foldid) + '_tlXlY_tst.pkl'

    train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
    test_graph = GCNDataset.load_transkribus_pickle(pickle_test)

    config = get_config(configid)
    #acc_test = run_model(train_graph, config, test_graph)
    #print('Accuracy Test', acc_test)

    # Use the function's own parameters rather than the global FLAGS
    outpicklefname = os.path.join(
        outdir,
        'table_F' + str(foldid) + '_C' + str(configid) + '.pickle')
    run_model_train_val_test(train_graph, config, outpicklefname,
                             gcn_graph_test=test_graph)
Example #10
    def test_predict(self):
        pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
        gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)

        gcn_graph_train = [gcn_graph[8], gcn_graph[18], gcn_graph[29]]
        node_dim = gcn_graph[0].X.shape[1]
        edge_dim = gcn_graph[0].E.shape[1] - 2  # first two columns are edge indices
        nb_class = gcn_graph[0].Y.shape[1]

        gcn_model = EdgeConvNet(node_dim,
                                edge_dim,
                                nb_class,
                                num_layers=2,
                                learning_rate=0.01,
                                mu=0.0,
                                node_indim=-1,
                                nconv_edge=5)
        # gcn_model =EdgeConvNet(node_dim,edge_dim,nb_class,num_layers=1,learning_rate=0.001,mu=0.0,node_indim=-1)
        gcn_model.stack_instead_add = True
        gcn_model.fast_convolve = True

        gcn_model.create_model()

        # pdb.set_trace()

        nb_iter = 50
        with tf.Session() as session:
            session.run([gcn_model.init])
            # Train for nb_iter passes; train_lG samples each graph (in random order)
            for i in range(nb_iter):
                gcn_model.train_lG(session, gcn_graph_train)

            # Get the test accuracy
            g_acc, node_acc = gcn_model.test_lG(session, gcn_graph_train)
            print('Mean Accuracy', g_acc, node_acc)
            # Get the per-node predictions

            lY_pred = gcn_model.predict_lG(session,
                                           gcn_graph_train,
                                           verbose=False)

        tp = 0
        nb_node = 0

        Ytrue_l = []
        lY_l = []
        for lY, graph in zip(lY_pred, gcn_graph_train):
            Ytrue = np.argmax(graph.Y, axis=1)
            Ytrue_l.extend(Ytrue)
            lY_l.extend(lY)
            tp += sum(Ytrue == lY)
            #pdb.set_trace()
            nb_node += Ytrue.shape[0]

        print('Final Accuracy', tp / nb_node)
        print('Accuracy_score', sklearn.metrics.accuracy_score(Ytrue_l, lY_l))
        print(sklearn.metrics.classification_report(Ytrue_l, lY_l))
        self.assertAlmostEqual(tp / nb_node, node_acc)

        Z = [
            lY_pred, [np.argmax(graph.Y, axis=1) for graph in gcn_graph_train]
        ]
        with open('debug.pickle', 'wb') as f:
            pickle.dump(Z, f, protocol=1, fix_imports=True)
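
The manual tp / nb_node accuracy above should agree with sklearn.metrics.accuracy_score on the concatenated labels; a tiny standalone illustration of that equivalence on synthetic labels:

import numpy as np
import sklearn.metrics

y_true = np.array([0, 1, 2, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0])
tp = np.sum(y_true == y_pred)
assert np.isclose(tp / y_true.shape[0],
                  sklearn.metrics.accuracy_score(y_true, y_pred))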
Example #11
    def test_05_load_jl_pickle(self):

        pickle_fname = 'abp_CV_fold_1_tlXlY_trn.pkl'
        gcn_graph = GCNDataset.load_transkribus_pickle(pickle_fname)
        print(len(gcn_graph), 'graphs loaded')
Example #12
    def test_01_load(self):
        dataset = GCNDataset('UT_iris_0')
        dataset.load_pickle('iris_graph.pickle')
        dataset.print_stats()
        return True
def main(_):

    if FLAGS.snake is True:

        pickle_train = '/home/meunier/Snake/snake_tlXlY_edge_trn.pkl'
        pickle_test = '/home/meunier/Snake/snake_tlXlY_edge_tst.pkl'

        #pickle_train = '/home/meunier/Snake/snake_tlXlY_trn.pkl'
        #pickle_test =  '/home/meunier/Snake/snake_tlXlY_tst.pkl'

        #pickle_train = '/home/meunier/Snake/snake_tlXlY_fixed_trn.pkl'
        #pickle_test =  '/home/meunier/Snake/snake_tlXlY_fixed_tst.pkl'

        #pickle_train='/home/meunier/Snake/snake_tlXlY_2_fixed_trn.pkl'
        #pickle_test='/home/meunier/Snake/snake_tlXlY_2_fixed_tst.pkl'

        train_graph = GCNDataset.load_snake_pickle(pickle_train)
        test_graph = GCNDataset.load_snake_pickle(pickle_test)

        config = get_config(FLAGS.configid)
        acc_test = run_model(train_graph, config, test_graph)
        print('Accuracy Test', acc_test)

    elif FLAGS.das_train is True:
        # Load all the table files
        # Train the model
        graph_train = []

        debug = True
        if debug:

            pickle_train = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_full_tlXlY_trn.pkl'
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_CRF_Xr.pkl'
            print(pickle_train_ra, pickle_train)
            #train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
            graph_train = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra, format_reverse='lx')
        else:
            i = 1
            pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                i) + '_tlXlY_trn.pkl'
            pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                i) + '_tlXlY_tst.pkl'

            # reversed edges
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                i) + '_tlXrlY_trn.pkl'
            pickle_test_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                i) + '_tlXrlY_tst.pkl'

            train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra)
            test_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_test, pickle_test_ra)

            graph_train.extend(train_graph)
            graph_train.extend(test_graph)

        print('Graph Train Nb', len(graph_train))
        #Load the other dataset for predictions
        configid = FLAGS.configid
        config = get_config(configid)
        #config['nb_iter'] = 100

        dirp = os.path.join('models_all', 'C' + str(configid))
        mkdir_p(dirp)
        save_model_dir = os.path.join(
            dirp, 'alldas_exp1_C' + str(configid) + '.ckpt')
        # I should save the pickle
        outpicklefname = os.path.join(
            dirp,
            'alldas_exp1_C' + str(configid) + '.validation_scores.pickle')
        run_model_train_val_test(graph_train,
                                 config,
                                 outpicklefname,
                                 ratio_train_val=0.1,
                                 save_model_path=save_model_dir)
        #for test add gcn_graph_test=train_graph

    elif FLAGS.das_predict is True:

        do_test = False  # internal flag for extra testing

        node_dim = 29
        edge_dim = 140
        nb_class = 5

        configid = FLAGS.configid
        config = get_config(configid)

        #Get the best file
        #TODO Get the best file
        #node_dim = gcn_graph[0].X.shape[1]
        #edge_dim = gcn_graph[0].E.shape[1] - 2.0
        #nb_class = gcn_graph[0].Y.shape[1]

        #f = open('archive_models/das_exp1_C31.validation_scores.pickle', 'rb')

        val_pickle = os.path.join(
            'models_all', 'C' + str(configid),
            "alldas_exp1_C" + str(configid) + '.validation_scores.pickle')
        print('Reading Training Info from:', val_pickle)
        f = open(val_pickle, 'rb')
        R = pickle.load(f)
        val = R['val_acc']
        print('Validation scores', val)

        epoch_index = np.argmax(val)
        print('Best performance on val set: Epoch', epoch_index)

        gcn_model = gcn_models.EdgeConvNet(
            node_dim,
            edge_dim,
            nb_class,
            num_layers=config['num_layers'],
            learning_rate=config['lr'],
            mu=config['mu'],
            node_indim=config['node_indim'],
            nconv_edge=config['nconv_edge'],
        )

        gcn_model.stack_instead_add = config['stack_instead_add']

        if 'fast_convolve' in config:
            gcn_model.fast_convolve = config['fast_convolve']

        gcn_model.create_model()

        if do_test:
            graph_train = []
            for i in range(1, 5):
                pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                    i) + '_tlXlY_trn.pkl'
                print('loading ', pickle_train)
                train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
                graph_train.extend(train_graph)

        #TODO load the data for test
        #/nfs/project/read/testJL/TABLE/abp_DAS_col9142_CRF_X.pkl
        if FLAGS.das_predict_workflow:
            pickle_predict = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_workflow_X.pkl'
            pickle_predict_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_workflow_Xr.pkl'
        else:
            pickle_predict = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_CRF_X.pkl'
            pickle_predict_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_col9142_CRF_Xr.pkl'

        print('loading ', pickle_predict, pickle_predict_ra)
        predict_graph = GCNDataset.load_test_pickle(
            pickle_predict, nb_class, pickle_reverse_arc=pickle_predict_ra)

        with tf.Session() as session:
            # Restore variables from disk.
            session.run(gcn_model.init)

            if do_test:
                gcn_model.restore_model(session, "models/das_exp1_C31.ckpt-99")
                print('Loaded models')

                graphAcc, node_acc = gcn_model.test_lG(session, graph_train)
                print(graphAcc, node_acc)

            model_path = os.path.join(
                'models_all', 'C' + str(configid), "alldas_exp1_C" +
                str(configid) + ".ckpt-" + str(10 * epoch_index))
            print('Model_path', model_path)
            gcn_model.restore_model(session, model_path)
            print('Loaded models')

            start_time = time.time()
            lY_pred = gcn_model.predict_lG(session,
                                           predict_graph,
                                           verbose=False)
            end_time = time.time()
            print("--- %s seconds ---" % (end_time - start_time))
            print('Number of graphs:', len(lY_pred))

            # Convert to lists, as pickle did not seem to like the arrays while lists pickle fine
            lY_list = []
            for x in lY_pred:
                lY_list.append(list(x))

            #print(lY_list)
            if FLAGS.das_predict_workflow:
                outpicklefname = 'allmodel_das_predict_C' + str(
                    configid) + '_workflow.pickle'
            else:
                outpicklefname = 'allmodel_das_predict_C' + str(
                    configid) + '.pickle'
            # print(lY_pred)
            with open(outpicklefname, 'wb') as g:
                # Dump the list version built above, per the comment on lY_list
                pickle.dump(lY_list, g, protocol=2, fix_imports=True)

    elif FLAGS.qsub_taskid > -1:

        GRID = _make_grid_qsub(0)

        try:
            fold_id, configid = GRID[FLAGS.qsub_taskid]
        except Exception:
            print('Invalid Grid Parameters', FLAGS.qsub_taskid, GRID)
            return -1
        print('Experiment with FOLD', fold_id, ' CONFIG', configid)
        pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
            fold_id) + '_tlXlY_trn.pkl'
        pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
            fold_id) + '_tlXlY_tst.pkl'

        train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
        test_graph = GCNDataset.load_transkribus_pickle(pickle_test)

        config = get_config(configid)

        if os.path.exists(FLAGS.out_dir) is False:
            print('Creating Dir', FLAGS.out_dir)
            os.mkdir(FLAGS.out_dir)

        outpicklefname = os.path.join(
            FLAGS.out_dir,
            'table_F' + str(fold_id) + '_C' + str(configid) + '.pickle')
        run_model_train_val_test(train_graph,
                                 config,
                                 outpicklefname,
                                 ratio_train_val=0.1,
                                 gcn_graph_test=test_graph)

    else:

        if FLAGS.fold == -1:
            # Do it on all the folds for the specified configs
            FOLD_IDS = [1, 2, 3, 4]
            sel_configs_ = FLAGS.grid_configs.split('_')
            sel_configs = [int(x) for x in sel_configs_]
            print('GRID on FOLDS', FOLD_IDS)
            print('Model Configs', sel_configs)

            for cid in sel_configs:
                for fid in FOLD_IDS:
                    print('Running Fold', fid, 'on Config', cid)
                    main_fold(fid, cid, FLAGS.out_dir)

        else:

            pickle_train = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXlY_trn.pkl'
            pickle_test = '/nfs/project/read/testJL/TABLE/abp_quantile_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXlY_tst.pkl'

            # reversed edges
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXrlY_trn.pkl'
            pickle_test_ra = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_CV_fold_' + str(
                FLAGS.fold) + '_tlXrlY_tst.pkl'

            #train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
            train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra)
            print('Loaded Trained Graphs:', len(train_graph))
            test_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_test, pickle_test_ra)
            #test_graph = GCNDataset.load_transkribus_pickle(pickle_test, pickle_test_ra)
            print('Loaded Test Graphs:', len(test_graph))

            config = get_config(FLAGS.configid)

            #acc_test = run_model(train_graph, config, test_graph,eval_iter=1)
            #print('Accuracy Test', acc_test)

            outpicklefname = os.path.join(
                FLAGS.out_dir, 'table_F' + str(FLAGS.fold) + '_C' +
                str(FLAGS.configid) + '.pickle')
            run_model_train_val_test(train_graph,
                                     config,
                                     outpicklefname,
                                     gcn_graph_test=test_graph)
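
The fold/config pickle paths in main() are built by repeated string concatenation; a small hypothetical helper (not part of the original code) that expresses the same naming convention more readably:

import os

def fold_pickle_paths(base_dir, fold_id, reverse_arcs=False):
    # Mirrors the convention above: abp_CV_fold_<fold>_tlXlY_{trn,tst}.pkl,
    # with tlXrlY instead of tlXlY for the reverse-arc pickles.
    infix = 'tlXrlY' if reverse_arcs else 'tlXlY'
    trn = os.path.join(base_dir, 'abp_CV_fold_%d_%s_trn.pkl' % (fold_id, infix))
    tst = os.path.join(base_dir, 'abp_CV_fold_%d_%s_tst.pkl' % (fold_id, infix))
    return trn, tst

# e.g. fold_pickle_paths('/nfs/project/read/testJL/TABLE/abp_quantile_models', 1)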
Example #14
def main(_):
    config = get_config(FLAGS.configid)
    print(config)

    mkdir_p(FLAGS.out_dir)

    # Pickles for the Logit baseline are sufficient
    pickle_train = os.path.join(
        FLAGS.dpath, 'abp_CV_fold_' + str(FLAGS.fold) + '_tlXlY_trn.pkl')
    pickle_test = os.path.join(
        FLAGS.dpath, 'abp_CV_fold_' + str(FLAGS.fold) + '_tlXlY_tst.pkl')

    # Baseline Models do not need reverse arc features
    if 'model' in config:
        train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
        test_graph = GCNDataset.load_transkribus_pickle(pickle_test)
        print('Loaded Test Graphs:', len(test_graph))

        if FLAGS.outname == 'default':
            outpicklefname = os.path.join(
                FLAGS.out_dir, 'table_F' + str(FLAGS.fold) + '_C' +
                str(FLAGS.configid) + '.pickle')
        else:
            outpicklefname = os.path.join(FLAGS.out_dir, FLAGS.outname)

    else:

        if FLAGS.das_predict_workflow is True:
            print('Doing Experiment on Predict Workflow ....')
            pickle_train = '/nfs/project/read/testJL/TABLE/das_abp_models/abp_full_tlXlY_trn.pkl'
            pickle_train_ra = '/nfs/project/read/testJL/TABLE/abp_DAS_CRF_Xr.pkl'
            print(pickle_train_ra, pickle_train)
            # train_graph = GCNDataset.load_transkribus_pickle(pickle_train)
            train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra, format_reverse='lx')

            fX_col9142 = '../../usecases/ABP/resources/DAS_2018/abp_DAS_col9142_CRF_X.pkl'
            fXr_col9142 = '../../usecases/ABP/resources/DAS_2018/abp_DAS_col9142_CRF_Xr.pkl'
            fY_col9142 = '../../usecases/ABP/resources/DAS_2018/DAS_col9142_l_Y_GT.pkl'

            test_graph = GCNDataset.load_transkribus_list_X_Xr_Y(
                fX_col9142, fXr_col9142, fY_col9142)

            if FLAGS.outname == 'default':
                outpicklefname = os.path.join(
                    FLAGS.out_dir,
                    'col9142_C' + str(FLAGS.configid) + '.pickle')
            else:
                outpicklefname = os.path.join(FLAGS.out_dir, FLAGS.outname)

        else:
            pickle_train_ra = os.path.join(
                FLAGS.dpath,
                'abp_CV_fold_' + str(FLAGS.fold) + '_tlXrlY_trn.pkl')
            pickle_test_ra = os.path.join(
                FLAGS.dpath,
                'abp_CV_fold_' + str(FLAGS.fold) + '_tlXrlY_tst.pkl')
            train_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_train, pickle_train_ra, attach_edge_label=True)
            test_graph = GCNDataset.load_transkribus_reverse_arcs_pickle(
                pickle_test, pickle_test_ra)

            if FLAGS.outname == 'default':
                outpicklefname = os.path.join(
                    FLAGS.out_dir, 'table_F' + str(FLAGS.fold) + '_C' +
                    str(FLAGS.configid) + '.pickle')
            else:
                outpicklefname = os.path.join(FLAGS.out_dir, FLAGS.outname)

        print('Loaded Trained Graphs:', len(train_graph))
        print('Loaded Test Graphs:', len(test_graph))

    run_model_train_val_test(train_graph,
                             config,
                             outpicklefname,
                             gcn_graph_test=test_graph)
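
For reference, the call sites in these examples are consistent with run_model_train_val_test having roughly the signature sketched below. This is inferred from the calls only; the parameter names and defaults are assumptions, not the actual definition.

def run_model_train_val_test(gcn_graph_train, config, outpicklefname,
                             ratio_train_val=0.1, gcn_graph_test=None,
                             save_model_path=None):
    """Assumed interface: train on gcn_graph_train with the given config,
    hold out a validation split (ratio_train_val), optionally evaluate on
    gcn_graph_test, write scores to outpicklefname and checkpoints to
    save_model_path."""
    ...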