def main(args):
    """Run the Graph Attention Autoencoder pipeline end to end.

    Loads the dataset named by ``args.dataset``, trains the autoencoder,
    extracts node embeddings and prints whatever the downstream
    ``Classifier`` returns for them.

    Args:
        args: Parsed options. ``args.dataset`` selects the dataset and
            ``args.hidden_dims`` lists the hidden layer sizes; the latter is
            reassigned with the input feature size prepended.
    """
    G, X, Y, idx_train, idx_val, idx_test = process.load_data(args.dataset)

    # Add the feature dimension size to the beginning of hidden_dims so the
    # layer-size list also describes the input layer.
    feature_dim = X.shape[1]
    args.hidden_dims = [feature_dim] + args.hidden_dims

    # Prepare the data
    G_tf, S, R = process.prepare_graph_data(G)

    # Train the model
    trainer = Trainer(args)
    trainer(G_tf, X, S, R)
    # The second value returned by infer() (attentions) is not used here.
    embeddings, _ = trainer.infer(G_tf, X, S, R)

    # Evaluate the quality of the embeddings
    classifier = Classifier(vectors=embeddings)
    # NOTE(review): the indices are passed as (train, test, val) -- confirm
    # this matches the parameter order of Classifier.__call__.
    f1s = classifier(idx_train, idx_test, idx_val, Y, seed=0)
    # print() with a single argument behaves the same on Python 2 and 3.
    print(f1s)
Exemple #2
0
def main():
    """Train (or reload) a DGI encoder on Cora and probe its embeddings.

    Steps:
      1. Load the dataset, preprocess the features and pass the adjacency
         (with self-loops added) through ``process.normalize_adj``.
      2. Unless a checkpoint already exists at ``saved_graph``, train DGI
         with early stopping on the training loss, saving the weights each
         time the loss improves.
      3. Reload the checkpoint, embed all nodes and fit 50 independent
         ``LogReg`` probes, printing the mean and standard deviation of
         their accuracy.
      4. Save the last probe to ``saved_logreg`` and print its accuracy on
         the remaining split.
    """
    saved_graph = os.path.join('assets', 'saved_graphs', 'best_dgi.pickle')
    saved_logreg = os.path.join('assets', 'saved_graphs', 'best_logreg.pickle')

    dataset = 'cora'

    # training params
    batch_size = 1
    nb_epochs = 10000
    patience = 25
    lr = 0.001
    l2_coef = 0.0
    drop_prob = 0.0
    hid_units = 512
    sparse = True
    nonlinearity = 'prelu'  # special name to separate parameters

    # NOTE(review): idx_test is unpacked *before* idx_val here, while the
    # "Validation" loop below scores on idx_test and the final "Testing" step
    # on idx_val. Confirm the return order of process.load_data.
    adj, features, labels, idx_train, idx_test, idx_val = process.load_data(dataset)

    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = labels.shape[1]

    adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))

    if sparse:
        adj = process.sparse_mx_to_torch_sparse_tensor(adj)
    else:
        adj = (adj + sp.eye(adj.shape[0])).todense()

    # Add a leading axis of size 1 (indexed as [0, ...] further down).
    features = torch.FloatTensor(features[np.newaxis])
    if not sparse:
        adj = torch.FloatTensor(adj[np.newaxis])
    labels = torch.FloatTensor(labels[np.newaxis])
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    print("Training Nodes: {}, Testing Nodes: {}, Validation Nodes: {}".format(len(idx_train), len(idx_test), len(idx_val)))

    model = DGI(ft_size, hid_units, nonlinearity)
    optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print('Using CUDA')
        model.cuda()
        features = features.cuda()
        # The adjacency lives in `adj` for both the sparse and dense cases;
        # the previous `sp_adj` name was never assigned in this function.
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    b_xent = nn.BCEWithLogitsLoss()
    xent = nn.CrossEntropyLoss()
    cnt_wait = 0
    best = 1e9
    best_t = 0

    if not os.path.exists(saved_graph):
        pbar = trange(nb_epochs)
        for epoch in pbar:
            model.train()
            optimiser.zero_grad()

            # Second input to the model: the same features with the node
            # axis randomly permuted.
            idx = np.random.permutation(nb_nodes)
            shuf_fts = features[:, idx, :]

            # Targets: ones for the first nb_nodes logits, zeros for the rest.
            lbl_1 = torch.ones(batch_size, nb_nodes)
            lbl_2 = torch.zeros(batch_size, nb_nodes)
            lbl = torch.cat((lbl_1, lbl_2), 1)

            if use_cuda:
                shuf_fts = shuf_fts.cuda()
                lbl = lbl.cuda()

            logits = model(features, shuf_fts, adj, sparse, None, None, None)

            loss = b_xent(logits, lbl)
            # Compare plain floats so `best` does not keep a tensor (and its
            # autograd graph) alive across epochs.
            loss_val = loss.item()

            pbar.desc = 'Loss: {:.4f}'.format(loss_val)

            if loss_val < best:
                best = loss_val
                best_t = epoch
                # Reset the patience counter on every improvement.
                cnt_wait = 0
                torch.save(model.state_dict(), saved_graph)
            else:
                cnt_wait += 1

            if cnt_wait == patience:
                tqdm.write('Early stopping!')
                break

            loss.backward()
            optimiser.step()

    print('Loading {}th Epoch'.format(best_t) if best_t else 'Loading Existing Graph')
    model.load_state_dict(torch.load(saved_graph))

    embeds, _ = model.embed(features, adj, sparse, None)
    train_embs = embeds[0, idx_train]
    val_embs = embeds[0, idx_val]
    test_embs = embeds[0, idx_test]

    # Labels are stored one row per node; take the arg-max column as the class.
    train_lbls = torch.argmax(labels[0, idx_train], dim=1)
    val_lbls = torch.argmax(labels[0, idx_val], dim=1)
    test_lbls = torch.argmax(labels[0, idx_test], dim=1)

    accs = []

    print("\nValidation:")
    pbar = trange(50)
    for _ in pbar:
        # A fresh probe per repetition, trained for a fixed 100 steps.
        log = LogReg(hid_units, nb_classes)
        opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0)

        if use_cuda:
            log.cuda()
        for _ in range(100):
            log.train()
            opt.zero_grad()

            logits = log(train_embs)
            loss = xent(logits, train_lbls)

            loss.backward()
            opt.step()

        logits = log(test_embs)
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == test_lbls).float() / test_lbls.shape[0]
        accs.append(acc * 100)
        pbar.desc = "Accuracy: {:.2f}%".format(100 * acc)

    # Only the probe from the final repetition is persisted.
    torch.save(log.state_dict(), saved_logreg)

    accs = torch.stack(accs)
    print('Average Accuracy: {:.2f}%'.format(accs.mean()))
    print('Standard Deviation: {:.3f}'.format(accs.std()))

    print("\nTesting")
    logits = log(val_embs)
    preds = torch.argmax(logits, dim=1)
    acc = torch.sum(preds == val_lbls).float() / val_lbls.shape[0]
    print("Accuracy: {:.2f}%".format(100 * acc))
Exemple #3
0
from utils import process
import numpy as np

# For each dataset, load the label matrix (third value returned by
# process.load_data) and print its column-wise sum -- the per-class node
# count if the labels are one-hot (TODO confirm).
for dataset, header in (('cora', 'cora: '),
                        ('pubmed', 'pubmed: '),
                        ('citeseer', 'citeseer: ')):
    _, _, labels, _, _, _ = process.load_data(dataset)
    stat = np.sum(labels, axis=0)
    print(header, stat)
Exemple #4
0
# Echo the architecture hyper-parameters configured earlier in the script.
print('----- Archi. hyperparams -----')
print('nb. layers: ' + str(len(hid_units)))
print('nb. units per layer: ' + str(hid_units))
print('nb. attention heads: ' + str(n_heads))
print('residual: ' + str(residual))
print('nonlinearity: ' + str(nonlinearity))
print('model: ' + str(model))

# When True, the loader that also returns a validation split is used.
validation = True

# Load data
if validation:
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data_2(
        dataset, seed, unlabel_prob)
else:
    adj, features, y_train, y_test, train_mask, test_mask = process.load_data(
        dataset, seed, unlabel_prob)
features, spars = process.preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = y_train.shape[1]

# Add a leading axis of size 1 to every array.
features = features[np.newaxis]
y_train = y_train[np.newaxis]
y_test = y_test[np.newaxis]
train_mask = train_mask[np.newaxis]
test_mask = test_mask[np.newaxis]
# y_val / val_mask only exist when the validation loader was used; touching
# them unconditionally raised NameError for validation = False.
if validation:
    y_val = y_val[np.newaxis]
    val_mask = val_mask[np.newaxis]

if sparse:
Exemple #5
0
def main(args):
    """Run the two-view Graph Attention Auto-encoder pipeline.

    The node features loaded for ``args.dataset`` form the first view and the
    result of ``fft`` applied to them forms the second. Both views share one
    graph structure. The trainer is first given both views through
    ``assign`` and is then called with the labels and the fine-tune flag set.

    Args:
        args: Parsed options. ``args.dataset`` selects the dataset;
            ``args.hidden_dims1`` and ``args.hidden_dims2`` are reassigned
            with the feature size of the matching view prepended.
    """
    # An earlier variant that read two pre-computed views and a graph from
    # .mat files was disabled in the original and is omitted here.
    graph, view1, Y, idx_train, idx_val, idx_test = process.load_data(args.dataset)
    print('Graph的维度:' + str(graph.shape))
    print('Content的维度:' + str(view1.shape))

    # Collapse each label row to the index of its largest entry.
    labels = np.array([np.argmax(row) for row in Y])
    print('Label的维度:' + str(labels.shape))

    # First view: prepend its feature size to the hidden layer sizes.
    args.hidden_dims1 = [view1.shape[1]] + args.hidden_dims1

    # Second view: same treatment for the transformed features.
    view2 = fft(view1)
    args.hidden_dims2 = [view2.shape[1]] + args.hidden_dims2

    print('隐层单元1的维度:' + str(args.hidden_dims1))
    print('隐层单元2的维度:' + str(args.hidden_dims2))

    # Prepare the graph data; it is reused for both views.
    G_tf, S, R = process.prepare_graph_data(graph)

    trainer = Trainer(args)
    # Hand both views to the trainer (return value intentionally ignored).
    trainer.assign(G_tf, view1, S, R, G_tf, view2, S, R)

    # Fine-tune: the final positional argument is the `fin` flag.
    trainer(G_tf, view1, S, R, G_tf, view2, S, R, labels, True)
Exemple #6
0
from keras.optimizers import Adam
from keras.regularizers import l2

from keras_gat import GraphAttention
# from keras_gat.utils import load_data, preprocess_features
from utils.process import load_data, preprocess_features

# TF1 session configuration with GPU allow_growth enabled.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True

# Read data
# Validate the command-line argument before using it: a missing argument used
# to raise IndexError, and the error path exited with status 0 via the
# site-provided exit() builtin.
if len(sys.argv) < 2 or sys.argv[1] not in ['cora', 'citeseer', 'pubmed', 'wiki']:
    print('invalid dataset')
    sys.exit(1)
dataset = sys.argv[1]
A, X, Y_train, Y_val, Y_test, idx_train, idx_val, idx_test = load_data(dataset)

# Parameters
N = X.shape[0]                # Number of nodes in the graph
F = X.shape[1]                # Original feature dimension
n_classes = Y_train.shape[1]  # Number of classes
F_ = 100                       # Output size of first GraphAttention layer
n_attn_heads = 8              # Number of attention heads in first GAT layer
dropout_rate = 0.5            # Dropout rate (between and inside GAT layers)
l2_reg = 5e-4/2               # Factor for l2 regularization
learning_rate = 0.0005          # Learning rate for Adam
epochs = 10000                # Number of training epochs
es_patience = 100             # Patience for early stopping

# Preprocessing operations
# NOTE(review): other callers of process.preprocess_features unpack two return
# values; confirm that the utils.process version imported here returns a
# single matrix.
X = preprocess_features(X)
Exemple #7
0
# Remaining command-line options: two float weights for the terms named in
# their help strings, and the activation name handed to the model below.
parser.add_argument('--beta', type=float, default=1.0,
                    help='parameter for I(h_i; x_j), node j is a neighbor (default: 1.0)')
parser.add_argument('--gamma', type=float, default=1.0,
                    help='parameter for I(w_ij; a_ij) (default: 1.0)')
parser.add_argument('--activation', default='prelu',
                    help='activation function')

###############################################
# This section of code adapted from Petar Veličković/DGI #
###############################################

args = parser.parse_args()
# NOTE(review): called unconditionally -- presumably requires a CUDA-enabled
# build and a valid device index in args.gpu; confirm for CPU-only runs.
torch.cuda.set_device(args.gpu)

print('Loading ', args.dataset)
adj_ori, features, labels, idx_train, idx_val, idx_test = process.load_data(args.dataset)
# Second return value of preprocess_features is not needed here.
features, _ = process.preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = labels.shape[1]
# Add self-loops (identity) to the raw adjacency before normalising it; the
# raw adj_ori is kept under its own name.
adj = process.normalize_adj(adj_ori + sp.eye(adj_ori.shape[0]))

# Convert everything to torch tensors; features and labels get a leading
# axis of size 1 via np.newaxis.
sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)
features = torch.FloatTensor(features[np.newaxis])
labels = torch.FloatTensor(labels[np.newaxis])
idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

model = GMI(ft_size, args.hid_units, args.activation)
Exemple #8
0
def train():
    """Train the model with early stopping and return its test metrics.

    Reads its configuration from module-level names: ``dataset``,
    ``train_size``, ``validation_size``, ``batch_size``, ``nb_epochs``,
    ``patience``, ``lr``, ``l2_coef``, ``hid_units``, ``n_heads``,
    ``residual``, ``nonlinearity``, ``model``, ``checkpt_file`` and
    ``args.validate``.

    Each epoch runs one training pass and one monitoring pass (on the
    validation split when ``args.validate`` is set, otherwise on the training
    split without dropout). Whenever the monitored accuracy and loss are both
    at least as good as the best seen so far, the model is checkpointed and
    test metrics are printed. Training stops after ``patience`` consecutive
    epochs in which neither metric improves.

    Returns:
        tuple: ``(test_loss, test_accuracy)`` averaged over batches, computed
        after restoring ``checkpt_file``.
    """
    sparse = True

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(
        dataset, train_size, validation_size)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Add a leading axis of size 1; the batch loops below slice along it.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    # Number of items along the leading axis that every pass iterates over.
    n_items = features.shape[0]

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 residual=residual, activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0
        # Pre-initialised so the early-stop report cannot hit an unbound name
        # if no checkpoint was ever written (e.g. non-finite losses).
        vacc_early_model = None
        vlss_early_model = None

        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init_op)

            def run_pass(labels, mask, training):
                """Run one full pass and return (mean_loss, mean_accuracy).

                With ``training`` set, the train op is executed with dropout
                0.6; otherwise only loss/accuracy are fetched with dropout 0.
                """
                fetches = [train_op, loss, accuracy] if training else [loss, accuracy]
                drop = 0.6 if training else 0.0
                step = 0
                loss_sum = 0.0
                acc_sum = 0.0
                while step * batch_size < n_items:
                    lo = step * batch_size
                    hi = lo + batch_size
                    results = sess.run(fetches, feed_dict={
                        ftr_in: features[lo:hi],
                        # The sparse bias is fed whole on every step.
                        bias_in: biases if sparse else biases[lo:hi],
                        lbl_in: labels[lo:hi],
                        msk_in: mask[lo:hi],
                        is_train: training,
                        attn_drop: drop, ffd_drop: drop})
                    # loss and accuracy are always the last two fetches.
                    loss_sum += results[-2]
                    acc_sum += results[-1]
                    step += 1
                return loss_sum / step, acc_sum / step

            for epoch in range(nb_epochs):
                train_loss, train_acc = run_pass(y_train, train_mask, training=True)

                if args.validate:
                    val_loss, val_acc = run_pass(y_val, val_mask, training=False)
                else:
                    # No validation requested: monitor the training split
                    # again, this time without dropout.
                    val_loss, val_acc = run_pass(y_train, train_mask, training=False)

                print('%d Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (epoch, train_loss, train_acc, val_loss, val_acc))

                if val_acc > vacc_mx or val_loss < vlss_mn:
                    # Checkpoint only when *both* metrics are at least as
                    # good as the best values seen so far.
                    if val_acc >= vacc_mx and val_loss <= vlss_mn:
                        vacc_early_model = val_acc
                        vlss_early_model = val_loss
                        saver.save(sess, checkpt_file)

                        ts_loss, ts_acc = run_pass(y_test, test_mask, training=False)
                        print('Test loss:', ts_loss, '; Test accuracy:', ts_acc)

                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model)
                        break

            # Final evaluation uses the checkpointed weights, not the last epoch.
            saver.restore(sess, checkpt_file)

            ts_loss, ts_acc = run_pass(y_test, test_mask, training=False)
            print('Test loss:', ts_loss, '; Test accuracy:', ts_acc)

            # The `with` block closes the session; no explicit close needed.
            return ts_loss, ts_acc
Exemple #9
0
def main(args):
    """Train SpHGAT on ``args.dataset`` with early stopping.

    Each epoch runs one training pass and one validation pass. Whenever the
    validation accuracy or loss is at least as good as the best seen so far,
    the test split is evaluated and its metrics remembered. Training stops
    after ``patience`` consecutive epochs without such an improvement, and
    the remembered test metrics are printed.

    Args:
        args: Parsed options providing ``gpu``, ``dataset``, ``lr``, ``l2``,
            ``units`` and ``drop``.
    """
    # Environment values must be strings; str() also accepts an int index.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    dataset = args.dataset
    # training params
    batch_size = 1
    nb_epochs = 100000
    patience = 100
    lr = args.lr
    l2_coef = args.l2
    hid_units = [args.units]
    n_heads = [1, 1]  # layers
    drop_out = args.drop
    nonlinearity = tf.nn.elu
    model = SpHGAT

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))

    sparse = True

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Add a leading axis of size 1; the batch loops below slice along it.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    # Number of items along the leading axis that every pass iterates over.
    n_items = features.shape[0]

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                 attn_drop, ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units, n_heads=n_heads,
                                 activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        # Predicted / true class ids; built but not fetched in this function.
        pred_all = tf.cast(tf.argmax(log_resh, 1), dtype=tf.int32)
        real_all = tf.cast(tf.argmax(lab_resh, 1), dtype=tf.int32)
        train_op = model.my_training(loss, lr, l2_coef)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        tlss_mn = 0.0
        tacc_mx = 0.0
        curr_step = 0
        with tf.Session(config=config) as sess:
            sess.run(init_op)

            def run_pass(labels, mask, training):
                """Run one full pass and return (mean_loss, mean_accuracy).

                With ``training`` set, the train op is executed with dropout
                ``drop_out``; otherwise only loss/accuracy are fetched with
                dropout 0.
                """
                fetches = [train_op, loss, accuracy] if training else [loss, accuracy]
                drop = drop_out if training else 0.0
                step = 0
                loss_sum = 0.0
                acc_sum = 0.0
                while step * batch_size < n_items:
                    lo = step * batch_size
                    hi = lo + batch_size
                    results = sess.run(fetches, feed_dict={
                        ftr_in: features[lo:hi],
                        # The sparse bias is fed whole on every step.
                        bias_in: biases if sparse else biases[lo:hi],
                        lbl_in: labels[lo:hi],
                        msk_in: mask[lo:hi],
                        is_train: training,
                        attn_drop: drop, ffd_drop: drop})
                    # loss and accuracy are always the last two fetches.
                    loss_sum += results[-2]
                    acc_sum += results[-1]
                    step += 1
                return loss_sum / step, acc_sum / step

            for epoch in range(nb_epochs):
                train_loss, train_acc = run_pass(y_train, train_mask, training=True)
                val_loss, val_acc = run_pass(y_val, val_mask, training=False)

                print(epoch, 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                      (train_loss, train_acc, val_loss, val_acc))

                if val_acc >= vacc_mx or val_loss <= vlss_mn:
                    vacc_mx = np.max((val_acc, vacc_mx))
                    vlss_mn = np.min((val_loss, vlss_mn))
                    curr_step = 0

                    # Remember the test metrics of the latest epoch that
                    # matched or improved a validation metric.
                    tlss_mn, tacc_mx = run_pass(y_test, test_mask, training=False)
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min validation loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Test loss:', tlss_mn, ', accuracy {}'.format(tacc_mx))
                        break
            else:
                # Epoch budget exhausted without early stopping: still report
                # the results instead of exiting silently.
                print('Reached max epochs. Min validation loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                print('Test loss:', tlss_mn, ', accuracy {}'.format(tacc_mx))
            # The `with` block closes the session; no explicit close needed.
Exemple #10
0
def train(sparse, epochs, lr, patience, l2_coef, hid_units, n_heads, residual,
          attention_drop, edge_attr_directory, node_features_path, label_path,
          log_path, train_ratio):
    """Train a GAT / SpGAT node classifier with early stopping, then test it.

    Data comes from ``process.load_data``. The model is trained for at most
    ``epochs`` epochs; a checkpoint is written whenever validation accuracy
    and validation loss are both at least as good as the best values seen so
    far. After training the checkpoint is restored and the test loss and
    accuracy are printed.

    :param sparse: If truthy use ``SpGAT`` with sparse bias/adjacency tensors,
        otherwise use ``GAT`` with a dense bias fed through a placeholder.
    :param epochs: Maximum number of training epochs.
    :param lr: Learning rate, forwarded to ``model.training``.
    :param patience: Number of consecutive epochs in which neither validation
        accuracy nor validation loss improves before training stops.
    :param l2_coef: L2 coefficient, forwarded to ``model.training``.
    :param hid_units: Sequence forwarded to ``model.inference``; its length is
        printed as the number of layers.
    :param n_heads: Forwarded to ``model.inference``.
    :param residual: Forwarded to ``model.inference``.
    :param attention_drop: Value fed to both the attention-dropout and the
        feed-forward-dropout placeholders while training.
    :param edge_attr_directory: Forwarded to ``process.load_data``.
    :param node_features_path: Forwarded to ``process.load_data``.
    :param label_path: Forwarded to ``process.load_data``.
    :param log_path: Directory for the summary event files. It is deleted
        (if present) and recreated at the start of every call.
    :param train_ratio: Forwarded to ``process.load_data``.
    :return: None. Progress and results are printed.
    """
    nb_epochs = epochs

    # Start from an empty summary directory on every run.
    if tf.gfile.Exists(log_path):
        tf.gfile.DeleteRecursively(log_path)
    tf.gfile.MakeDirs(log_path)

    checkpt_file = 'pre_trained/mod_test.ckpt'

    dataset = 'know'

    # training params
    batch_size = 1
    nonlinearity = tf.nn.elu
    model = SpGAT if sparse else GAT
    nhood = 1
    in_drop = attention_drop

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    (adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask,
     edge_attr_name) = process.load_data(edge_attr_directory,
                                         node_features_path, label_path,
                                         train_ratio)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Prepend an axis of size 1 so the arrays line up with the
    # (batch_size, ...) placeholder shapes declared below.
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        # sparse (indices, values, dense_shape), the graph topologies
        # (unweighted)
        biases = process.preprocess_adj_bias(adjs[0], to_unweighted=True)
        adjs = [
            tf.SparseTensor(
                *process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)
    print(biases)

    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # The sparse bias is built directly into the graph, so it is
            # never fed at run time.
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32,
                                     shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    train_op = model.training(loss, lr, l2_coef)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    def batch_feed(step, labels, mask, training, attn_rate, ffd_rate):
        """Build the feed dict for batch number ``step`` of one data split."""
        lo = step * batch_size
        hi = (step + 1) * batch_size
        feed = {
            ftr_in: features[lo:hi],
            lbl_in: labels[lo:hi],
            msk_in: mask[lo:hi],
            is_train: training,
            attn_drop: attn_rate,
            ffd_drop: ffd_rate,
        }
        if not sparse:
            # In dense mode bias_in is a placeholder and has to be fed;
            # leaving it out makes any run that reads the bias fail.
            feed[bias_in] = biases[lo:hi]
        return feed

    vlss_mn = np.inf
    vacc_mx = 0.0
    vmf1_mx = 0.0
    curr_step = 0

    # The session is closed by the ``with`` block; no explicit close needed.
    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        train_summary_writer = tf.summary.FileWriter(log_path + '/train')
        test_summary_writer = tf.summary.FileWriter(log_path + '/test')
        try:
            sess.run(init_op)

            train_loss_avg = 0
            train_acc_avg = 0
            train_microf1_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0
            val_microf1_avg = 0

            for epoch in range(nb_epochs):
                # ---- training pass ----
                tr_step = 0
                tr_size = features.shape[0]

                while tr_step * batch_size < tr_size:
                    _, summary, loss_value_tr, acc_tr, micro_f1_tr = sess.run(
                        [train_op, merged, loss, accuracy, micro_f1],
                        feed_dict=batch_feed(tr_step, y_train, train_mask,
                                             True, attention_drop, in_drop))
                    print(loss_value_tr)
                    train_microf1_avg += micro_f1_tr
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1
                    train_summary_writer.add_summary(summary, epoch)

                # ---- validation pass (dropout disabled) ----
                vl_step = 0
                vl_size = features.shape[0]

                while vl_step * batch_size < vl_size:
                    summary, loss_value_vl, acc_vl, micro_f1_vl = sess.run(
                        [merged, loss, accuracy, micro_f1],
                        feed_dict=batch_feed(vl_step, y_val, val_mask, False,
                                             0.0, 0.0))
                    val_microf1_avg += micro_f1_vl
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1
                    test_summary_writer.add_summary(summary, epoch)

                print(
                    'Training: loss = %.5f, acc = %.5f, micro_f1 = %.5f | Val: loss = %.5f, acc = %.5f, micro_f1 = %.5f'
                    % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                       train_microf1_avg / tr_step, val_loss_avg / vl_step,
                       val_acc_avg / vl_step, val_microf1_avg / vl_step))

                # ---- early stopping ----
                # An improvement in either metric resets the patience
                # counter; a checkpoint is written only when both metrics
                # are at least as good as their best values so far.
                if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                    if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                        vacc_early_model = val_acc_avg / vl_step
                        vlss_early_model = val_loss_avg / vl_step
                        saver.save(sess, checkpt_file)
                    vmf1_mx = np.max((val_microf1_avg / vl_step, vmf1_mx))
                    vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx, ', Max Micro-f1',
                              vmf1_mx)
                        print('Early stop model validation loss: ',
                              vlss_early_model, ', accuracy: ',
                              vacc_early_model)
                        break

                train_loss_avg = 0
                train_acc_avg = 0
                train_microf1_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0
                val_microf1_avg = 0

            # Evaluate the checkpointed weights on the test split.
            saver.restore(sess, checkpt_file)

            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            while ts_step * batch_size < ts_size:
                loss_value_ts, acc_ts = sess.run(
                    [loss, accuracy],
                    feed_dict=batch_feed(ts_step, y_test, test_mask, False,
                                         0.0, 0.0))
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                  ts_acc / ts_step)
        finally:
            # Flush and release the event files even if training fails.
            train_summary_writer.close()
            test_summary_writer.close()
Exemple #11
0
def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units,
            n_heads, residual, nonlinearity, model, checkpt_file, nhood):
    """Train a dense GAT model with early stopping and report test metrics.

    Everything printed during the run is redirected to a text file whose
    path is derived from ``checkpt_file``; ``sys.stdout`` is restored and
    the file is closed when the function returns or raises.

    :param dataset: Dataset name, forwarded to ``process.load_data``.
    :param batch_size: First dimension of the input placeholders.
    :param nb_epochs: Maximum number of training epochs.
    :param patience: Number of consecutive epochs in which neither validation
        accuracy nor validation loss improves before training stops.
    :param lr: Learning rate, forwarded to ``model.training``.
    :param l2_coef: L2 coefficient, forwarded to ``model.training``.
    :param hid_units: Sequence forwarded to ``model.inference``; its length is
        printed as the number of layers.
    :param n_heads: Forwarded to ``model.inference``.
    :param residual: Forwarded to ``model.inference``.
    :param nonlinearity: Activation, forwarded to ``model.inference``.
    :param model: Object providing ``inference``,
        ``masked_softmax_cross_entropy``, ``masked_accuracy`` and ``training``.
    :param checkpt_file: Path the ``tf.train.Saver`` checkpoint is written to
        and restored from.
    :param nhood: Forwarded to ``process.adj_to_bias``.
    :return: None. Progress and results are written to the log file.
    """
    # redirect output to file
    import sys

    # NOTE(review): the directory name and 'out.txt' are concatenated without
    # a path separator (e.g. 'a/b/mod.ckpt' -> 'a/bout.txt'). Kept as-is so
    # existing logs are still found; confirm whether os.path.join was meant.
    out_path = os.path.dirname(checkpt_file) + 'out.txt'
    appending = os.path.isfile(out_path)

    orig_stdout = sys.stdout
    f = open(out_path, 'a' if appending else 'w')
    sys.stdout = f
    try:
        if appending:
            # Separate this run from the previous one. Printed only after
            # the redirect so the blank lines land in the log file.
            print('\n\n\n\n')

        print('Dataset: ' + dataset)
        print('batch_size: ' + str(batch_size))
        print('----- Opt. hyperparams -----')
        print('lr: ' + str(lr))
        print('l2_coef: ' + str(l2_coef))
        print('----- Archi. hyperparams -----')
        print('nb. layers: ' + str(len(hid_units)))
        print('nb. units per layer: ' + str(hid_units))
        print('nb. attention heads: ' + str(n_heads))
        print('residual: ' + str(residual))
        print('nonlinearity: ' + str(nonlinearity))
        print('model: ' + str(model))
        print('nhood: ' + str(nhood))

        (adj, features, y_train, y_val, y_test, train_mask, val_mask,
         test_mask) = process.load_data(dataset)
        features, _ = process.preprocess_features(features)

        nb_nodes = features.shape[0]
        ft_size = features.shape[1]
        nb_classes = y_train.shape[1]

        adj = adj.todense()

        # Prepend an axis of size 1 so the arrays can be sliced into
        # batches along their first dimension.
        features = features[np.newaxis]
        adj = adj[np.newaxis]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]

        biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)

        with tf.Graph().as_default():
            with tf.name_scope('input'):
                ftr_in = tf.placeholder(dtype=tf.float32,
                                        shape=(batch_size, nb_nodes, ft_size))
                bias_in = tf.placeholder(dtype=tf.float32,
                                         shape=(batch_size, nb_nodes,
                                                nb_nodes))
                lbl_in = tf.placeholder(dtype=tf.int32,
                                        shape=(batch_size, nb_nodes,
                                               nb_classes))
                msk_in = tf.placeholder(dtype=tf.int32,
                                        shape=(batch_size, nb_nodes))
                attn_drop = tf.placeholder(dtype=tf.float32, shape=())
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
                is_train = tf.placeholder(dtype=tf.bool, shape=())

            logits = model.inference(ftr_in,
                                     nb_classes,
                                     nb_nodes,
                                     is_train,
                                     attn_drop,
                                     ffd_drop,
                                     bias_mat=bias_in,
                                     hid_units=hid_units,
                                     n_heads=n_heads,
                                     residual=residual,
                                     activation=nonlinearity)
            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])
            loss = model.masked_softmax_cross_entropy(log_resh, lab_resh,
                                                      msk_resh)
            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

            train_op = model.training(loss, lr, l2_coef)

            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            def batch_feed(step, labels, mask, training, attn_rate, ffd_rate):
                """Build the feed dict for batch ``step`` of one data split."""
                lo = step * batch_size
                hi = (step + 1) * batch_size
                return {
                    ftr_in: features[lo:hi],
                    bias_in: biases[lo:hi],
                    lbl_in: labels[lo:hi],
                    msk_in: mask[lo:hi],
                    is_train: training,
                    attn_drop: attn_rate,
                    ffd_drop: ffd_rate,
                }

            vlss_mn = np.inf
            vacc_mx = 0.0
            curr_step = 0

            start = time.time()

            # The session is closed by the ``with`` block.
            with tf.Session() as sess:
                sess.run(init_op)

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

                for epoch in range(nb_epochs):
                    # ---- training pass (dropout 0.6 on both paths) ----
                    tr_step = 0
                    tr_size = features.shape[0]

                    while tr_step * batch_size < tr_size:
                        _, loss_value_tr, acc_tr = sess.run(
                            [train_op, loss, accuracy],
                            feed_dict=batch_feed(tr_step, y_train, train_mask,
                                                 True, 0.6, 0.6))
                        train_loss_avg += loss_value_tr
                        train_acc_avg += acc_tr
                        tr_step += 1

                    # ---- validation pass (dropout disabled) ----
                    vl_step = 0
                    vl_size = features.shape[0]

                    while vl_step * batch_size < vl_size:
                        loss_value_vl, acc_vl = sess.run(
                            [loss, accuracy],
                            feed_dict=batch_feed(vl_step, y_val, val_mask,
                                                 False, 0.0, 0.0))
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1

                    print(
                        'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f'
                        % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                           val_loss_avg / vl_step, val_acc_avg / vl_step))

                    # ---- early stopping ----
                    # An improvement in either metric resets the patience
                    # counter; a checkpoint is written only when both are at
                    # least as good as their best values so far.
                    if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                        if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                            vacc_early_model = val_acc_avg / vl_step
                            vlss_early_model = val_loss_avg / vl_step
                            saver.save(sess, checkpt_file)
                        vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                        vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                        curr_step = 0
                    else:
                        curr_step += 1
                        if curr_step == patience:
                            print('Early stop! Min loss: ', vlss_mn,
                                  ', Max accuracy: ', vacc_mx)
                            print('Early stop model validation loss: ',
                                  vlss_early_model, ', accuracy: ',
                                  vacc_early_model)
                            break

                    train_loss_avg = 0
                    train_acc_avg = 0
                    val_loss_avg = 0
                    val_acc_avg = 0

                # Evaluate the checkpointed weights on the test split.
                saver.restore(sess, checkpt_file)

                ts_size = features.shape[0]
                ts_step = 0
                ts_loss = 0.0
                ts_acc = 0.0

                while ts_step * batch_size < ts_size:
                    loss_value_ts, acc_ts = sess.run(
                        [loss, accuracy],
                        feed_dict=batch_feed(ts_step, y_test, test_mask,
                                             False, 0.0, 0.0))
                    ts_loss += loss_value_ts
                    ts_acc += acc_ts
                    ts_step += 1

                print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                      ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time',
                      time.time() - start)
    finally:
        # Always undo the redirect, even when training raises, so later
        # output and tracebacks are not written into a closed log file.
        sys.stdout = orig_stdout
        f.close()
Exemple #12
0
from utils import process

# Script-level setup: load a dataset through ``process`` and convert the
# features to a torch tensor. This excerpt is truncated after the setup.
dataset = 'cora'

# training params
# (only `dataset` and `sparse` are read in the visible part of this script)
batch_size = 1
nb_epochs = 10000
patience = 20
lr = 0.001
l2_coef = 0.0
drop_prob = 0.0
hid_units = 512
sparse = True
nonlinearity = 'prelu' # special name to separate parameters

adj, features, labels, idx_train, idx_val, idx_test = process.load_data(dataset, use_dgi_data=False)
features, _ = process.preprocess_features(features)

# Sizes read from the loaded arrays: rows/columns of `features` and the
# number of columns of `labels`.
nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = labels.shape[1]

# The identity matrix is added to `adj` before it is handed to
# process.normalize_adj.
adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))

if sparse:
    # Keep the normalised matrix as a torch sparse tensor under a new name.
    sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)
else:
    # NOTE(review): the identity is added a second time here, on top of the
    # already-normalised matrix, before densifying - confirm this is intended.
    adj = (adj + sp.eye(adj.shape[0])).todense()

# Prepend an axis of size 1 and convert to a float tensor.
features = torch.FloatTensor(features[np.newaxis])
if not sparse:
Exemple #13
0
def run_gat(dataset,
            batch_size,
            nb_epochs,
            patience,
            lr,
            l2_coef,
            hid_units,
            n_heads,
            residual,
            nonlinearity,
            model,
            checkpt_file,
            nhood,
            param_attn_drop=0.6,
            param_ffd_drop=0.6,
            sparse=False):
    '''
    Function that runs all the experiments.

    Everything printed during the run is redirected to a text file whose path
    is derived from ``checkpt_file``; one row per run is appended to a CSV log
    next to it. ``sys.stdout`` is restored and the text file is closed when
    the function returns or raises.

    :param dataset: The string name of the dataset.
    :param batch_size: Number of samples per batch. Has to be one for sparse execution.
    :param nb_epochs: Maximum number of epochs that the method runs.
    :param patience: The number of consecutive epochs with no improvement in
        validation accuracy or validation loss that stops the training.
    :param lr: Learning rate.
    :param l2_coef: The L2 regularization strength.
    :param hid_units: List. Number of features the respecting layer produces from the input features.
    :param n_heads: List. Number of entries is the number of layers. The elements value is the number of attention heads.
    :param residual: Whether or not to use residual connections in the hidden layers.
    :param nonlinearity: tensorflow function for non-linearity
    :param model: Model providing ``inference``, ``masked_softmax_cross_entropy``,
        ``masked_accuracy`` and ``training``.
    :param checkpt_file: Location where the logs, output and model checkpoints are saved
    :param nhood: The neighborhood to consider. One for direct neighborhood and two for neighbors of neighbors.
        Forwarded to ``process.adj_to_bias`` in the dense case; not used when ``sparse`` is True.
    :param param_attn_drop: Drops a percent of attention coefficients.
    :param param_ffd_drop: Drops a percent of inputs from the previous layer.
    :param sparse: If True, the model has to be SpGAT
    :return: Prints and logs results.
    '''

    # Work-around that may be needed to run on GPU (kept for reference):
    #   from tensorflow.compat.v1 import ConfigProto
    #   from tensorflow.compat.v1 import InteractiveSession
    #   config = ConfigProto()
    #   config.gpu_options.allow_growth = True
    #   session = InteractiveSession(config=config)

    # redirect output to file
    import sys

    # NOTE(review): the directory name and the file names below are
    # concatenated without a path separator (e.g. 'a/b/mod.ckpt' ->
    # 'a/bout.txt'). Kept as-is so existing logs are still found; confirm
    # whether os.path.join was meant.
    out_path = os.path.dirname(checkpt_file) + 'out.txt'
    csv_path = os.path.dirname(checkpt_file) + 'log.csv'
    appending = os.path.isfile(out_path)

    orig_stdout = sys.stdout
    f = open(out_path, 'a' if appending else 'w')
    sys.stdout = f
    try:
        if appending:
            # visually separate this run from the previous one in the file
            print('\n\n\n\n')

        print('Dataset: ' + dataset)
        print('batch_size: ' + str(batch_size))
        print('----- Opt. hyperparams -----')
        print('lr: ' + str(lr))
        print('l2_coef: ' + str(l2_coef))
        print('----- Archi. hyperparams -----')
        print('nb. layers: ' + str(len(hid_units)))
        print('nb. units per layer: ' + str(hid_units))
        print('nb. attention heads: ' + str(n_heads))
        print('residual: ' + str(residual))
        print('nonlinearity: ' + str(nonlinearity))
        print('model: ' + str(model))
        print('nhood: ' + str(nhood))
        print('attn_drop: ' + str(param_attn_drop))
        print('ffd_drop: ' + str(param_ffd_drop))

        # load any of the three transductive datasets
        # adj has information about the connections
        # features is a node x features matrix with the features for each node
        # y_... has the label for each class in a node x class matrix
        # mask has 0 or 1 as value in a node vector, this is used to mask
        # train, val and test set, so for all nodes all information is
        # calculated, but only nodes masked with 1 are evaluated
        (adj, features, y_train, y_val, y_test, train_mask, val_mask,
         test_mask) = process.load_data(dataset)
        # preprocessing steps
        features, _ = process.preprocess_features(features)

        nb_nodes = features.shape[0]
        ft_size = features.shape[1]
        nb_classes = y_train.shape[1]

        # Prepend an axis of size 1 so the arrays can be sliced into batches
        # along their first dimension.
        features = features[np.newaxis]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]

        # the adjacency matrix is transformed into a bias that is added.
        # when no connection between nodes exist in the specified
        # neighborhood, the value of the attention between both nodes is set
        # to a big negative value, practically canceling out the effect.
        if sparse:
            biases = process.preprocess_adj_bias(adj)
        else:
            adj = adj.todense()
            adj = adj[np.newaxis]
            # Honour the caller's neighbourhood size; it used to be
            # hard-coded to 1 while `nhood` was still printed and logged.
            biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)

        with tf.Graph().as_default():
            with tf.name_scope('input'):
                # initialization
                ftr_in = tf.placeholder(dtype=tf.float32,
                                        shape=(batch_size, nb_nodes, ft_size))
                if sparse:
                    bias_in = tf.sparse_placeholder(dtype=tf.float32)
                else:
                    bias_in = tf.placeholder(dtype=tf.float32,
                                             shape=(batch_size, nb_nodes,
                                                    nb_nodes))
                lbl_in = tf.placeholder(dtype=tf.int32,
                                        shape=(batch_size, nb_nodes,
                                               nb_classes))
                msk_in = tf.placeholder(dtype=tf.int32,
                                        shape=(batch_size, nb_nodes))
                attn_drop = tf.placeholder(dtype=tf.float32, shape=())
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
                is_train = tf.placeholder(dtype=tf.bool, shape=())

            logits = model.inference(ftr_in,
                                     nb_classes,
                                     nb_nodes,
                                     is_train,
                                     attn_drop,
                                     ffd_drop,
                                     bias_mat=bias_in,
                                     hid_units=hid_units,
                                     n_heads=n_heads,
                                     residual=residual,
                                     activation=nonlinearity)
            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])
            loss = model.masked_softmax_cross_entropy(log_resh, lab_resh,
                                                      msk_resh)
            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

            train_op = model.training(loss, lr, l2_coef)

            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            def batch_feed(step, labels, mask, training, attn_rate, ffd_rate):
                """Build the feed dict for batch ``step`` of one data split."""
                lo = step * batch_size
                hi = (step + 1) * batch_size
                return {
                    ftr_in: features[lo:hi],
                    # the sparse bias is fed whole, the dense one per batch
                    bias_in: biases if sparse else biases[lo:hi],
                    lbl_in: labels[lo:hi],
                    msk_in: mask[lo:hi],
                    is_train: training,
                    attn_drop: attn_rate,
                    ffd_drop: ffd_rate,
                }

            vlss_mn = np.inf
            vacc_mx = 0.0
            curr_step = 0

            start = time.time()

            # The session is closed by the ``with`` block.
            with tf.Session() as sess:
                sess.run(init_op)

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

                for epoch in range(nb_epochs):
                    tr_step = 0
                    tr_size = features.shape[0]
                    # training steps
                    while tr_step * batch_size < tr_size:
                        _, loss_value_tr, acc_tr = sess.run(
                            [train_op, loss, accuracy],
                            feed_dict=batch_feed(tr_step, y_train, train_mask,
                                                 True, param_attn_drop,
                                                 param_ffd_drop))
                        train_loss_avg += loss_value_tr
                        train_acc_avg += acc_tr
                        tr_step += 1

                    vl_step = 0
                    vl_size = features.shape[0]
                    # validation steps (dropout disabled)
                    while vl_step * batch_size < vl_size:
                        loss_value_vl, acc_vl = sess.run(
                            [loss, accuracy],
                            feed_dict=batch_feed(vl_step, y_val, val_mask,
                                                 False, 0.0, 0.0))
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1

                    print(
                        'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f'
                        % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                           val_loss_avg / vl_step, val_acc_avg / vl_step))

                    # patience step: an improvement in either metric resets
                    # the counter; a checkpoint is written only when both are
                    # at least as good as their best values so far.
                    if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                        if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                            vacc_early_model = val_acc_avg / vl_step
                            vlss_early_model = val_loss_avg / vl_step
                            saver.save(sess, checkpt_file)
                        vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                        vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                        curr_step = 0
                    else:
                        curr_step += 1
                        if curr_step == patience:
                            print('Early stop! Min loss: ', vlss_mn,
                                  ', Max accuracy: ', vacc_mx)
                            print('Early stop model validation loss: ',
                                  vlss_early_model, ', accuracy: ',
                                  vacc_early_model)
                            break

                    train_loss_avg = 0
                    train_acc_avg = 0
                    val_loss_avg = 0
                    val_acc_avg = 0

                saver.restore(sess, checkpt_file)

                ts_size = features.shape[0]
                ts_step = 0
                ts_loss = 0.0
                ts_acc = 0.0
                # evaluate the restored checkpoint on the test set
                while ts_step * batch_size < ts_size:
                    loss_value_ts, acc_ts = sess.run(
                        [loss, accuracy],
                        feed_dict=batch_feed(ts_step, y_test, test_mask,
                                             False, 0.0, 0.0))
                    ts_loss += loss_value_ts
                    ts_acc += acc_ts
                    ts_step += 1

                print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                      ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time',
                      time.time() - start)

                # log information about the training
                if os.path.isfile(csv_path):
                    print('loading existing log')
                    df = pd.read_csv(csv_path, index_col=['run'])
                    print('log: ' + str(df))
                else:
                    print('Creating new log')
                    df = pd.DataFrame(columns=tracking_params + result_cols)

                log = dict(
                    zip(tracking_params + result_cols, [
                        dataset, lr, l2_coef, hid_units, n_heads, residual,
                        str(nonlinearity).split(' ')[1], param_attn_drop,
                        param_ffd_drop, nhood
                    ] + [
                        epoch,
                        time.time() - start, vlss_mn, vacc_mx,
                        ts_loss / ts_step, ts_acc / ts_step
                    ]))

                print('Adding entry: ' + str(log))

                # DataFrame.append was removed in pandas 2.0; pd.concat
                # works on old and new pandas alike.
                df = pd.concat([df, pd.DataFrame([log])], ignore_index=True)
                print('saving logs')
                df.to_csv(csv_path, index_label='run')
                print('log save succesfull')
    finally:
        # restore standard output, even when the run raises
        sys.stdout = orig_stdout
        f.close()
def train(sparse, hid_units, n_heads, residual, edge_attr_directory,
          node_features_path, label_path, train_ratio, model_path):
    """Restore a saved checkpoint and print its test loss and accuracy.

    Despite the name, no optimisation step is built here: the function
    constructs the inference graph, restores the weights stored at
    ``model_path`` and evaluates them on the test mask.

    Args:
        sparse: When true, ``SpGAT`` is used and the bias/adjacency inputs
            are wrapped in ``tf.SparseTensor``; otherwise ``GAT`` is used
            with a dense bias placeholder.
        hid_units: Sequence of hidden-unit counts (its length is reported
            as the number of layers); forwarded to ``model.inference``.
        n_heads: Attention-head configuration, forwarded to
            ``model.inference``.
        residual: Residual flag, forwarded to ``model.inference``.
        edge_attr_directory: Forwarded to ``process.load_data``.
        node_features_path: Forwarded to ``process.load_data``.
        label_path: Forwarded to ``process.load_data``.
        train_ratio: Forwarded to ``process.load_data``.
        model_path: Checkpoint path given to ``tf.train.Saver.restore``.

    Returns:
        None. Results are printed to standard output.
    """
    checkpt_file = model_path

    dataset = 'know'

    # evaluation params
    batch_size = 1
    nonlinearity = tf.nn.elu
    model = SpGAT if sparse else GAT
    nhood = 1

    print('Dataset: ' + dataset)
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    (adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask,
     edge_attr_name) = process.load_data(edge_attr_directory,
                                         node_features_path, label_path,
                                         train_ratio)
    features, _ = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Only the test split is evaluated, so only its arrays receive the
    # leading batch axis expected by the placeholders below.
    features = features[np.newaxis]
    y_test = y_test[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        # (indices, values, dense_shape) triple built from the first
        # adjacency with to_unweighted=True.
        biases = process.preprocess_adj_bias(adjs[0], to_unweighted=True)
        adjs = [
            tf.SparseTensor(
                *process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)
    print(biases)

    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # The sparse bias is baked into the graph, so it is never fed.
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32,
                                     shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    # Not evaluated below, but still constructed so that the graph seen by
    # the Saver is the same one this function has always built.
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # The context manager closes the session; no explicit close() is needed.
    with tf.Session() as sess:
        sess.run(init_op)

        saver.restore(sess, checkpt_file)

        ts_size = features.shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        while ts_step * batch_size < ts_size:
            batch = slice(ts_step * batch_size, (ts_step + 1) * batch_size)
            feed = {
                ftr_in: features[batch],
                lbl_in: y_test[batch],
                msk_in: test_mask[batch],
                is_train: False,
                attn_drop: 0.0,
                ffd_drop: 0.0,
            }
            if not sparse:
                # In the dense path bias_in is a placeholder; leaving it
                # unfed makes sess.run fail as soon as the model reads it.
                feed[bias_in] = biases[batch]
            loss_value_ts, acc_ts = sess.run([loss, accuracy],
                                             feed_dict=feed)
            ts_loss += loss_value_ts
            ts_acc += acc_ts
            ts_step += 1

        print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
              ts_acc / ts_step)
Exemple #15
0
nonlinearity = tf.nn.elu
model = GAT

# Report the run configuration in a single write; the emitted text is the
# same, line for line, as printing each entry separately.
print('\n'.join([
    'Dataset: ' + dataset,
    '----- Opt. hyperparams -----',
    'lr: ' + str(lr),
    'l2_coef: ' + str(l2_coef),
    '----- Archi. hyperparams -----',
    'nb. layers: ' + str(len(hid_units)),
    'nb. units per layer: ' + str(hid_units),
    'nb. attention heads: ' + str(n_heads),
    'residual: ' + str(residual),
    'nonlinearity: ' + str(nonlinearity),
    'model: ' + str(model),
]))

(adj, features, y_train, y_val, y_test,
 train_mask, val_mask, test_mask) = process.load_data(dataset)
features, spars = process.preprocess_features(features)

# Sizes are read before any leading axis is added.
nb_nodes, ft_size = features.shape[0], features.shape[1]
nb_classes = y_train.shape[1]

adj = adj.todense()

# Index every array with np.newaxis in one pass.
(features, adj, y_train, y_val, y_test,
 train_mask, val_mask) = (
    arr[np.newaxis]
    for arr in (features, adj, y_train, y_val, y_test, train_mask, val_mask)
)
Exemple #16
0
from utils import process
from utils import metrics

# Script-level configuration for this run.
dataset = "citeseer"
batch_size = 1
nb_epochs = 200
patience = 20
lr = 0.001
l2_coef = 0.0
drop_prob = 0.0
hid_units = 32
out_units = 16  # only referenced by the commented-out GAE constructor below
sparse = False
nonlinearity = "prelu"

# Load the dataset by name, then run the features through
# process.preprocess_features (its second return value is discarded).
adj, features, labels, g = process.load_data(dataset)
features, _ = process.preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
# NOTE(review): len(labels) is the length of the first axis of `labels`;
# confirm that this is really the class count for this loader.
nb_classes = len(labels)

# NOTE(review): the identity is added before process.normalize_adj and then
# again on its result before densifying — confirm the second addition is
# intended.
adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))
adj = (adj + sp.eye(adj.shape[0])).todense()
# Index with np.newaxis and convert to a float tensor.
adj = torch.FloatTensor(adj[np.newaxis])

features = torch.FloatTensor(features[np.newaxis])
# labels = torch.FloatTensor(labels[np.newaxis])

# Model is constructed from the feature width, hidden size and activation name.
model = DGI(ft_size, hid_units, nonlinearity)
# model = GAE(ft_size, hid_units, out_units, nonlinearity)
    parser.add_argument('--b', dest='beta', type=int, default=100,help='')
    parser.add_argument('--c', dest='num_clusters', type=float, default=128,help='')
    parser.add_argument('--a', dest='alpha', type=float, default=0.5,help='')
    parser.add_argument('--test_rate', dest='test_rate', type=float, default=0.1,help='')
    args = parser.parse_args()
    return args

if __name__ == "__main__":

    # Parse the command line and echo the resulting settings.
    args = parser_loader()
    pruning_gcn.print_args(args)

    rewind_weight = None

    dataset = args.dataset
    (adj, features, labels,
     idx_train, idx_val, idx_test) = process.load_data(dataset)

    # Keep a handle on the adjacency exactly as loaded; `adj` is rebound to
    # a tensor built from the training adjacency further down.
    adj_sparse = adj

    # Edge split: test fraction comes from the CLI, validation fraction is
    # fixed at 0.05.
    (adj_train, train_edges, train_edges_false,
     val_edges, val_edges_false,
     test_edges, test_edges_false) = process.mask_test_edges(
         adj, test_frac=args.test_rate, val_frac=0.05)

    # Convert everything the model consumes into CUDA float tensors.
    features, _ = process.preprocess_features(features)
    features = torch.FloatTensor(features[np.newaxis]).cuda()
    adj = torch.FloatTensor(adj_train.todense()).cuda()
    labels = torch.FloatTensor(labels[np.newaxis]).cuda()

    # Bundle the prepared inputs under string keys.
    dataset_dict = {
        'adj': adj,
        'adj_sparse': adj_sparse,
        'features': features,
        'labels': labels,
        'val_edges': val_edges,
    }
Exemple #18
0
# Echo the architecture settings chosen earlier in the script.
print('nb. units per layer: ' + str(hid_units))
print('nb. attention heads: ' + str(n_heads))
print('residual: ' + str(residual))
print('nonlinearity: ' + str(nonlinearity))
print('model: ' + str(model))

sparse = True

# One pass per value of `per_class`; only 200 is listed here.
for per_class in [200]:
    # Results go to result/<dataset><per_class>/; create both directory
    # levels if they do not exist yet.
    result_path = 'result/' + dataset + str(per_class) + '/'
    if not os.path.exists('result'):
        os.mkdir('result')
    if not os.path.exists(result_path):
        os.mkdir(result_path)

    # Load the data for this dataset / per_class combination.
    metapaths, metapaths_name, adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(
        dataset, per_class)
    features, spars = process.preprocess_features(features)

    # Sizes are read before the arrays are indexed with np.newaxis below.
    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    # Index each array with np.newaxis (adds a leading length-1 axis for
    # plain ndarrays — TODO confirm the array types returned by the loader).
    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    # Initial 4-tuple of zeros; how it is updated is not visible in this
    # excerpt.
    all_metapaths_best = (0, 0, 0, 0)