Example #1
    def checkGraph(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, needtSNE=False, rawFeature=[]):

        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]


        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')
            # forward
            logits, final_embedding, att_val = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads,
                                                               mp_att_size=200,
                                                               residual=residual, activation=nonlinearity)
        return logits, final_embedding, att_val
Example #2
    def train(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):

        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]
        # nb_classes = len(set(rawlabels))

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        y_all = y_all[np.newaxis]

        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]
        all_mask = all_mask[np.newaxis]

        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='metric_ftr_in')
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

            # forward
            logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads, features=fea_list, labels=rawlabels,
                                                               residual=residual, activation=nonlinearity, feature_size=ft_size)


            # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)

            # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)

            # cal masked_loss
            # lab_list = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ), name='lab_list')
            # ftr_resh = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='ftr_resh')
            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])


            print ("final_embedding: checkout", final_embedding)
            print ("logits: checkout", logits)
            print ("log_resh: checkout", log_resh)
            # print ("ftr_resh: ", ftr_resh)
            print ("lab_resh: ", lab_resh)
            print ("fea_list: ", fea_list)
            print ("centers_embed: ", centers_embed)
            print ("batch_size, nb_nodes, nb_classes, ft_size", batch_size, nb_nodes, nb_classes, ft_size)

            osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
            osm_loss = osm_caa_loss.forward

            # final_embedding: checkout Tensor("Sum:0", shape=(286, 64), dtype=float32)
            # logits: checkout Tensor("ExpandDims_3:0", shape=(1, 286, 30), dtype=float32)
            # log_resh: checkout Tensor("Reshape:0", shape=(286, 30), dtype=float32)
            # ftr_resh:  Tensor("ftr_resh:0", shape=(286, 100), dtype=float32)
            # lab_resh:  Tensor("Reshape_1:0", shape=(286, 30), dtype=int32)

            osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
            # osmLoss, checkvalue = osm_loss(metric_ftr_in, rawlabels, centers_embed)
            SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            loss = osmLoss
            # why does the loss stay constant?
            # loss = osmLoss
            # loss = SoftMaxloss

            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
            # optimize
            train_op = model.training(loss, lr, l2_coef)

            Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
            self.mkdir(Path)
            checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
            print('model: {}'.format(checkpt_file))
            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            vlss_mn = np.inf
            vacc_mx = 0.0
            curr_step = 0

            with tf.Session(config=config) as sess:
                sess.run(init_op)

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

                for epoch in range(nb_epochs):
                    tr_step = 0

                    tr_size = fea_list[0].shape[0]
                    # ================   training    ============
                    while tr_step * batch_size < tr_size:
                        fd1 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                               for i, d in zip(ftr_in_list, fea_list)}
                        fd2 = {i: d[tr_step * batch_size:(tr_step + 1) * batch_size]
                               for i, d in zip(bias_in_list, biases_list)}
                        fd3 = {lbl_in: y_train[tr_step * batch_size:(tr_step + 1) * batch_size],
                               msk_in: train_mask[tr_step * batch_size:(tr_step + 1) * batch_size],
                               metric_ftr_in: rawFeature,
                               is_train: True,
                               attn_drop: 0.6,
                               ffd_drop: 0.6}
                        fd = fd1
                        fd.update(fd2)
                        fd.update(fd3)
                        _, loss_value_tr, acc_tr, att_val_train = sess.run([train_op, loss, accuracy, att_val],
                                                                           feed_dict=fd)
                        test_check_value = sess.run(checkvalue, feed_dict=fd)
                        print ("test_check_value: ", test_check_value)

                        train_loss_avg += loss_value_tr
                        train_acc_avg += acc_tr
                        tr_step += 1


                    vl_step = 0
                    vl_size = fea_list[0].shape[0]
                    # =============   val       =================
                    while vl_step * batch_size < vl_size:
                        # fd1 = {ftr_in: features[vl_step * batch_size:(vl_step + 1) * batch_size]}
                        fd1 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                               for i, d in zip(ftr_in_list, fea_list)}
                        fd2 = {i: d[vl_step * batch_size:(vl_step + 1) * batch_size]
                               for i, d in zip(bias_in_list, biases_list)}
                        fd3 = {lbl_in: y_val[vl_step * batch_size:(vl_step + 1) * batch_size],
                               msk_in: val_mask[vl_step * batch_size:(vl_step + 1) * batch_size],
                               metric_ftr_in: rawFeature,
                               is_train: False,
                               attn_drop: 0.0,
                               ffd_drop: 0.0}

                        fd = fd1
                        fd.update(fd2)
                        fd.update(fd3)
                        loss_value_vl, acc_vl = sess.run([loss, accuracy],
                                                         feed_dict=fd)
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1
                    # import pdb; pdb.set_trace()
                    print('Epoch: {}, att_val: {}'.format(epoch, np.mean(att_val_train, axis=0)))
                    print('Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f | vl_step: %d, tr_step: %d' %
                          (train_loss_avg / tr_step, train_acc_avg / tr_step,
                           val_loss_avg / vl_step, val_acc_avg / vl_step, vl_step, tr_step))

                    if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                        if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                            vacc_early_model = val_acc_avg / vl_step
                            vlss_early_model = val_loss_avg / vl_step
                            saver.save(sess, checkpt_file)
                        vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                        vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                        curr_step = 0
                    else:
                        curr_step += 1
                        if curr_step == patience:
                            print('Early stop! Min loss: ', vlss_mn,
                                  ', Max accuracy: ', vacc_mx)
                            print('Early stop model validation loss: ',
                                  vlss_early_model, ', accuracy: ', vacc_early_model)
                            break

                    train_loss_avg = 0
                    train_acc_avg = 0
                    val_loss_avg = 0
                    val_acc_avg = 0
                # check save
                saver.save(sess, checkpt_file)

                saver.restore(sess, checkpt_file)
                print('load model from : {}'.format(checkpt_file))
                ts_size = fea_list[0].shape[0]
                ts_step = 0
                ts_loss = 0.0
                ts_acc = 0.0

                while ts_step * batch_size < ts_size:
                    fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                           msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                           is_train: False,
                           attn_drop: 0.0,
                           ffd_drop: 0.0}

                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    loss_value_ts, acc_ts, jhy_final_embedding, test_final_embeed_check = sess.run([loss, accuracy, final_embedding, test_final_embeed],
                                                                          feed_dict=fd)
                    ts_loss += loss_value_ts
                    ts_acc += acc_ts
                    ts_step += 1

                xx = np.expand_dims(jhy_final_embedding, axis=0)[all_mask]
                xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
                yy = y_all[all_mask]


                print ("check fd")
                print('xx: {}, yy: {}, ts_size: {}, ts_step: {}, batch_size: {}'.format(xx.shape, yy.shape, ts_size, ts_step,batch_size))

                labels, numberofLabels = self.getLabel(yy)

                from utils import clustering, pairwise_precision_recall_f1

                clusters_pred = clustering(xx2, num_clusters=numberofLabels)
                prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
                print ('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', numberofLabels)

                if needtSNE:
                    tSNEAnanlyse(xx, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_final.png" % (self.name)))
                    tSNEAnanlyse(rawFeature, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_features.png" % (self.name)))
                    tSNEAnanlyse(xx2, labels, join(settings.PIC_DIR, "HAN", "rawReature_%s_xx2.png" % (self.name)))
                    tSNEAnanlyse(xx, clusters_pred, join(settings.PIC_DIR, "HAN", "rawReature_%s_result_label.png" % (self.name)))


                sess.close()

        return prec, rec, f1, xx2
Example #3
    def MetricDebug(self, adj_list, fea_list, y_train, y_val, y_test, train_mask, val_mask, test_mask, y_all, all_mask, rawlabels, needtSNE=False, rawFeature=[]):
        prec, rec, f1 = 0.0, 0.0, 0.0
        nb_nodes = fea_list[0].shape[0]
        ft_size = fea_list[0].shape[1]
        nb_classes = y_train.shape[1]
        # nb_classes = len(set(rawlabels))

        # adj = adj.todense()

        # features = features[np.newaxis]  # [1, nb_node, ft_size]
        fea_list = [fea[np.newaxis] for fea in fea_list]
        adj_list = [adj[np.newaxis] for adj in adj_list]
        y_train = y_train[np.newaxis]
        y_val = y_val[np.newaxis]
        y_test = y_test[np.newaxis]
        y_all = y_all[np.newaxis]

        train_mask = train_mask[np.newaxis]
        val_mask = val_mask[np.newaxis]
        test_mask = test_mask[np.newaxis]
        all_mask = all_mask[np.newaxis]

        biases_list = [process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list]

        print('build graph...')
        with tf.Graph().as_default():
            with tf.name_scope('input'):
                metric_ftr_in = tf.placeholder(dtype=tf.float32, shape=(nb_nodes, ft_size), name='metric_ftr_in')
                ftr_in_list = [tf.placeholder(dtype=tf.float32,
                                              shape=(batch_size, nb_nodes, ft_size),
                                              name='ftr_in_{}'.format(i))
                               for i in range(len(fea_list))]
                bias_in_list = [tf.placeholder(dtype=tf.float32,
                                               shape=(batch_size, nb_nodes, nb_nodes),
                                               name='bias_in_{}'.format(i))
                                for i in range(len(biases_list))]
                lbl_in = tf.placeholder(dtype=tf.int32, shape=(
                    batch_size, nb_nodes, nb_classes), name='lbl_in')
                msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes),
                                        name='msk_in')
                attn_drop = tf.placeholder(dtype=tf.float32, shape=(), name='attn_drop')
                ffd_drop = tf.placeholder(dtype=tf.float32, shape=(), name='ffd_drop')
                is_train = tf.placeholder(dtype=tf.bool, shape=(), name='is_train')

            # forward
            logits, final_embedding, att_val, centers_embed, test_final_embeed = model.inference(ftr_in_list, nb_classes, nb_nodes, is_train,
                                                               attn_drop, ffd_drop,
                                                               bias_mat_list=bias_in_list,
                                                               hid_units=hid_units, n_heads=n_heads, features=fea_list, labels=rawlabels,
                                                               residual=residual, activation=nonlinearity, feature_size=ft_size)

            log_resh = tf.reshape(logits, [-1, nb_classes])
            lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
            msk_resh = tf.reshape(msk_in, [-1])

            osm_caa_loss = OSM_CAA_Loss(batch_size=nb_nodes)
            osm_loss = osm_caa_loss.forward

            osmLoss, checkvalue = osm_loss(final_embedding, rawlabels, centers_embed)
            SoftMaxloss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
            loss = osmLoss

            accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)
            # optimize
            train_op = model.training(loss, lr, l2_coef)

            Path = 'pre_trained/{}/{}/{}'.format(dataset, dataset, self.name)
            self.mkdir(Path)
            checkpt_file = '{}/allMP_multi_{}_.ckpt'.format(Path, featype)
            saver = tf.train.Saver()

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            ts_size = fea_list[0].shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            with tf.Session(config=config) as sess:
                sess.run(init_op)
                saver.restore(sess, checkpt_file)

                while ts_step * batch_size < ts_size:
                    fd1 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(ftr_in_list, fea_list)}
                    fd2 = {i: d[ts_step * batch_size:(ts_step + 1) * batch_size]
                           for i, d in zip(bias_in_list, biases_list)}
                    fd3 = {lbl_in: y_all[ts_step * batch_size:(ts_step + 1) * batch_size],
                           msk_in: all_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                           metric_ftr_in: rawFeature,
                           is_train: False,
                           attn_drop: 0.0,
                           ffd_drop: 0.0}

                    fd = fd1
                    fd.update(fd2)
                    fd.update(fd3)
                    test_final_embeed_check = sess.run(test_final_embeed, feed_dict=fd)
                    ts_step += 1

                xx2 = np.expand_dims(test_final_embeed_check, axis=0)[all_mask]
                yy = y_all[all_mask]

                labels, numberofLabels = self.getLabel(yy)

                from utils import clustering, pairwise_precision_recall_f1

                clusters_pred = clustering(xx2, num_clusters=numberofLabels)
                prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
                print ('prec: ', prec, ', rec: ', rec, ', f1: ', f1, ', originNumberOfClusterlabels: ', numberofLabels)

                if needtSNE:
                    tSNEAnanlyse(rawFeature, labels, join(settings.PIC_DIR, "MetricLearning", "rawReature_%s_features.png" % (self.name)))
                    tSNEAnanlyse(xx2, labels, join(settings.PIC_DIR, "MetricLearning", "rawReature_%s_xx2.png" % (self.name)))
Example #4
nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = y_train.shape[1]

adj = adj.todense()

features = features[np.newaxis]
adj = adj[np.newaxis]
y_train = y_train[np.newaxis]
y_val = y_val[np.newaxis]
y_test = y_test[np.newaxis]
train_mask = train_mask[np.newaxis]
val_mask = val_mask[np.newaxis]
test_mask = test_mask[np.newaxis]

biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                attn_drop, ffd_drop,
                                bias_mat=bias_in,
                                hid_units=hid_units, n_heads=n_heads,
Example #5
    tr_msk, vl_msk, ts_msk = process_ppi.process_p2p()

# the only differences are the names and that there are now separate variables for train, validation and test
# for the features, adjacency matrices and masks
y_train = train_labels
y_val = val_labels
y_test = test_labels
train_mask = tr_msk
val_mask = vl_msk
test_mask = ts_msk

nb_nodes = train_feat.shape[1]
ft_size = train_feat.shape[2]
nb_classes = y_train.shape[2]

train_adj = process.adj_to_bias(train_adj, [nb_nodes] * 20, nhood=1)
val_adj = process.adj_to_bias(val_adj, [nb_nodes] * 2, nhood=1)
test_adj = process.adj_to_bias(test_adj, [nb_nodes] * 2, nhood=1)

with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size),
                                name='features')
        bias_in = tf.placeholder(dtype=tf.float32,
                                 shape=(batch_size, nb_nodes, nb_nodes),
                                 name='bias')
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes),
                                name='label')
        msk_in = tf.placeholder(dtype=tf.int32,
Example #6
train_adj, val_adj, test_adj, train_feat, val_feat, test_feat, train_labels, val_labels, test_labels, train_nodes, val_nodes, test_nodes, train_mask, val_mask, test_mask = process_inductive.load_ppi(
    dataset)

for i in range(train_feat.shape[0]):
    train_feat[i] = process.preprocess_features2(train_feat[i])
for i in range(val_feat.shape[0]):
    val_feat[i] = process.preprocess_features2(val_feat[i])
for i in range(test_feat.shape[0]):
    test_feat[i] = process.preprocess_features2(test_feat[i])

nb_nodes = train_feat.shape[1]
ft_size = train_feat.shape[2]
nb_classes = train_labels.shape[2]

train_biases = process.adj_to_bias(train_adj, [nb_nodes] * train_adj.shape[0],
                                   nhood=1)
val_biases = process.adj_to_bias(val_adj, [nb_nodes] * val_adj.shape[0],
                                 nhood=1)
test_biases = process.adj_to_bias(test_adj, [nb_nodes] * test_adj.shape[0],
                                  nhood=1)

with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size))
        bias_in = tf.placeholder(dtype=tf.float32,
                                 shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
Example #7
# adj = adj.todense()

features = features[np.newaxis]  # [1, nb_node, ft_size]
adj_list = [adj[np.newaxis] for adj in adj_list]

y_train = y_train[np.newaxis]
y_val = y_val[np.newaxis]
y_test = y_test[np.newaxis]

train_mask = train_mask[np.newaxis]
val_mask = val_mask[np.newaxis]
test_mask = test_mask[np.newaxis]

biases_list = [
    process.adj_to_bias(adj, [nb_nodes], nhood=1) for adj in adj_list
]

with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size))
        bias_in_list = [
            tf.placeholder(dtype=tf.float32,
                           shape=(batch_size, nb_nodes, nb_nodes))
            for _ in range(len(biases_list))
        ]
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
Example #8
def train(sparse, epochs, lr, patience, l2_coef, hid_units, n_heads, residual,
          attention_drop, edge_attr_directory, node_features_path, label_path,
          log_path, train_ratio):
    # flags = tf.app.flags
    # FLAGS = flags.FLAGS
    nb_epochs = epochs

    # flags.DEFINE_string('summaries_dir', log_path, 'Summaries directory')
    if tf.gfile.Exists(log_path):
        tf.gfile.DeleteRecursively(log_path)
    tf.gfile.MakeDirs(log_path)

    checkpt_file = 'pre_trained/mod_test.ckpt'

    dataset = 'know'

    # training params
    batch_size = 1
    nonlinearity = tf.nn.elu
    if sparse:
        model = SpGAT
    else:
        model = GAT
    nhood = 1
    in_drop = attention_drop

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_attr_name = process.load_data(
        edge_attr_directory, node_features_path, label_path, train_ratio)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    features = features[np.newaxis]
    # adj = adj[np.newaxis]
    # adjs = [adj[np.newaxis] for adj in adjs]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(
            adjs[0], to_unweighted=True
        )  # sparse (indices, values, dense_shape), the graph topologies (unweighted)
        adjs = [
            tf.SparseTensor(
                *process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)
    # biases = process.get_bias_mat(adjs[0], [nb_nodes], nhood=nhood)
    print(biases)

    # with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # bias_in = tf.sparse_placeholder(dtype=tf.float32)
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32,
                                     shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    train_op = model.training(loss, lr, l2_coef)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    vlss_mn = np.inf
    vacc_mx = 0.0
    vmf1_mx = 0.0
    curr_step = 0

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        train_summary_writer = tf.summary.FileWriter(log_path + '/train')
        test_summary_writer = tf.summary.FileWriter(log_path + '/test')
        sess.run(init_op)

        train_loss_avg = 0
        train_acc_avg = 0
        train_microf1_avg = 0
        val_loss_avg = 0
        val_acc_avg = 0
        val_microf1_avg = 0

        for epoch in range(nb_epochs):
            tr_step = 0
            tr_size = features.shape[0]

            while tr_step * batch_size < tr_size:
                if sparse:
                    bbias = biases
                else:
                    bbias = biases[tr_step * batch_size:(tr_step + 1) *
                                   batch_size]

                _, summary, loss_value_tr, acc_tr, micro_f1_tr = sess.run(
                    [train_op, merged, loss, accuracy, micro_f1],
                    feed_dict={
                        ftr_in:
                        features[tr_step * batch_size:(tr_step + 1) *
                                 batch_size],
                        # bias_in: bbias,
                        lbl_in:
                        y_train[tr_step * batch_size:(tr_step + 1) *
                                batch_size],
                        msk_in:
                        train_mask[tr_step * batch_size:(tr_step + 1) *
                                   batch_size],
                        is_train:
                        True,
                        attn_drop:
                        attention_drop,
                        ffd_drop:
                        in_drop
                    })
                print(loss_value_tr)
                train_microf1_avg += micro_f1_tr
                train_loss_avg += loss_value_tr
                train_acc_avg += acc_tr
                tr_step += 1
                train_summary_writer.add_summary(summary, epoch)

            vl_step = 0
            vl_size = features.shape[0]

            while vl_step * batch_size < vl_size:
                if sparse:
                    bbias = biases
                else:
                    bbias = biases[vl_step * batch_size:(vl_step + 1) *
                                   batch_size]
                summary, loss_value_vl, acc_vl, micro_f1_vl = sess.run(
                    [merged, loss, accuracy, micro_f1],
                    feed_dict={
                        ftr_in:
                        features[vl_step * batch_size:(vl_step + 1) *
                                 batch_size],
                        # bias_in: bbias,
                        lbl_in:
                        y_val[vl_step * batch_size:(vl_step + 1) * batch_size],
                        msk_in:
                        val_mask[vl_step * batch_size:(vl_step + 1) *
                                 batch_size],
                        is_train:
                        False,
                        attn_drop:
                        0.0,
                        ffd_drop:
                        0.0
                    })
                val_microf1_avg += micro_f1_vl
                val_loss_avg += loss_value_vl
                val_acc_avg += acc_vl
                vl_step += 1
                test_summary_writer.add_summary(summary, epoch)

            print(
                'Training: loss = %.5f, acc = %.5f, micro_f1 = %.5f | Val: loss = %.5f, acc = %.5f, micro_f1 = %.5f'
                % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                   train_microf1_avg / tr_step, val_loss_avg / vl_step,
                   val_acc_avg / vl_step, val_microf1_avg / vl_step))

            if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                    vacc_early_model = val_acc_avg / vl_step
                    vlss_early_model = val_loss_avg / vl_step
                    saver.save(sess, checkpt_file)
                vmf1_mx = np.max((val_microf1_avg / vl_step, vmf1_mx))
                vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                curr_step = 0
            else:
                curr_step += 1
                if curr_step == patience:
                    print('Early stop! Min loss: ', vlss_mn,
                          ', Max accuracy: ', vacc_mx, ', Max Micro-f1',
                          vmf1_mx)
                    print('Early stop model validation loss: ',
                          vlss_early_model, ', accuracy: ', vacc_early_model)
                    break

            train_loss_avg = 0
            train_acc_avg = 0
            train_microf1_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0
            val_microf1_avg = 0

        saver.restore(sess, checkpt_file)

        ts_size = features.shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        while ts_step * batch_size < ts_size:
            if sparse:
                bbias = biases
            else:
                bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size]
            loss_value_ts, acc_ts = sess.run(
                [loss, accuracy],
                feed_dict={
                    ftr_in:
                    features[ts_step * batch_size:(ts_step + 1) * batch_size],
                    # bias_in: bbias,
                    lbl_in:
                    y_test[ts_step * batch_size:(ts_step + 1) * batch_size],
                    msk_in:
                    test_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                    is_train:
                    False,
                    attn_drop:
                    0.0,
                    ffd_drop:
                    0.0
                })
            ts_loss += loss_value_ts
            ts_acc += acc_ts
            ts_step += 1

        print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
              ts_acc / ts_step)

        sess.close()
Example #9
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    dataset = args.dataset
    # training params
    batch_size = 1
    nb_epochs = 100000
    patience = 100
    lr = args.lr  
    l2_coef = args.l2  
    hid_units = [args.units]  
    n_heads = [1, 1] # layers
    drop_out = args.drop
    residual = False
    nonlinearity = tf.nn.elu
    model = SpHGAT

    print('Dataset: ' + dataset)
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))

    sparse = True

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                    attn_drop, ffd_drop,
                                    bias_mat=bias_in,
                                    hid_units=hid_units, n_heads=n_heads,
                                    activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        pred_all = tf.cast(tf.argmax(log_resh, 1), dtype=tf.int32)
        real_all = tf.cast(tf.argmax(lab_resh, 1), dtype=tf.int32)
        train_op = model.my_training(loss, lr, l2_coef)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())


        vlss_mn = np.inf
        vacc_mx = 0.0
        tlss_mn = 0.0
        tacc_mx = 0.0
        curr_step = 0
        with tf.Session(config=config) as sess:
            sess.run(init_op)
            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0

            for epoch in range(nb_epochs):
                tr_step = 0
                tr_size = features.shape[0]

                while tr_step * batch_size < tr_size:
                    if sparse:
                        bbias = biases
                    else:
                        bbias = biases[tr_step*batch_size:(tr_step+1)*batch_size]

                    _, loss_value_tr, acc_tr = sess.run([train_op, loss, accuracy],
                        feed_dict={
                            ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size],
                            bias_in: bbias,
                            lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size],
                            msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size],
                            is_train: True,
                            attn_drop: drop_out, ffd_drop: drop_out})
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                vl_step = 0
                vl_size = features.shape[0]

                while vl_step * batch_size < vl_size:
                    if sparse:
                        bbias = biases
                    else:
                        bbias = biases[vl_step*batch_size:(vl_step+1)*batch_size]
                    loss_value_vl, acc_vl = sess.run([loss, accuracy],
                        feed_dict={
                            ftr_in: features[vl_step*batch_size:(vl_step+1)*batch_size],
                            bias_in: bbias,
                            lbl_in: y_val[vl_step*batch_size:(vl_step+1)*batch_size],
                            msk_in: val_mask[vl_step*batch_size:(vl_step+1)*batch_size],
                            is_train: False,
                            attn_drop: 0.0, ffd_drop: 0.0})
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1


                ts_size = features.shape[0]
                ts_step = 0
                ts_loss = 0.0
                ts_acc = 0.0

                print(epoch, 'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                        (train_loss_avg/tr_step, train_acc_avg/tr_step,
                        val_loss_avg/vl_step, val_acc_avg/vl_step))

                if val_acc_avg/vl_step >= vacc_mx or val_loss_avg/vl_step <= vlss_mn:
                    vacc_mx = np.max((val_acc_avg/vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg/vl_step, vlss_mn))
                    curr_step = 0

                    while ts_step * batch_size < ts_size:
                        if sparse:
                            bbias = biases
                        else:
                            bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size]
                        loss_value_ts, acc_ts = sess.run(
                            [loss, accuracy],
                            feed_dict={
                                ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size],
                                bias_in: bbias,
                                lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size],
                                msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                                is_train: False,
                                attn_drop: 0.0, ffd_drop: 0.0})
                        ts_loss += loss_value_ts
                        ts_acc += acc_ts
                        ts_step += 1
                    tlss_mn = ts_loss / ts_step
                    tacc_mx = ts_acc / ts_step
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min validation loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Test loss:', tlss_mn, ', accuracy {}'.format(tacc_mx))
                        break
                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0
            sess.close()
Example #10
def run_gat(dataset, batch_size, nb_epochs, patience, lr, l2_coef, hid_units,
            n_heads, residual, nonlinearity, model, checkpt_file, nhood):
    # redirect output to file
    import sys

    orig_stdout = sys.stdout
    if os.path.isfile(os.path.dirname(checkpt_file) + 'out.txt'):
        f = open(os.path.dirname(checkpt_file) + 'out.txt', 'a')
        print('\n\n\n\n')
    else:
        f = open(os.path.dirname(checkpt_file) + 'out.txt', 'w')
    sys.stdout = f

    print('Dataset: ' + dataset)
    print('batch_size: ' + str(batch_size))
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))
    print('nhood: ' + str(nhood))

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(
        dataset)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    adj = adj.todense()

    features = features[np.newaxis]
    adj = adj[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32,
                                    shape=(batch_size, nb_nodes, ft_size))
            bias_in = tf.placeholder(dtype=tf.float32,
                                     shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in,
                                 nb_classes,
                                 nb_nodes,
                                 is_train,
                                 attn_drop,
                                 ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units,
                                 n_heads=n_heads,
                                 residual=residual,
                                 activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0

        start = time.time()

        with tf.Session() as sess:
            sess.run(init_op)

            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0

            for epoch in range(nb_epochs):
                tr_step = 0
                tr_size = features.shape[0]

                while tr_step * batch_size < tr_size:
                    _, loss_value_tr, acc_tr = sess.run(
                        [train_op, loss, accuracy],
                        feed_dict={
                            ftr_in:
                            features[tr_step * batch_size:(tr_step + 1) *
                                     batch_size],
                            bias_in:
                            biases[tr_step * batch_size:(tr_step + 1) *
                                   batch_size],
                            lbl_in:
                            y_train[tr_step * batch_size:(tr_step + 1) *
                                    batch_size],
                            msk_in:
                            train_mask[tr_step * batch_size:(tr_step + 1) *
                                       batch_size],
                            is_train:
                            True,
                            attn_drop:
                            0.6,
                            ffd_drop:
                            0.6
                        })
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                vl_step = 0
                vl_size = features.shape[0]

                while vl_step * batch_size < vl_size:
                    loss_value_vl, acc_vl = sess.run(
                        [loss, accuracy],
                        feed_dict={
                            ftr_in:
                            features[vl_step * batch_size:(vl_step + 1) *
                                     batch_size],
                            bias_in:
                            biases[vl_step * batch_size:(vl_step + 1) *
                                   batch_size],
                            lbl_in:
                            y_val[vl_step * batch_size:(vl_step + 1) *
                                  batch_size],
                            msk_in:
                            val_mask[vl_step * batch_size:(vl_step + 1) *
                                     batch_size],
                            is_train:
                            False,
                            attn_drop:
                            0.0,
                            ffd_drop:
                            0.0
                        })
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1

                print(
                    'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f'
                    % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                       val_loss_avg / vl_step, val_acc_avg / vl_step))

                if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                    if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                        vacc_early_model = val_acc_avg / vl_step
                        vlss_early_model = val_loss_avg / vl_step
                        saver.save(sess, checkpt_file)
                    vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ',
                              vlss_early_model, ', accuracy: ',
                              vacc_early_model)
                        break

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

            saver.restore(sess, checkpt_file)

            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            while ts_step * batch_size < ts_size:
                loss_value_ts, acc_ts = sess.run(
                    [loss, accuracy],
                    feed_dict={
                        ftr_in:
                        features[ts_step * batch_size:(ts_step + 1) *
                                 batch_size],
                        bias_in:
                        biases[ts_step * batch_size:(ts_step + 1) *
                               batch_size],
                        lbl_in:
                        y_test[ts_step * batch_size:(ts_step + 1) *
                               batch_size],
                        msk_in:
                        test_mask[ts_step * batch_size:(ts_step + 1) *
                                  batch_size],
                        is_train:
                        False,
                        attn_drop:
                        0.0,
                        ffd_drop:
                        0.0
                    })
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                  ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time',
                  time.time() - start)

            sess.close()
    sys.stdout = orig_stdout
    f.close()
Example #11
train_nodes, val_nodes, test_nodes,\
tr_msk, vl_msk, ts_msk\
= process_ppi.process_p2p(is_toy_model)

# for i in range(train_feat.shape[0]):
#     train_feat[i] = process.preprocess_features2(train_feat[i])
# for i in range(val_feat.shape[0]):
#     val_feat[i] = process.preprocess_features2(val_feat[i])
# for i in range(test_feat.shape[0]):
#     test_feat[i] = process.preprocess_features2(test_feat[i])

nb_nodes = train_feat.shape[1]
ft_size = train_feat.shape[2]
nb_classes = train_labels.shape[2]

tr_biases = process.adj_to_bias(train_adj, train_nodes, nhood=1)
vl_biases = process.adj_to_bias(val_adj, val_nodes, nhood=1)
ts_biases = process.adj_to_bias(test_adj, test_nodes, nhood=1)

with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
        bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                attn_drop, ffd_drop,
Example #12
def run_gat(dataset,
            batch_size,
            nb_epochs,
            patience,
            lr,
            l2_coef,
            hid_units,
            n_heads,
            residual,
            nonlinearity,
            model,
            checkpt_file,
            nhood,
            param_attn_drop=0.6,
            param_ffd_drop=0.6,
            sparse=False):
    '''
    Function that runs all the experiments.
    :param dataset: The string name of the dataset.
    :param batch_size: Number of samples per batch. Has to be one for sparse execution.
    :param nb_epochs: Number of epochs that the method runs
    :param patience: The number of epochs with no improvement in validation accuracy that stops the training.
    :param lr: Learning rate.
    :param l2_coef: The L2 regularization strength.
    :param hid_units: List. Number of features the respective layer produces from its input features.
    :param n_heads: List. The number of entries is the number of layers; each element's value is the number of attention heads.
    :param residual: Whether or not to use residual connections in the hidden layers.
    :param nonlinearity: tensorflow function for non-linearity
    :param model: Model that inherits from BaseGAttN and implements the inference method
    :param checkpt_file: Location where the logs, output and model checkpoints are saved
    :param nhood: The neighborhood to consider. One for direct neighborhood and two for neighbors of neighbors.
    :param param_attn_drop: Drops a percent of attention coefficients.
    :param param_ffd_drop: Drops a percent of inputs from the previous layer.
    :param sparse: If True, the model has to be SpGAT
    :return: Prints and logs results.
    '''
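    # For orientation, a hypothetical call of this function could look roughly like the
    # following; the hyper-parameter values are only the usual GAT defaults for Cora and
    # are not taken from this file:
    #
    #   run_gat(dataset='cora', batch_size=1, nb_epochs=100000, patience=100,
    #           lr=0.005, l2_coef=0.0005, hid_units=[8], n_heads=[8, 1],
    #           residual=False, nonlinearity=tf.nn.elu, model=GAT,
    #           checkpt_file='pre_trained/mod_cora.ckpt', nhood=1)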

    # necessary workaround to run on GPU
    '''
    from tensorflow.compat.v1 import ConfigProto
    from tensorflow.compat.v1 import InteractiveSession

    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    '''

    # redirect output to file
    import sys

    orig_stdout = sys.stdout
    out_path = os.path.join(os.path.dirname(checkpt_file), 'out.txt')
    if os.path.isfile(out_path):
        f = open(out_path, 'a')
        sys.stdout = f
        print('\n\n\n\n')
    else:
        f = open(out_path, 'w')
        sys.stdout = f

    print('Dataset: ' + dataset)
    print('batch_size: ' + str(batch_size))
    print('----- Opt. hyperparams -----')
    print('lr: ' + str(lr))
    print('l2_coef: ' + str(l2_coef))
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))
    print('nhood: ' + str(nhood))
    print('attn_drop: ' + str(param_attn_drop))
    print('ffd_drop: ' + str(param_ffd_drop))

    # load any of the three transductive datasets
    # adj holds the connection information (adjacency matrix)
    # features is a node x feature matrix with the features of each node
    # y_... holds the labels as a node x class matrix
    # mask is a node vector with 0/1 entries that selects the train, val and test set:
    # everything is computed for all nodes, but only nodes masked with 1 are evaluated
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(
        dataset)
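    # A minimal NumPy-style sketch of the masking idea described above (illustrative
    # only, not the actual implementation behind model.masked_softmax_cross_entropy):
    #
    #   per_node_loss = cross_entropy(logits, labels)   # one loss value per node
    #   mask = train_mask.astype(np.float32)
    #   mask /= mask.mean()                             # keep the loss magnitude comparable
    #   loss = (per_node_loss * mask).mean()            # nodes masked with 0 contribute nothing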
    # preprocessing steps
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    features = features[np.newaxis]
    # adj = adj[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    # the adjacency matrix is transformed into a bias that is added to the attention logits.
    # when no connection between two nodes exists in the specified neighborhood, the attention
    # between both nodes is set to a big negative value, practically canceling out its effect.
    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=nhood)
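    # A minimal sketch of what the dense adj_to_bias transformation amounts to (assuming
    # an adjacency of shape [graphs, nb_nodes, nb_nodes] that already encodes the chosen
    # neighborhood; illustrative only):
    #
    #   connected = (adj > 0).astype(adj.dtype)   # 1 where nodes may attend to each other
    #   biases = -1e9 * (1.0 - connected)         # roughly -inf everywhere else
    #
    # Added to the raw attention logits before the softmax, this drives the attention
    # between unconnected nodes towards zero.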

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # initialization
            ftr_in = tf.placeholder(dtype=tf.float32,
                                    shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32,
                                         shape=(batch_size, nb_nodes,
                                                nb_nodes))

            # bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32,
                                    shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in,
                                 nb_classes,
                                 nb_nodes,
                                 is_train,
                                 attn_drop,
                                 ffd_drop,
                                 bias_mat=bias_in,
                                 hid_units=hid_units,
                                 n_heads=n_heads,
                                 residual=residual,
                                 activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0

        start = time.time()

        with tf.Session() as sess:
            sess.run(init_op)

            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0

            for epoch in range(nb_epochs):
                tr_step = 0
                tr_size = features.shape[0]
                # training steps
                while tr_step * batch_size < tr_size:
                    if sparse:
                        bbias = biases
                    else:
                        bbias = biases[tr_step * batch_size:(tr_step + 1) *
                                       batch_size]

                    _, loss_value_tr, acc_tr = sess.run(
                        [train_op, loss, accuracy],
                        feed_dict={
                            ftr_in:
                            features[tr_step * batch_size:(tr_step + 1) *
                                     batch_size],
                            bias_in:
                            bbias,
                            lbl_in:
                            y_train[tr_step * batch_size:(tr_step + 1) *
                                    batch_size],
                            msk_in:
                            train_mask[tr_step * batch_size:(tr_step + 1) *
                                       batch_size],
                            is_train:
                            True,
                            attn_drop:
                            param_attn_drop,
                            ffd_drop:
                            param_ffd_drop
                        })
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                vl_step = 0
                vl_size = features.shape[0]
                # validation steps
                while vl_step * batch_size < vl_size:
                    if sparse:
                        bbias = biases
                    else:
                        bbias = biases[vl_step * batch_size:(vl_step + 1) *
                                       batch_size]

                    loss_value_vl, acc_vl = sess.run(
                        [loss, accuracy],
                        feed_dict={
                            ftr_in:
                            features[vl_step * batch_size:(vl_step + 1) *
                                     batch_size],
                            bias_in:
                            bbias,
                            lbl_in:
                            y_val[vl_step * batch_size:(vl_step + 1) *
                                  batch_size],
                            msk_in:
                            val_mask[vl_step * batch_size:(vl_step + 1) *
                                     batch_size],
                            is_train:
                            False,
                            attn_drop:
                            0.0,
                            ffd_drop:
                            0.0
                        })
                    val_loss_avg += loss_value_vl
                    val_acc_avg += acc_vl
                    vl_step += 1

                print(
                    'Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f'
                    % (train_loss_avg / tr_step, train_acc_avg / tr_step,
                       val_loss_avg / vl_step, val_acc_avg / vl_step))

                # patience step
                if val_acc_avg / vl_step >= vacc_mx or val_loss_avg / vl_step <= vlss_mn:
                    if val_acc_avg / vl_step >= vacc_mx and val_loss_avg / vl_step <= vlss_mn:
                        vacc_early_model = val_acc_avg / vl_step
                        vlss_early_model = val_loss_avg / vl_step
                        saver.save(sess, checkpt_file)
                    vacc_mx = np.max((val_acc_avg / vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg / vl_step, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn,
                              ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ',
                              vlss_early_model, ', accuracy: ',
                              vacc_early_model)
                        break

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

            saver.restore(sess, checkpt_file)

            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0
            # evaluate on the test set
            while ts_step * batch_size < ts_size:
                if sparse:
                    bbias = biases
                else:
                    bbias = biases[ts_step * batch_size:(ts_step + 1) *
                                   batch_size]
                loss_value_ts, acc_ts = sess.run(
                    [loss, accuracy],
                    feed_dict={
                        ftr_in:
                        features[ts_step * batch_size:(ts_step + 1) *
                                 batch_size],
                        bias_in:
                        bbias,
                        lbl_in:
                        y_test[ts_step * batch_size:(ts_step + 1) *
                               batch_size],
                        msk_in:
                        test_mask[ts_step * batch_size:(ts_step + 1) *
                                  batch_size],
                        is_train:
                        False,
                        attn_drop:
                        0.0,
                        ffd_drop:
                        0.0
                    })
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
                  ts_acc / ts_step, ' at epoch: ', epoch, ' elapsed time',
                  time.time() - start)

            # log information about the training
            log_path = os.path.join(os.path.dirname(checkpt_file), 'log.csv')
            if os.path.isfile(log_path):
                print('loading existing log')
                df = pd.read_csv(log_path, index_col=['run'])
                print('log: ' + str(df))
            else:
                print('Creating new log')
                df = pd.DataFrame(columns=tracking_params + result_cols)

            log = dict(
                zip(tracking_params + result_cols, [
                    dataset, lr, l2_coef, hid_units, n_heads, residual,
                    str(nonlinearity).split(' ')[1], param_attn_drop,
                    param_ffd_drop, nhood
                ] + [
                    epoch,
                    time.time() - start, vlss_mn, vacc_mx, ts_loss / ts_step,
                    ts_acc / ts_step
                ]))

            print('Adding entry: ' + str(log))

            df = df.append(log, ignore_index=True)
            print('saving logs')
            df.to_csv(log_path, index_label='run')
            print('log save successful')

            sess.close()
    # restore standard output
    sys.stdout = orig_stdout
    f.close()
Beispiel #13
0
def train(sparse, hid_units, n_heads, residual, edge_attr_directory,
          node_features_path, label_path, train_ratio, model_path):
    # flags = tf.app.flags
    # FLAGS = flags.FLAGS

    # flags.DEFINE_string('summaries_dir', log_path, 'Summaries directory')
    # if tf.gfile.Exists(log_path):
    #     tf.gfile.DeleteRecursively(log_path)
    # tf.gfile.MakeDirs(log_path)

    checkpt_file = model_path

    dataset = 'know'

    # training params
    batch_size = 1
    nonlinearity = tf.nn.elu
    if sparse:
        model = SpGAT
    else:
        model = GAT
    nhood = 1
    # in_drop = attention_drop

    print('Dataset: ' + dataset)
    print('----- Archi. hyperparams -----')
    print('nb. layers: ' + str(len(hid_units)))
    print('nb. units per layer: ' + str(hid_units))
    print('nb. attention heads: ' + str(n_heads))
    print('residual: ' + str(residual))
    print('nonlinearity: ' + str(nonlinearity))
    print('model: ' + str(model))

    adjs, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_attr_name = process.load_data(
        edge_attr_directory, node_features_path, label_path, train_ratio)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    features = features[np.newaxis]
    # adj = adj[np.newaxis]
    # adjs = [adj[np.newaxis] for adj in adjs]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(
            adjs[0], to_unweighted=True
        )  # sparse (indices, values, dense_shape), the graph topologies (unweighted)
        adjs = [
            tf.SparseTensor(
                *process.preprocess_adj_bias(adj, to_unweighted=False))
            for adj in adjs
        ]
    else:
        biases = process.adj_to_bias(adjs[0], [nb_nodes], nhood=nhood)
    # biases = process.get_bias_mat(adjs[0], [nb_nodes], nhood=nhood)
    print(biases)
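    # For reference: in the sparse case `biases` is an (indices, values, dense_shape)
    # triple; for a hypothetical 3-node chain graph with self-loops it could look like
    #   indices     = [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 1], [2, 2]]
    #   values      = [1., 1., 1., 1., 1., 1., 1.]
    #   dense_shape = (3, 3)
    # tf.SparseTensor(*biases) below turns this triple into the sparse attention mask.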

    # with tf.Graph().as_default():
    with tf.name_scope('input'):
        ftr_in = tf.placeholder(dtype=tf.float32,
                                shape=(batch_size, nb_nodes, ft_size))
        if sparse:
            # bias_in = tf.sparse_placeholder(dtype=tf.float32)
            bias_in = tf.SparseTensor(*biases)
        else:
            bias_in = tf.placeholder(dtype=tf.float32,
                                     shape=(batch_size, nb_nodes, nb_nodes))
        lbl_in = tf.placeholder(dtype=tf.int32,
                                shape=(batch_size, nb_nodes, nb_classes))
        msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
        attn_drop = tf.placeholder(dtype=tf.float32, shape=())
        ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
        is_train = tf.placeholder(dtype=tf.bool, shape=())

    logits = model.inference(inputs=ftr_in,
                             edge_adjs=adjs,
                             nb_classes=nb_classes,
                             nb_nodes=nb_nodes,
                             training=is_train,
                             attn_drop=attn_drop,
                             ffd_drop=ffd_drop,
                             bias_mat=bias_in,
                             hid_units=hid_units,
                             n_heads=n_heads,
                             residual=residual,
                             activation=nonlinearity,
                             edge_attr_name=edge_attr_name)
    log_resh = tf.reshape(logits, [-1, nb_classes])
    lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
    msk_resh = tf.reshape(msk_in, [-1])
    loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
    micro_f1 = model.micro_f1(log_resh, lab_resh, msk_resh)
    accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

    # train_op = model.training(loss, lr, l2_coef)

    saver = tf.train.Saver()

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    with tf.Session() as sess:
        sess.run(init_op)

        saver.restore(sess, checkpt_file)

        ts_size = features.shape[0]
        ts_step = 0
        ts_loss = 0.0
        ts_acc = 0.0

        while ts_step * batch_size < ts_size:
            if sparse:
                bbias = biases
            else:
                bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size]
            feed_dict = {
                ftr_in:
                features[ts_step * batch_size:(ts_step + 1) * batch_size],
                lbl_in:
                y_test[ts_step * batch_size:(ts_step + 1) * batch_size],
                msk_in:
                test_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                is_train:
                False,
                attn_drop:
                0.0,
                ffd_drop:
                0.0
            }
            if not sparse:
                # bias_in is a placeholder only in the dense case and then has to be fed
                feed_dict[bias_in] = bbias
            loss_value_ts, acc_ts = sess.run([loss, accuracy],
                                             feed_dict=feed_dict)
            ts_loss += loss_value_ts
            ts_acc += acc_ts
            ts_step += 1

        print('Test loss:', ts_loss / ts_step, '; Test accuracy:',
              ts_acc / ts_step)

        sess.close()
Beispiel #14
0
def train():
    sparse = True

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = process.load_data(dataset, train_size, validation_size)
    features, spars = process.preprocess_features(features)

    nb_nodes = features.shape[0]
    ft_size = features.shape[1]
    nb_classes = y_train.shape[1]

    features = features[np.newaxis]
    y_train = y_train[np.newaxis]
    y_val = y_val[np.newaxis]
    y_test = y_test[np.newaxis]
    train_mask = train_mask[np.newaxis]
    val_mask = val_mask[np.newaxis]
    test_mask = test_mask[np.newaxis]

    if sparse:
        biases = process.preprocess_adj_bias(adj)
    else:
        adj = adj.todense()
        adj = adj[np.newaxis]
        biases = process.adj_to_bias(adj, [nb_nodes], nhood=1)

    with tf.Graph().as_default():
        with tf.name_scope('input'):
            ftr_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, ft_size))
            if sparse:
                #bias_idx = tf.placeholder(tf.int64)
                #bias_val = tf.placeholder(tf.float32)
                #bias_shape = tf.placeholder(tf.int64)
                bias_in = tf.sparse_placeholder(dtype=tf.float32)
            else:
                bias_in = tf.placeholder(dtype=tf.float32, shape=(batch_size, nb_nodes, nb_nodes))
            lbl_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes, nb_classes))
            msk_in = tf.placeholder(dtype=tf.int32, shape=(batch_size, nb_nodes))
            attn_drop = tf.placeholder(dtype=tf.float32, shape=())
            ffd_drop = tf.placeholder(dtype=tf.float32, shape=())
            is_train = tf.placeholder(dtype=tf.bool, shape=())

        logits = model.inference(ftr_in, nb_classes, nb_nodes, is_train,
                                    attn_drop, ffd_drop,
                                    bias_mat=bias_in,
                                    hid_units=hid_units, n_heads=n_heads,
                                    residual=residual, activation=nonlinearity)
        log_resh = tf.reshape(logits, [-1, nb_classes])
        lab_resh = tf.reshape(lbl_in, [-1, nb_classes])
        msk_resh = tf.reshape(msk_in, [-1])
        loss = model.masked_softmax_cross_entropy(log_resh, lab_resh, msk_resh)
        accuracy = model.masked_accuracy(log_resh, lab_resh, msk_resh)

        train_op = model.training(loss, lr, l2_coef)

        saver = tf.train.Saver()

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        vlss_mn = np.inf
        vacc_mx = 0.0
        curr_step = 0

        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init_op)

            train_loss_avg = 0
            train_acc_avg = 0
            val_loss_avg = 0
            val_acc_avg = 0

            for epoch in range(nb_epochs):
                tr_step = 0
                tr_size = features.shape[0]

                while tr_step * batch_size < tr_size:
                    if sparse:
                        bbias = biases
                    else:
                        bbias = biases[tr_step*batch_size:(tr_step+1)*batch_size]

                    _, loss_value_tr, acc_tr = sess.run([train_op, loss, accuracy],
                        feed_dict={
                            ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size],
                            bias_in: bbias,
                            lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size],
                            msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size],
                            is_train: True,
                            attn_drop: 0.6, ffd_drop: 0.6})
                    train_loss_avg += loss_value_tr
                    train_acc_avg += acc_tr
                    tr_step += 1

                if args.validate:
                    vl_step = 0
                    vl_size = features.shape[0]

                    while vl_step * batch_size < vl_size:
                        if sparse:
                            bbias = biases
                        else:
                            bbias = biases[vl_step*batch_size:(vl_step+1)*batch_size]
                        loss_value_vl, acc_vl = sess.run([loss, accuracy],
                            feed_dict={
                                ftr_in: features[vl_step*batch_size:(vl_step+1)*batch_size],
                                bias_in: bbias,
                                lbl_in: y_val[vl_step*batch_size:(vl_step+1)*batch_size],
                                msk_in: val_mask[vl_step*batch_size:(vl_step+1)*batch_size],
                                is_train: False,
                                attn_drop: 0.0, ffd_drop: 0.0})
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1
                else:
                    # no separate validation pass: re-evaluate the training nodes in inference mode as a proxy
                    tr_step = 0
                    vl_step = 0
                    vl_size = features.shape[0]
                    val_loss_avg = 0
                    val_acc_avg = 0
                    while tr_step * batch_size < tr_size:
                        if sparse:
                            bbias = biases
                        else:
                            bbias = biases[tr_step*batch_size:(tr_step+1)*batch_size]
                        loss_value_vl, acc_vl = sess.run([loss, accuracy],
                            feed_dict={
                                ftr_in: features[tr_step*batch_size:(tr_step+1)*batch_size],
                                bias_in: bbias,
                                lbl_in: y_train[tr_step*batch_size:(tr_step+1)*batch_size],
                                msk_in: train_mask[tr_step*batch_size:(tr_step+1)*batch_size],
                                is_train: False,
                                attn_drop: 0., ffd_drop: 0.})
                        val_loss_avg += loss_value_vl
                        val_acc_avg += acc_vl
                        vl_step += 1
                        tr_step += 1

                print('%d Training: loss = %.5f, acc = %.5f | Val: loss = %.5f, acc = %.5f' %
                        (epoch, train_loss_avg/tr_step, train_acc_avg/tr_step,
                        val_loss_avg/vl_step, val_acc_avg/vl_step))


                if val_acc_avg/vl_step > vacc_mx or val_loss_avg/vl_step < vlss_mn:
                    if val_acc_avg/vl_step >= vacc_mx and val_loss_avg/vl_step <= vlss_mn:
                        vacc_early_model = val_acc_avg/vl_step
                        vlss_early_model = val_loss_avg/vl_step
                        saver.save(sess, checkpt_file)

                        # evaluate on the test set each time a new best model is checkpointed
                        ts_size = features.shape[0]
                        ts_step = 0
                        ts_loss = 0.0
                        ts_acc = 0.0

                        while ts_step * batch_size < ts_size:
                            if sparse:
                                bbias = biases
                            else:
                                bbias = biases[ts_step * batch_size:(ts_step + 1) * batch_size]
                            loss_value_ts, acc_ts = sess.run([loss, accuracy],
                                                             feed_dict={
                                                                 ftr_in: features[ts_step * batch_size:(ts_step + 1) * batch_size],
                                                                 bias_in: bbias,
                                                                 lbl_in: y_test[ts_step * batch_size:(ts_step + 1) * batch_size],
                                                                 msk_in: test_mask[ts_step * batch_size:(ts_step + 1) * batch_size],
                                                                 is_train: False,
                                                                 attn_drop: 0.0, ffd_drop: 0.0})
                            ts_loss += loss_value_ts
                            ts_acc += acc_ts
                            ts_step += 1

                        print('Test loss:', ts_loss / ts_step, '; Test accuracy:', ts_acc / ts_step)

                    vacc_mx = np.max((val_acc_avg/vl_step, vacc_mx))
                    vlss_mn = np.min((val_loss_avg/vl_step, vlss_mn))
                    curr_step = 0
                else:
                    curr_step += 1
                    if curr_step == patience:
                        print('Early stop! Min loss: ', vlss_mn, ', Max accuracy: ', vacc_mx)
                        print('Early stop model validation loss: ', vlss_early_model, ', accuracy: ', vacc_early_model)
                        break

                train_loss_avg = 0
                train_acc_avg = 0
                val_loss_avg = 0
                val_acc_avg = 0

            saver.restore(sess, checkpt_file)

            ts_size = features.shape[0]
            ts_step = 0
            ts_loss = 0.0
            ts_acc = 0.0

            while ts_step * batch_size < ts_size:
                if sparse:
                    bbias = biases
                else:
                    bbias = biases[ts_step*batch_size:(ts_step+1)*batch_size]
                loss_value_ts, acc_ts = sess.run([loss, accuracy],
                    feed_dict={
                        ftr_in: features[ts_step*batch_size:(ts_step+1)*batch_size],
                        bias_in: bbias,
                        lbl_in: y_test[ts_step*batch_size:(ts_step+1)*batch_size],
                        msk_in: test_mask[ts_step*batch_size:(ts_step+1)*batch_size],
                        is_train: False,
                        attn_drop: 0.0, ffd_drop: 0.0})
                ts_loss += loss_value_ts
                ts_acc += acc_ts
                ts_step += 1

            print('Test loss:', ts_loss/ts_step, '; Test accuracy:', ts_acc/ts_step)

            sess.close()
            return ts_loss/ts_step, ts_acc/ts_step