def format_data(data_source):

    adj, features, labels = load_data(data_source)

    # Store original adjacency matrix (without diagonal entries) for later
    # adj_orig = adj
    # adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    # adj_orig.eliminate_zeros()
    # adj = adj_orig

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().items() if v is item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item

    return feas
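
A note on the retrieve_name helper used above: it is not defined in any of these examples. A minimal sketch, assuming it recovers the variable name bound in the caller's scope via the inspect module (hypothetical implementation):

import inspect

def retrieve_name(var):
    # Return the first name bound to `var` in the caller's local scope
    # (skipping the generic loop-variable name 'item' used in these examples).
    callers_locals = inspect.currentframe().f_back.f_locals.items()
    names = [name for name, value in callers_locals
             if value is var and name != 'item']
    return names[0] if names else None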
Example 2
def format_data(data_source):

    adj, features, labels = load_data2(data_source)

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().items() if v is item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item

    return feas
Example 3
def format_data_new(adj, features):
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    values = [
        adj, num_features, num_nodes, features_nonzero, pos_weight, norm,
        adj_norm, adj_label, features, adj_orig
    ]
    keys = [
        'adj', 'num_features', 'num_nodes', 'features_nonzero', 'pos_weight',
        'norm', 'adj_norm', 'adj_label', 'features', 'adj_orig'
    ]
    feas = {}
    feas = dict(zip(keys, values))

    return feas
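
The indexing above (features[2][1] for the dimensionality, features[1].shape[0] for the non-zero count) assumes sparse_to_tuple returns a (coords, values, shape) triple. A minimal sketch of that convention:

import numpy as np
import scipy.sparse as sp

def sparse_to_tuple(sparse_mx):
    # Convert a scipy sparse matrix to a (coords, values, shape) tuple.
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape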
Example 4
def format_data(data_name):
    # Load data

    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(
        data_name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    # remove diagonal entries
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train
    adj_dense = adj.toarray()

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]
    features_dense = features.tocoo().toarray()

    features = sparse_to_tuple(features.tocoo())
    # num_features is the feature dimensionality
    num_features = features[2][1]
    # features_nonzero is the number of non-zero feature entries
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [
        adj, num_features, num_nodes, features_nonzero, pos_weight, norm,
        adj_norm, adj_label, features, true_labels, train_edges, val_edges,
        val_edges_false, test_edges, test_edges_false, adj_orig,
        features_dense, adj_dense
    ]
    feas = {}

    print('num_features is:', num_features)
    print('num_nodes is:', num_nodes)
    print('features_nonzero is:', features_nonzero)
    print('pos_weight is:', pos_weight)
    print('norm is:', norm)

    for item in items:
        # item_name = [k for k, v in locals().items() if v is item][0]
        feas[retrieve_name(item)] = item

    return feas
Example 5
def format_data(data_name):
    # Load data
    #adj, features, y_test, tx, ty, test_maks, true_labels = load_data(data_name)
    print("&&&&&&&&&&&&&&&&&", data_name)
    rownetworks, numView, features, truelabels, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
        data_name)
    adjs_orig = []
    for v in range(numView):
        adj_orig = rownetworks[v]
        adj_orig = adj_orig - sp.dia_matrix(
            (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
        #adj_orig.eliminate_zeros()
        adjs_orig.append(adj_orig)
    adjs_label = rownetworks

    adjs_orig = np.array(adjs_orig)
    adjs = adjs_orig
    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adjs_norm = preprocess_graph(adjs)

    num_nodes = adjs[0].shape[0]

    num_features = features.shape[1]
    #features_nonzero = features[1].shape[0]
    fea_pos_weights = float(features.shape[0] * features.shape[1] -
                            features.sum()) / features.sum()
    pos_weights = []
    norms = []
    for v in range(numView):
        pos_weight = float(adjs[v].shape[0] * adjs[v].shape[0] -
                           adjs[v].sum()) / adjs[v].sum()
        norm = adjs[v].shape[0] * adjs[v].shape[0] / float(
            (adjs[v].shape[0] * adjs[v].shape[0] - adjs[v].sum()) * 2)
        pos_weights.append(pos_weight)
        norms.append(norm)
    true_labels = truelabels
    feas = {
        'adjs': adjs_norm,
        'adjs_label': adjs_label,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'true_labels': true_labels,
        'pos_weights': pos_weights,
        'norms': np.array(norms),
        'adjs_norm': adjs_norm,
        'features': features,
        'fea_pos_weights': fea_pos_weights,
        'numView': numView
    }
    return feas
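
Note that this example passes the whole array of views to preprocess_graph at once. If preprocess_graph only handles a single adjacency matrix, as in the other examples, normalizing each view separately would be the safer variant (a sketch under that assumption):

# Normalize each view on its own instead of passing the stacked array.
adjs_norm = [preprocess_graph(adjs[v]) for v in range(numView)]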
Example 6
def test(saver, adj, features, meta_dir, checkpoints_dir):
    adj_norm, adj_norm_sparse = preprocess_graph(adj)
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]
    adj_label = sparse_to_tuple(adj + sp.eye(adj.shape[0]))
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    # Create model
    saver = tf.train.Saver(max_to_keep=10)
    model = None
    if model_str == "gae_gan":
        model = gaegan(placeholders, num_features, num_nodes, features_nonzero)
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    opt = 0
    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gae_gan':
            opt = Optimizergaegan(preds=model.x_tilde,
                                  labels=tf.reshape(
                                      tf.sparse_tensor_to_dense(
                                          placeholders['adj_orig'],
                                          validate_indices=False), [-1]),
                                  model=model,
                                  num_nodes=num_nodes,
                                  pos_weight=pos_weight,
                                  norm=norm,
                                  global_step=global_steps)

    # session part: load the trained network and rebuild the adjacency matrix
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(meta_dir)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir))
        new_adj = get_new_adj(feed_dict)
    return new_adj
Example 7
def format_data(data_name):
    # Load data

    adj, features, true_labels = load_data(data_name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + 2 * sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    feas = {}
    feas['adj'] = adj
    feas['num_features'] = num_features
    feas['num_nodes'] = num_nodes
    feas['features_nonzero'] = features_nonzero
    feas['pos_weight'] = pos_weight
    feas['norm'] = norm
    feas['adj_norm'] = adj_norm
    feas['adj_label'] = adj_label
    feas['features'] = features
    feas['true_labels'] = true_labels
    feas['train_edges'] = train_edges
    feas['val_edges'] = val_edges
    feas['val_edges_false'] = val_edges_false
    feas['test_edges'] = test_edges
    feas['test_edges_false'] = test_edges_false
    feas['adj_orig'] = adj_orig

    return feas
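
pos_weight and norm, computed the same way in most of these examples, reweight the reconstruction loss for the sparsity of adj: pos_weight is the ratio of negative to positive entries (so the rare positive links are upweighted), and norm rescales the mean loss. A small worked sketch of the same arithmetic on a hypothetical toy matrix:

import numpy as np
import scipy.sparse as sp

# Toy 4-node graph with 4 positive entries out of 16 cells.
adj_toy = sp.csr_matrix(np.array([[0, 1, 0, 0],
                                  [1, 0, 1, 0],
                                  [0, 1, 0, 0],
                                  [0, 0, 0, 0]]))
n = adj_toy.shape[0]
pos = adj_toy.sum()                       # 4 positive entries
pos_weight = float(n * n - pos) / pos     # (16 - 4) / 4 = 3.0
norm = n * n / float((n * n - pos) * 2)   # 16 / 24 = 0.667
print(pos_weight, norm)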
Example 8
def load_model(placeholders, model, opt, adj_train, test_edges, test_edges_false, features, sess, name="single_fold"):

        adj = adj_train
        # This will be calculated for every fold
        # pos_weight and norm should be tensors
        print ('----------------')
        pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() # negatives / positives (N/P)
        norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # n^2 / (2 * negatives)

        adj_label = adj_train + sp.eye(adj_train.shape[0])
        adj_label = sparse_to_tuple(adj_label)

        # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2)
        adj_norm = preprocess_graph(adj)
    
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        feed_dict.update({placeholders['is_training']: True})
        feed_dict.update({placeholders['norm']: norm})
        feed_dict.update({placeholders['pos_weight']: pos_weight})

        saver = tf.train.Saver()
        
        saver.restore(sess=sess, save_path=(save_dir+name))
        print ('Model restored')

        # Decrease MC samples for pubmed 
        if (dataset_str == 'pubmed'): 
                S = 5
        else:
                S = 15
        
        adj_score, z_activated = get_score_matrix(sess, placeholders, feed_dict, model, S=S, save_qual=True)

        return adj_score, z_activated
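
preprocess_graph is used everywhere here but never shown; the comment above describes it as the symmetric normalization D^(-1/2) x adj x D^(-1/2). A minimal sketch, assuming the usual convention of normalizing A + I and returning the tuple form via the sparse_to_tuple sketched after Example 3 (some examples instead unpack two return values, the tuple plus the sparse matrix, which would just mean returning both):

import numpy as np
import scipy.sparse as sp

def preprocess_graph(adj):
    # Symmetrically normalize A + I and return it as a (coords, values, shape) tuple.
    adj_ = adj + sp.eye(adj.shape[0])
    rowsum = np.array(adj_.sum(1)).flatten()
    d_inv_sqrt = sp.diags(np.power(rowsum, -0.5))
    adj_norm = d_inv_sqrt.dot(adj_).dot(d_inv_sqrt).tocoo()
    return sparse_to_tuple(adj_norm)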
Example 9
def test_one_graph(adj, adj_orig, features_csr, num_node, k_num, model,
                   placeholders, sess, feed_dict):
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]),
        shape=adj_orig.shape)  # delete self loop
    adj_orig.eliminate_zeros()
    adj_new = adj
    features = sparse_to_tuple(features_csr.tocoo())
    adj_label = adj_new + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    adj_clean = adj_orig.tocsr()
    k_num = int(k_num * size / noise_ratio)  # match the budget size
    if k_num != 0:
        adj_norm, adj_norm_sparse = preprocess_graph(adj_new)
        feed_dict.update({placeholders["adj"]: adj_norm})
        feed_dict.update({placeholders["adj_orig"]: adj_label})
        feed_dict.update({placeholders["features"]: features})
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        model.k = k_num
        x_tilde = sess.run(model.realD_tilde,
                           feed_dict=feed_dict,
                           options=run_options)
        noised_indexes, clean_indexes = get_noised_indexes(
            x_tilde, adj_new, num_node)
        feed_dict.update({placeholders["noised_mask"]: noised_indexes})
        feed_dict.update({placeholders["clean_mask"]: clean_indexes})
        feed_dict.update({placeholders["noised_num"]: len(noised_indexes) / 2})
        test1 = model.test_new_indexes.eval(session=sess, feed_dict=feed_dict)
        test0 = model.test_noised_index.eval(session=sess, feed_dict=feed_dict)
        new_adj = get_new_adj(feed_dict, sess, model, noised_indexes, adj_new,
                              k_num, num_node)
    else:
        # new_adj = adj
        new_adj = adj.copy()
    new_adj_sparse = sp.csr_matrix(new_adj)

    psnr = PSNR(adj_clean[:num_node, :num_node],
                new_adj_sparse[:num_node, :num_node])
    wls = WL_no_label(adj_clean[:num_node, :num_node],
                      new_adj_sparse[:num_node, :num_node])
    return psnr, wls
Example 10
def format_data(data_source):

    adj, features, labels = load_data2(data_source)

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().items() if v is item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item

    return feas
Example 11
def prepare_data_for_model(adj_train, target_adj_train, device):
    """"Prepare the given data ready to be put in the model"""

    # Some preprocessing
    adj_train_norm   = preprocess_graph(adj_train)
    adj_train_norm   = make_sparse(adj_train_norm)
    adj_train_labels = torch.FloatTensor(target_adj_train + sp.eye(target_adj_train.shape[0]).todense())

    # Features are the identity matrix
    features = sp.eye(adj_train.shape[0]).tolil()
    features = make_sparse(features)

    data = {
        'adj_norm'  : adj_train_norm,
        'adj_labels': adj_train_labels,
        'features'  : features,
    }

    data['adj_norm'] = data['adj_norm'].to(device)
    data['adj_labels'] = data['adj_labels'].to(device)
    data['features'] = data['features'].to(device)

    return data
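
make_sparse, used here to move scipy matrices into PyTorch, is also not shown. A minimal sketch, assuming it takes a scipy sparse matrix (hypothetical implementation; adapt the input handling if preprocess_graph returns the tuple form instead):

import numpy as np
import torch

def make_sparse(sparse_mx):
    # Convert a scipy sparse matrix to a torch sparse float tensor.
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)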
Example 12
def format_data(data_source):

    #    adj = load_adj('../data/facebook/0')
    #    features = load_attr('../data/facebook/0')
    #    labels = np.ones(adj.shape[0])
    #    adj, features, labels = load_data2(data_source)
    adj, features, labels = load_data('twitter')
    #    print(adj)
    print(type(adj), type(features))
    print(adj.shape, features.shape)
    features = normalize(features, norm='l1', axis=1)
    print(features[:5])
    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels
    ]
    feas = {}
    for item in items:
        # item_name = [k for k, v in locals().items() if v is item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item

    return feas
Example 13
def train_one_graph(adj, adj_orig, features_csr, num_node, k_num, model, opt,
                    placeholders, sess, new_learning_rate, feed_dict, epoch,
                    graph_index):
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]),
        shape=adj_orig.shape)  # delete self loop
    adj_orig.eliminate_zeros()
    adj_new = adj
    features = sparse_to_tuple(features_csr.tocoo())
    adj_norm, adj_norm_sparse = preprocess_graph(adj_new)
    adj_label = adj_new + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    ############
    # build models
    adj_clean = adj_orig.tocoo()
    adj_clean_tensor = tf.SparseTensor(indices=np.stack(
        [adj_clean.row, adj_clean.col], axis=-1),
                                       values=adj_clean.data,
                                       dense_shape=adj_clean.shape)
    ### initial clean and noised_mask
    clean_mask = np.array([1, 2, 3, 4, 5])
    noised_mask = np.array([6, 7, 8, 9, 10])
    noised_num = noised_mask.shape[0] / 2
    ##################################
    #
    feed_dict.update({placeholders["adj"]: adj_norm})
    feed_dict.update({placeholders["adj_orig"]: adj_label})
    feed_dict.update({placeholders["features"]: features})
    node_mask = np.ones([adj.shape[0], n_class])
    node_mask[num_node:, :] = 0
    feed_dict.update({placeholders['node_mask']: node_mask})
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    model.k = k_num
    #####################################################
    t = time.time()
    ########
    if epoch > int(
            FLAGS.epochs / 2):  # after the halfway point, train the generator
        _ = sess.run([opt.G_min_op], feed_dict=feed_dict, options=run_options)

    else:
        _, x_tilde = sess.run([opt.D_min_op, model.realD_tilde],
                              feed_dict=feed_dict,
                              options=run_options)
        if epoch == int(FLAGS.epochs / 2):
            noised_indexes, clean_indexes = get_noised_indexes(
                x_tilde, adj_new, num_node)
            feed_dict.update({placeholders["noised_mask"]: noised_indexes})
            feed_dict.update({placeholders["clean_mask"]: clean_indexes})
            feed_dict.update(
                {placeholders["noised_num"]: len(noised_indexes) / 2})

    if epoch % 1 == 0 and graph_index == 0:
        if epoch > int(FLAGS.epochs / 2):
            print("This is the generation part")
        else:
            print("This is the cluster mask part")
        print("Epoch:", '%04d' % (epoch + 1), "time=",
              "{:.5f}".format(time.time() - t))
        G_loss, D_loss, new_learn_rate_value = sess.run(
            [opt.G_comm_loss, opt.D_loss, new_learning_rate],
            feed_dict=feed_dict,
            options=run_options)
        print("Step: %d,G: loss=%.7f ,L_u: loss= %.7f, LR=%.7f" %
              (epoch, G_loss, D_loss + 1, new_learn_rate_value))
        ##########################################
    return
Example 14
def train():
    ## add noise label
    train_adj_list, train_adj_orig_list, train_k_list = add_noises_on_adjs(
        train_structure_input, train_num_nodes_all)
    test_adj_list, test_adj_orig_list, test_k_list = add_noises_on_adjs(
        test_structure_input, test_num_nodes_all)

    adj = train_adj_list[0]
    features_csr = train_feature_input[0]
    features = sparse_to_tuple(features_csr.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]
    adj_orig = train_adj_orig_list[0]
    adj_label = train_adj_list[0] + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    num_nodes = adj.shape[0]

    adj_norm, adj_norm_sparse = preprocess_graph(adj)

    ############
    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    new_learning_rate_dis = tf.train.exponential_decay(
        FLAGS.learn_rate_init,
        global_step=global_steps,
        decay_steps=100,
        decay_rate=0.95)
    new_learning_rate_gen = tf.train.exponential_decay(
        FLAGS.learn_rate_init_gen,
        global_step=global_steps,
        decay_steps=100,
        decay_rate=0.95)
    new_learn_rate_value = FLAGS.learn_rate_init
    # set the placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
        'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
        'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"),
        'dropout': tf.placeholder_with_default(0.3,
                                               shape=(),
                                               name="ph_dropout"),
        'clean_mask': tf.placeholder(tf.int32),
        'noised_mask': tf.placeholder(tf.int32),
        'noised_num': tf.placeholder(tf.int32),
        'node_mask': tf.placeholder(tf.float32)
    }
    # build models
    model = None
    adj_clean = adj_orig.tocoo()
    adj_clean_tensor = tf.SparseTensor(indices=np.stack(
        [adj_clean.row, adj_clean.col], axis=-1),
                                       values=adj_clean.data,
                                       dense_shape=adj_clean.shape)
    if model_str == "mask_gvae":
        model = mask_gvae(placeholders,
                          num_features,
                          num_nodes,
                          features_nonzero,
                          new_learning_rate_dis,
                          new_learning_rate_gen,
                          adj_clean=adj_clean_tensor,
                          k=int(adj.sum() * noise_ratio))
        model.build_model()
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    opt = 0
    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'mask_gvae':
            opt = Optimizer(preds=tf.reshape(model.x_tilde, [-1]),
                            labels=tf.reshape(
                                tf.sparse_tensor_to_dense(
                                    placeholders['adj_orig'],
                                    validate_indices=False), [-1]),
                            model=model,
                            num_nodes=num_nodes,
                            global_step=global_steps,
                            new_learning_rate=new_learning_rate_dis,
                            new_learning_rate_gen=new_learning_rate_gen,
                            placeholders=placeholders)
    # init the session
    sess = tf.Session()
    # sess.run(tf.global_variables_initializer()) # initial test
    # initial clean and noised_mask
    clean_mask = np.array([1, 2, 3, 4, 5])
    noised_mask = np.array([6, 7, 8, 9, 10])
    noised_num = noised_mask.shape[0] / 2
    # ##################################
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, clean_mask,
                                    noised_mask, noised_num, placeholders)
    node_mask = np.ones([num_nodes, n_class])
    node_mask[train_num_nodes_all[0]:, :] = 0
    feed_dict.update({placeholders['node_mask']: node_mask})
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    # ##################################

    if if_train:
        sess.run(tf.global_variables_initializer())  # initial test
        for epoch in range(FLAGS.epochs):
            for i in tqdm(range(len(train_feature_input))):
                train_one_graph(train_adj_list[i], train_adj_orig_list[i],
                                train_feature_input[i], train_num_nodes_all[i],
                                train_k_list[i], model, opt, placeholders,
                                sess, new_learning_rate_gen, feed_dict, epoch,
                                i)
        saver = tf.train.Saver()  # define saver in the loop
        saver.save(sess, "./checkpoints/{}.ckpt".format(dataset_str))
        print("Optimization Finished!")
        psnr_list = []
        wls_list = []
        for i in range(len(test_feature_input)):
            psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i],
                                       test_feature_input[i],
                                       test_num_nodes_all[i], test_k_list[i],
                                       model, placeholders, sess, feed_dict)
            psnr_list.append(psnr)
            wls_list.append(wls)
        print(psnr_list)
    else:
        saver = tf.train.Saver()  # define saver in the loop
        saver.restore(sess, "./checkpoints/{}.ckpt".format(dataset_str))
        psnr_list = []
        wls_list = []
        for i in range(len(test_feature_input)):
            psnr, wls = test_one_graph(test_adj_list[i], test_adj_orig_list[i],
                                       test_feature_input[i],
                                       test_num_nodes_all[i], test_k_list[i],
                                       model, placeholders, sess, feed_dict)
            psnr_list.append(psnr)
            wls_list.append(wls)
        print(psnr_list)
    ##################################
    ################## the PSNR and WL #########################
    print("#" * 15)
    print("The PSNR is:")
    print(np.mean(psnr_list))
    print("The WL is :")
    print(np.mean(wls_list))
    return np.mean(psnr_list), np.mean(wls_list)
Example 15
def train_gcn(features, adj_train, train_edges, train_edges_false, test_edges,
              test_edges_false):
    # Settings
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
    flags.DEFINE_integer('hidden1', 96, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', 48, 'Number of units in hidden layer 2.')
    flags.DEFINE_float('weight_decay', 0.,
                       'Weight for L2 loss on embedding matrix.')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_string('model', 'gcn_vae', 'Model string.')
    flags.DEFINE_integer('features', 1,
                         'Whether to use features (1) or not (0).')

    model_str = FLAGS.model

    # 1-D index array, used in the cost function to focus only on interactions with high confidence
    mask_index = construct_optimizer_list(features.shape[0], train_edges,
                                          train_edges_false)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj_train
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj = adj_train

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float64),
        'adj': tf.sparse_placeholder(tf.float64),
        'adj_orig': tf.sparse_placeholder(tf.float64),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero)
    elif model_str == 'gcn_vae':
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)

    pos_weight = 1
    norm = 1
    #pos_weight = train_edges_false.shape[0] / float(train_edges.shape[0])
    #norm = (train_edges.shape[0]+train_edges_false.shape[0]) / float(train_edges_false.shape[0]*train_edges_false.shape[0])

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=tf.reshape(
                                  tf.sparse_tensor_to_dense(
                                      placeholders['adj_orig'],
                                      validate_indices=False), [-1]),
                              pos_weight=pos_weight,
                              norm=norm,
                              mask=mask_index)
        elif model_str == 'gcn_vae':
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=tf.reshape(
                                   tf.sparse_tensor_to_dense(
                                       placeholders['adj_orig'],
                                       validate_indices=False), [-1]),
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm,
                               mask=mask_index)

    # Initialize session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Train model
    for epoch in range(FLAGS.epochs):

        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                        placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(outs[1]))

    print("Optimization Finished!")

    #return embedding for each protein
    emb = sess.run(model.z_mean, feed_dict=feed_dict)
    return emb
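
construct_feed_dict appears throughout these examples. A minimal sketch of the four-placeholder variant used here (the examples with extra masks extend the same pattern with more placeholders):

def construct_feed_dict(adj_normalized, adj_label, features, placeholders):
    # Map the preprocessed inputs onto their TensorFlow placeholders.
    feed_dict = dict()
    feed_dict.update({placeholders['features']: features})
    feed_dict.update({placeholders['adj']: adj_normalized})
    feed_dict.update({placeholders['adj_orig']: adj_label})
    return feed_dict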
Example 16
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj, test_percent=10., val_percent=5.)
    adj = adj_train  # the adjacency matrix with all validation and test entries masked out

    if FLAGS.features == 0:
        features = sp.identity(
            features.shape[0])  # featureless. sparse coo_matrix.

    # Some preprocessing
    #adj_norm = preprocess_graph(adj)

    attn_adj_norm = adj + sp.eye(adj.shape[0])
    attn_adj_norm = sparse_to_tuple(attn_adj_norm)  # a tuple

    adj_norm = preprocess_graph(
        adj)  # a tuple. Normalization; the identity matrix is added here.

    # Define placeholders
    placeholders = {  # this is passed directly to the model to build the graph.
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'in_drop': tf.placeholder_with_default(0., shape=()),
        'attn_drop': tf.placeholder_with_default(0., shape=()),
        'feat_drop': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]
Example 17
def train():
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]),
        shape=adj_orig.shape)  # delete self loop
    # adj_orig.eliminate_zeros()
    # adj_new = randomly_add_edges(adj_orig, k=FLAGS.k)
    adj_new = adj_orig
    features_new_csr = randomly_flip_features(features_csr,
                                              k=FLAGS.k,
                                              seed=seed + 5)
    feature_new = sparse_to_tuple(features_new_csr.tocoo())
    # feature_new = features
    # features_new_csr =features_csr
    # features_nonzero = feature_new[1].shape[0]
    # train GCN first
    # sizes = [FLAGS.gcn_hidden1, FLAGS.gcn_hidden2, n_class]
    # surrogate_model = GCN.GCN(sizes, adj_norm_sparse_csr, features_csr, with_relu=True, name="surrogate", gpu_id=gpu_id)
    # surrogate_model.train(adj_norm_sparse_csr, split_train, split_val, node_labels)
    # ori_acc = surrogate_model.test(split_unlabeled, node_labels, adj_norm_sparse_csr)
    ####################### the clean and noised GCN  ############################
    testacc_clean, valid_acc_clean = GCN.run(FLAGS.dataset,
                                             adj_orig,
                                             features_csr,
                                             name="clean")
    testacc, valid_acc = GCN.run(FLAGS.dataset,
                                 adj_new,
                                 features_new_csr,
                                 name="original")
    testacc_upper, valid_acc_upper = GCN.run(FLAGS.dataset,
                                             adj_new,
                                             features_csr,
                                             name="upper_bound")
    ###########
    print(testacc_clean)
    print(testacc)
    print(testacc_upper)
    ###########
    ##############################################################################
    adj_norm, adj_norm_sparse = preprocess_graph(adj_new)
    adj_norm_sparse_csr = adj_norm_sparse.tocsr()
    adj_label = adj_new + sp.eye(adj.shape[0])
    adj_label_sparse = adj_label
    adj_label = sparse_to_tuple(adj_label)
    if_drop_edge = True
    ## set the checkpoint path
    checkpoints_dir_base = "./checkpoints"
    current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    checkpoints_dir = os.path.join(checkpoints_dir_base, current_time,
                                   current_time)
    ############
    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    new_learning_rate = tf.train.exponential_decay(FLAGS.learn_rate_init,
                                                   global_step=global_steps,
                                                   decay_steps=10000,
                                                   decay_rate=0.98)
    new_learn_rate_value = FLAGS.learn_rate_init
    ## set the placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
        'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
        'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"),
        'dropout': tf.placeholder_with_default(0., shape=(),
                                               name="ph_dropout"),
        # 'node_labels': tf.placeholder(tf.float32, name = "ph_node_labels"),
        # 'node_ids' : tf.placeholder(tf.float32, name = "ph_node_ids")
    }
    # build models
    model = None
    if model_str == "gae_gan":
        model = gaegan(placeholders, num_features, num_nodes, features_nonzero,
                       new_learning_rate)
        model.build_model()
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    opt = 0
    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gae_gan':
            opt = Optimizergaegan(
                preds=tf.reshape(model.x_tilde, [-1]),
                labels=tf.reshape(
                    tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                              validate_indices=False), [-1]),
                #comm_label=placeholders["comm_label"],
                model=model,
                num_nodes=num_nodes,
                pos_weight=pos_weight,
                norm=norm,
                global_step=global_steps,
                new_learning_rate=new_learning_rate)
    # init the sess
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = ""
    var_list = tf.global_variables()
    var_list = [
        var for var in var_list
        if ("encoder" in var.name) or ('generate' in var.name)
    ]
    saver = tf.train.Saver(var_list, max_to_keep=10)
    if if_save_model:
        os.mkdir(os.path.join(checkpoints_dir_base, current_time))
        saver.save(sess, checkpoints_dir)  # save the graph

    if restore_trained_our:
        checkpoints_dir_our = "./checkpoints"
        checkpoints_dir_our = os.path.join(checkpoints_dir_our,
                                           FLAGS.trained_our_path)
        # checkpoints_dir_meta = os.path.join(checkpoints_dir_base, FLAGS.trained_our_path,
        #                                     FLAGS.trained_our_path + ".meta")
        #saver.restore(sess,tf.train.latest_checkpoint(checkpoints_dir_our))
        saver.restore(
            sess,
            os.path.join("./checkpoints", "191215231708", "191215231708-1601"))
        print("model_load_successfully")
    # else:  # if not restore the original then restore the base dis one.
    #     checkpoints_dir_base = os.path.join("./checkpoints/base", FLAGS.trained_base_path)
    #     saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir_base))

    feed_dict = construct_feed_dict(adj_norm, adj_label, feature_new,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    # pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict)

    #### save new_adj without norm#############
    if restore_trained_our:
        modified_adj = get_new_adj(feed_dict, sess, model)
        modified_adj = sp.csr_matrix(modified_adj)
        sp.save_npz("transfer_new/transfer_1216_1/qq_5000_gaegan_new.npz",
                    modified_adj)
        sp.save_npz("transfer_new/transfer_1216_1/qq_5000_gaegan_ori.npz",
                    adj_new)
        print("save the loaded adj")
    # print("before training generator")
    #####################################################
    ##  get all variables in the model
    def model_summary():
        model_vars = tf.trainable_variables()
        slim.model_analyzer.analyze_vars(model_vars, print_info=True)

    model_summary()
    #####################################################
    G_loss_min = 1000
    for epoch in range(FLAGS.epochs):
        t = time.time()
        # run Encoder's optimizer
        #sess.run(opt.encoder_min_op, feed_dict=feed_dict)
        # run G optimizer  on trained model
        if restore_trained_our:
            sess.run(opt.G_min_op, feed_dict=feed_dict, options=run_options)
        else:  # it is the new model
            if epoch < FLAGS.epochs:
                sess.run(opt.G_min_op,
                         feed_dict=feed_dict,
                         options=run_options)
            #
        ##
        ##
        if epoch % 50 == 0:
            print("Epoch:", '%04d' % (epoch + 1), "time=",
                  "{:.5f}".format(time.time() - t))
            G_loss, laplacian_para, new_learn_rate_value = sess.run(
                [opt.G_comm_loss, opt.reg, new_learning_rate],
                feed_dict=feed_dict,
                options=run_options)
            #new_adj = get_new_adj(feed_dict, sess, model)
            new_adj = model.new_adj_output.eval(session=sess,
                                                feed_dict=feed_dict)
            temp_pred = new_adj.reshape(-1)
            #temp_ori = adj_norm_sparse.todense().A.reshape(-1)
            temp_ori = adj_label_sparse.todense().A.reshape(-1)
            mutual_info = normalized_mutual_info_score(temp_pred, temp_ori)
            print(
                "Step: %d,G: loss=%.7f ,Lap_para: %f  ,info_score = %.6f, LR=%.7f"
                % (epoch, G_loss, laplacian_para, mutual_info,
                   new_learn_rate_value))
            ## here is the debug part of the model#################################
            new_features, reg_trace, reg_log, reward_ratio, node_per, fea_per = sess.run(
                [
                    model.new_fliped_features, opt.reg_trace, opt.reg_log,
                    opt.percentage_fea, model.node_per, model.fea_per
                ],
                feed_dict=feed_dict)
            print("reg_trace is:")
            print(reg_trace)
            print("reg_log is:")
            print(reg_log)
            print("reward_percentage")
            print(reward_ratio)
            print("New features")
            print(new_features[5, :20])
            print("node_percent")
            print(node_per)
            print("fea_per")
            print(fea_per)
            new_features_csr = sp.csr_matrix(new_features)
            ##########################################
            # check whether G_loss reached a new minimum
            if (G_loss < G_loss_min) and (epoch > 1000) and (if_save_model):
                saver.save(sess,
                           checkpoints_dir,
                           global_step=epoch,
                           write_meta_graph=False)
                print("min G_loss new")
            if G_loss < G_loss_min:
                G_loss_min = G_loss

        if (epoch % 200 == 1) and if_save_model:
            saver.save(sess,
                       checkpoints_dir,
                       global_step=epoch,
                       write_meta_graph=False)
            print("Epoch:", '%04d' % (epoch + 1), "time=",
                  "{:.5f}".format(time.time() - t))
    saver.save(sess,
               checkpoints_dir,
               global_step=FLAGS.epochs,
               write_meta_graph=True)
    print("Optimization Finished!")
    feed_dict.update({placeholders['dropout']: 0})
    new_adj = get_new_adj(feed_dict, sess, model)
    new_adj = new_adj - np.diag(np.diag(new_adj))
    new_adj_sparse = sp.csr_matrix(new_adj)
    print((abs(new_adj_sparse - new_adj_sparse.T) > 1e-10).nnz == 0)
    # new_adj_norm, new_adj_norm_sparse = preprocess_graph(new_adj)
    # new_adj_norm_sparse_csr = new_adj_norm_sparse.tocsr()
    # modified_model = GCN.GCN(sizes, new_adj_norm_sparse_csr, features_csr, with_relu=True, name="surrogate", gpu_id=gpu_id)
    # modified_model.train(new_adj_norm_sparse_csr, split_train, split_val, node_labels)
    # modified_acc = modified_model.test(split_unlabeled, node_labels, new_adj_norm_sparse_csr)
    testacc_new, valid_acc_new = GCN.run(FLAGS.dataset,
                                         new_adj_sparse,
                                         features_csr,
                                         name="modified")
    new_adj = get_new_adj(feed_dict, sess, model)
    new_adj = new_adj - np.diag(np.diag(new_adj))
    new_adj_sparse = sp.csr_matrix(new_adj)
    testacc_new2, valid_acc_new = GCN.run(FLAGS.dataset,
                                          adj_new,
                                          new_features_csr,
                                          name="modified2")
    new_adj = get_new_adj(feed_dict, sess, model)
    new_adj = new_adj - np.diag(np.diag(new_adj))
    new_adj_sparse = sp.csr_matrix(new_adj)
    testacc_new3, valid_acc_new = GCN.run(FLAGS.dataset,
                                          new_adj_sparse,
                                          new_features_csr,
                                          name="modified3")
    #np.save("./data/hinton/hinton_new_adj_48_0815.npy", new_adj)
    #roc_score, ap_score = get_roc_score(test_edges, test_edges_false,feed_dict, sess, model)
    ##### The final results ####
    print("*" * 30)
    print("the final results:\n")
    print("*" * 30)
    print("The clean acc is: ")
    print(testacc_clean)
    print("*#" * 15)
    print("The original acc is: ")
    print(testacc)
    print("*#" * 15)
    print("The only modify adj acc is : ")
    print(testacc_new)
    print("*#" * 15)
    print("The only modify feature acc is : ")
    print(testacc_new2)
    print("*#" * 15)
    print("The modify both adj and feature and acc is : ")
    print(testacc_new3)
    return new_adj, testacc_clean, testacc, testacc_new, testacc_new2, testacc_new3
Example 18
showed_target_idx = 0  # the target index group of targets you want to show
run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)
###################################
### read and process the graph
model_str = FLAGS.model
dataset_str = FLAGS.dataset
# Load data
# _A_obs, _X_obs, _z_obs = utils.load_npz('data/citeseer.npz')
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(
    "citeseer")

# _A_obs = _A_obs + _A_obs.T  # symmetrize: convert GCN_ori to GCN
# _A_obs[_A_obs > 1] = 1
# adj = _A_obs

adj_norm, adj_norm_sparse = preprocess_graph(adj)

# _K = _z_obs.max() + 1  # number of classes
_K = y_train.shape[1]
features_normlize = normalize(features.tocsr(), axis=0, norm='max')
features = sp.csr_matrix(features_normlize)

# adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
# adj = adj_train
if FLAGS.features == 0:
    features = sp.identity(features.shape[0])  # featureless
# Some preprocessing

placeholders = {
    'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
    'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
Example 19
def train(unused):
    if_drop_edge = True
    if_save_model = not FLAGS.test
    if_train_dis = False  # whether to train the community detection part while training the generator
    restore_trained_our = FLAGS.test
    showed_target_idx = 0  # the target index group of targets you want to show
    ##################################
    ### read and process the graph
    model_str = FLAGS.model
    dataset_str = FLAGS.dataset
    # Load data
    if FLAGS.dataset == "dblp":
        adj = sp.load_npz("data/dblp/dblp_medium_adj.npz")
        features = np.load("data/dblp/dblp_medium_features.npy")
        features_normlize = normalize(features, axis=0, norm='max')
        features = sp.csr_matrix(features_normlize)
        target_list = np.load("data/dblp/dblp_medium_label.npy")
    elif FLAGS.dataset == "finance":
        adj = sp.load_npz('./data/finance/Finance_large_adj.npz')
        features = np.load("data/finance/Finance_large_features.npy")
        features_normlize = normalize(features, axis=0, norm='max')
        features = sp.csr_matrix(features_normlize)
        target_list = np.load("data/finance/Finance_large_label.npy")
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless
    # Some preprocessing
    adj_norm, adj_norm_sparse = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]
    cost_val = []
    acc_val = []
    val_roc_score = []
    adj_label = adj_orig + sp.eye(adj.shape[0])
    adj_label_sparse = adj_label
    adj_label = sparse_to_tuple(adj_label)

    if_drop_edge = True
    ## set the checkpoint path
    checkpoints_dir_base = "./checkpoints"
    current_time = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    checkpoints_dir = os.path.join(checkpoints_dir_base, current_time,
                                   current_time)

    tf.reset_default_graph()
    global_steps = tf.get_variable('global_step',
                                   trainable=False,
                                   initializer=0)
    new_learning_rate = tf.train.exponential_decay(FLAGS.learn_rate_init,
                                                   global_step=global_steps,
                                                   decay_steps=10000,
                                                   decay_rate=0.98)
    new_learn_rate_value = FLAGS.learn_rate_init
    ## set the placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32, name="ph_features"),
        'adj': tf.sparse_placeholder(tf.float32, name="ph_adj"),
        'adj_orig': tf.sparse_placeholder(tf.float32, name="ph_orig"),
        'dropout': tf.placeholder_with_default(0., shape=(),
                                               name="ph_dropout"),
    }
    # build models
    model = None
    if model_str == "cdattack":
        model = cdattack(placeholders, num_features, num_nodes,
                         features_nonzero, new_learning_rate, target_list,
                         FLAGS.alpha, FLAGS.comm_name)
        model.build_model()
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    opt = 0
    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'cdattack':
            opt = Optimizercdattack(preds=tf.reshape(model.x_tilde, [-1]),
                                    labels=tf.reshape(
                                        tf.sparse_tensor_to_dense(
                                            placeholders['adj_orig'],
                                            validate_indices=False), [-1]),
                                    model=model,
                                    num_nodes=num_nodes,
                                    pos_weight=pos_weight,
                                    norm=norm,
                                    target_list=target_list,
                                    global_step=global_steps,
                                    new_learning_rate=new_learning_rate)
    # init the sess
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = ""
    var_list = tf.global_variables()
    saver = tf.train.Saver(var_list, max_to_keep=10)
    if if_save_model:
        os.mkdir(os.path.join(checkpoints_dir_base, current_time))
        saver.save(sess, checkpoints_dir)  # save the graph

    if restore_trained_our:
        checkpoints_dir_our = "./checkpoints"
        checkpoints_dir_our = os.path.join(checkpoints_dir_our,
                                           FLAGS.trained_our_path)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoints_dir_our))
        print("model_load_successfully")
    feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict)
    modified_adj = get_new_adj(feed_dict, sess, model)
    modified_adj = sp.csr_matrix(modified_adj)
    #####################################################
    G_loss_min = 1000
    if not FLAGS.test:
        for epoch in range(FLAGS.epochs):
            t = time.time()
            if restore_trained_our:
                sess.run(opt.G_min_op, feed_dict=feed_dict)
            else:  # it is the new model
                if epoch >= int(FLAGS.epochs / 2):
                    sess.run(opt.G_min_op, feed_dict=feed_dict)
                    if if_train_dis == True:
                        sess.run(opt.D_min_op, feed_dict=feed_dict)
                # run D optimizer
                if epoch < int(FLAGS.epochs / 2):
                    sess.run(opt.D_min_op_clean, feed_dict=feed_dict)

            if epoch % 50 == 0:
                print("Epoch:", '%04d' % (epoch + 1), "time=",
                      "{:.5f}".format(time.time() - t))
                comm_loss_clean, comm_loss, G_loss, new_learn_rate_value = sess.run(
                    [
                        opt.D_mincut_loss_clean, opt.D_mincut_loss,
                        opt.G_comm_loss, new_learning_rate
                    ],
                    feed_dict=feed_dict)

                new_adj = model.new_adj_output.eval(session=sess,
                                                    feed_dict=feed_dict)
                temp_pred = new_adj.reshape(-1)
                temp_ori = adj_label_sparse.todense().A.reshape(-1)
                print(
                    "Step %d:Loss Lu_clean = %.7f ,  Loss Lu = %.7f Loss Lg: loss=%.7f , LR=%.7f"
                    % (epoch, comm_loss_clean, comm_loss, G_loss,
                       new_learn_rate_value))
                ## check the D_loss_min
                if (G_loss < G_loss_min) and (
                        epoch > int(FLAGS.epochs / 2) + 1) and (if_save_model):
                    saver.save(sess,
                               checkpoints_dir,
                               global_step=epoch,
                               write_meta_graph=False)
                    print("min G_loss new")
                if G_loss < G_loss_min:
                    G_loss_min = G_loss
            if (epoch % 200 == 0) and if_save_model:
                saver.save(sess,
                           checkpoints_dir,
                           global_step=epoch,
                           write_meta_graph=False)
                print("Save the model at epoch:", '%04d' % (epoch + 1))
    if if_save_model:
        saver.save(sess,
                   checkpoints_dir,
                   global_step=FLAGS.epochs,
                   write_meta_graph=False)
    print("Optimization Finished!")
    new_adj = get_new_adj(feed_dict, sess, model)
    ##### The final results ######
    feed_dict.update({placeholders['dropout']: 0})
    pred_dis_res = model.vaeD_tilde.eval(session=sess, feed_dict=feed_dict)
    print("*" * 15)
    print("The modified matrics")
    print_M1(target_list, pred_dis_res, FLAGS.n_clusters)
    print("*" * 15)
    print_M2(target_list, pred_dis_res, FLAGS.n_clusters)
    print("*" * 15)
    new_adj = get_new_adj(feed_dict, sess, model)
    x_tilde_out = model.new_adj_output.eval(session=sess, feed_dict=feed_dict)
    temp_pred = new_adj.reshape(-1)
    temp_ori = adj_norm_sparse.todense().A.reshape(-1)
    return
Example 20
# pre process data #
print("pre processing data...")
posts_data = preprocessing.preprocess_text(posts_data)
users_data = preprocessing.preprocess_text(users_data)

# create network with topics #
print("create network")
topics = graph.get_topics(users_data, 0.1, 5)
network_file_name = SOURCE / 'outputs/bullies_network.csv'
graph.create_csv_network_from_topics(network_file_name, topics)
network_graph = graph.create_graph(network_file_name)

# pre process network #
print("pre processing network...")
network_graph = preprocessing.preprocess_graph(network_graph,
                                               0.1)  #todo change back to 0.1
graph.graph_attributes(network_graph)

# extract nlp features #
print("extract nlp features...")
feature_list = [
    'post_length', 'tfidf', 'topics', 'screamer', 'words', 'off_dis',
    'not_off_dis'
]
X_nlp = nlp_feature_extractions.extract_features(users_data, feature_list)
y_nlp = (users_data['cb_level'] == 3).astype(int)
X_users = nlp_feature_extractions.extract_number_of_posts(posts_data)
X_nlp = X_nlp.merge(X_users, on='writer')

# extract network features #
print("extract network features...")
Example 21
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()
#
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj)
adj_Rs = get_adj_01(adj_train.toarray())
adj = adj_train
adj_R = [sp.csr_matrix(adj_one) for adj_one in adj_Rs]

# If the sampled features are not used, fall back to the identity matrix as the feature matrix
if FLAGS.features == 0:
    features = sp.identity(features.shape[0])

# Preprocessing; the main logic lives in the preprocess module
adj_R_norm = [preprocess_graph(one_adj) for one_adj in adj_R]

# placeholders
placeholders = {
    'features': tf.sparse_placeholder(tf.float32),
    # one sparse placeholder per relation-specific adjacency matrix (20 in total)
    'adj': [tf.sparse_placeholder(tf.float32) for _ in range(20)],
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}

num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]
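
# sparse_to_tuple is used throughout these examples but never shown. The canonical
# helper from the GAE reference code converts a scipy sparse matrix into the
# (coords, values, shape) triple expected by tf.sparse_placeholder -- which is why
# features[2][1] above is the feature dimension and features[1].shape[0] is the
# number of non-zero entries:
import numpy as np
import scipy.sparse as sp

def sparse_to_tuple(sparse_mx):
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape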
Esempio n. 22
0
num_nodes = adj.shape[0]  # number of nodes in adjacency matrix
num_features = features_shape[1]  # number of features (columns of the features matrix)
features_nonzero = features_tuple[1].shape[0]  # number of non-zero entries (length of the values list)
# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()
np.random.seed(0)  # IMPORTANT: guarantees consistent train/test splits
adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj, test_frac=.3, val_frac=.1)

# Normalize adjacency matrix
adj_norm = preprocess_graph(adj_train)
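
# preprocess_graph (called above) is the symmetric normalization from the GAE
# reference code, adj_norm = D^{-1/2} (A + I) D^{-1/2}, returned in the same tuple
# form as sparse_to_tuple. A sketch of the canonical implementation:
def preprocess_graph(adj):
    adj = sp.coo_matrix(adj)
    adj_ = adj + sp.eye(adj.shape[0])  # add self-loops
    rowsum = np.array(adj_.sum(1))
    degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten())
    adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(
        degree_mat_inv_sqrt).tocoo()
    return sparse_to_tuple(adj_normalized)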

# Add in diagonals
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)
# Inspect train/test split
print("Total nodes:", adj.shape[0])
print("Total edges:", int(
    adj.nnz / 2))  # adj is symmetric, so nnz (num non-zero) = 2*num_edges
print("Training edges (positive):", len(train_edges))
print("Training edges (negative):", len(train_edges_false))
print("Validation edges (positive):", len(val_edges))
print("Validation edges (negative):", len(val_edges_false))
print("Test edges (positive):", len(test_edges))
print("Test edges (negative):", len(test_edges_false))
# Define hyperparameters
Esempio n. 23
0
def main(args):
    """ Train GAE """

    # Compute the device upon which to run
    device = torch.device("cuda" if args.use_cuda else "cpu")

    print("Using {} dataset".format(args.dataset_str))
    # Load data
    np.random.seed(1)
    adj, features = load_data(args.dataset_str)
    N, D = features.shape

    # Store original adjacency matrix (without diagonal entries)
    adj_orig = adj
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)

    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = make_sparse(adj_train_norm)
    adj_train_labels = torch.FloatTensor(adj_train +
                                         sp.eye(adj_train.shape[0]).todense())
    features = make_sparse(features)

    n_edges = adj_train_labels.sum()

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    gae = GAE(data, n_hidden=32, n_latent=16, dropout=args.dropout)

    # Send the model and data to the available device
    gae.to(device)
    data['adj_norm'] = data['adj_norm'].to(device)
    data['adj_labels'] = data['adj_labels'].to(device)
    data['features'] = data['features'].to(device)

    optimizer = optim.Adam(gae.parameters(),
                           lr=args.lr,
                           betas=(0.95, 0.999),
                           weight_decay=args.weight_decay)

    # Results
    results = defaultdict(list)

    # Full batch training loop
    for epoch in range(args.num_epochs):

        t = time.time()
        gae.train()
        optimizer.zero_grad()

        # forward pass
        output = gae(data['features'], data['adj_norm'])

        # Compute the loss
        logits = output
        targets = data['adj_labels']
        loss = gae.norm * F.binary_cross_entropy_with_logits(
            logits, targets, pos_weight=gae.pos_weight)

        loss.backward()
        optimizer.step()

        results['train_elbo'].append(loss.item())

        gae.eval()
        emb = gae.get_embeddings(data['features'], data['adj_norm'])
        accuracy, roc_curr, ap_curr = eval_gae(val_edges, val_edges_false,
                                               emb, adj_orig)
        results['accuracy_train'].append(accuracy)
        results['roc_train'].append(roc_curr)
        results['ap_train'].append(ap_curr)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(loss.item()), "train_acc=",
              "{:.5f}".format(accuracy), "val_roc=", "{:.5f}".format(roc_curr),
              "val_ap=", "{:.5f}".format(ap_curr))

        # Test loss
        if epoch % args.test_freq == 0:
            with torch.no_grad():
                gae.eval()
                emb = gae.get_embeddings(data['features'], data['adj_norm'])
                accuracy, roc_score, ap_score = eval_gae(
                    test_edges, test_edges_false, emb, adj_orig)
                results['accuracy_test'].append(accuracy)
                results['roc_test'].append(roc_score)
                results['ap_test'].append(ap_score)
            gae.train()

    print("Optimization Finished!")

    with torch.no_grad():
        # Test loss
        gae.eval()
        emb = gae.get_embeddings(data['features'], data['adj_norm'])
        accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false,
                                                 emb, adj_orig)
        print('Test Accuracy: ' + str(accuracy))
        print('Test ROC score: ' + str(roc_score))
        print('Test AP score: ' + str(ap_score))

    # Plot
    plot_results(results,
                 args.test_freq,
                 path=args.dataset_str + "_GAE_results.png")
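
# eval_gae is not shown in this example. A minimal sketch, assuming emb is a NumPy
# array of node embeddings and link scores are the sigmoid of embedding inner
# products (as in the GAE paper); adj_orig is kept only for signature compatibility:
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

def eval_gae(edges_pos, edges_neg, emb, adj_orig):
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))
    adj_rec = np.dot(emb, emb.T)
    preds_pos = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_pos]
    preds_neg = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_neg]
    preds = np.hstack([preds_pos, preds_neg])
    labels = np.hstack([np.ones(len(preds_pos)), np.zeros(len(preds_neg))])
    accuracy = np.mean((preds > 0.5) == labels)
    return accuracy, roc_auc_score(labels, preds), average_precision_score(
        labels, preds)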
Esempio n. 24
0
def format_data_ui(data_name, has_features=1):
    # Load data

    fpath_dir = '../data/useritem/%s/' % data_name
    fpath_input = '%sinput.pkl' % fpath_dir
    with open(fpath_input, 'rb') as f:
        (n_users, n_items, item_features, train, valid, test) = pkl.load(
            f)  # note: item_features is not the features tuple this function returns
    ui_graph = defaultdict(list)
    ii_graph = defaultdict(set)
    ii_graph_list = defaultdict(list)  # dict()
    for edge, value in train.items():
        u, i = edge
        ui_graph[u].append(i)
    #
    edge_dict = defaultdict(int)
    tmp_u_number = len(ui_graph)
    for index, (u, ilist) in enumerate(ui_graph.items()):

        if index % 500 == 0:
            print('user number: %d/%d' % (index, tmp_u_number))
        for i in ilist:
            for j in ilist:
                # ii_graph[i].add(j)
                if i != j:
                    edge_dict[(i, j)] += 1
        if len(edge_dict) % 5000 == 0:
            print('len(edge_dict):%d' % len(edge_dict))

    print('len(edge_dict):%d' % len(edge_dict))
    edge_visit_thresh = 2

    for edge, visit_num in edge_dict.items():
        i1, i2 = edge
        if visit_num >= edge_visit_thresh:
            ii_graph_list[i1].append(i2)  # = list(iset)
    print('%s:get ii mat' % (datetime.datetime.now().isoformat()))
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(ii_graph_list))
    print('adj shape:', adj.get_shape())

    # features: lil_matrix
    features = item_features.tolil()

    # true_labels: ground-truth neighbor labels; not used here or by ARGA
    true_labels = None

    # -- transformation done; from here on, the original procedure follows
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train
    print('%s:mask test edges over' % (datetime.datetime.now().isoformat()))
    if has_features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [adj, num_features, num_nodes, features_nonzero, pos_weight, norm, adj_norm, adj_label, features,
             true_labels, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, adj_orig]
    feas = {}
    for item in items:
        feas[retrieve_name(item)] = item

    return feas
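
# retrieve_name is used above to key each object by the name of the variable that
# holds it. A common implementation (assumed here; it is not shown in the original)
# inspects the caller's local frame:
import inspect

def retrieve_name(var):
    # Return the first variable name in the calling frame bound to this object.
    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
    return [name for name, val in callers_local_vars if val is var][0]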
Esempio n. 25
0
def main(args):
    
    dataset = args.dataset
    emb_output_dir = args.output
    epochs = args.epochs
    agg = args.agg
    p = args.p
    tr = args.tr
    lam = args.lam
    loss_func = args.loss

    # Preprocess dataset
    adj, views_features = load_data(dataset, num_views=3)
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    # Calculate pairwise similarity.
    views_sim_matrix = {}
    views_feature_matrix = {}

    for view in list(views_features.keys()):
        feature_matrix = csc_matrix.todense(views_features[view])
        views_feature_matrix.update({view:feature_matrix})
 
    kernal = "rbf"
    if loss_func == 'all':
        attr_sim = cal_attr_sim(views_feature_matrix, dataset)
    else:
        attr_sim = 0

    # Split edges into train, validation and test sets,
    # and remove the held-out edges from the training adjacency matrix.
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(dataset, adj)
    
    print("Masking edges Done!")
    adj = adj_train
    nx_G = nx.from_numpy_array(adj.toarray())
    num_nodes = adj.shape[0]
    adj_norm = preprocess_graph(adj)

    views_features_num = {}
    views_features_nonzero = {}
    for view in list(views_features.keys()):
        views_features[view] = sparse_to_tuple(views_features[view].tocoo())
        views_features_num.update({view:views_features[view][2][1]})
        views_features_nonzero.update({view:views_features[view][1].shape[0]})
    
    # Build model
    MagCAE = {}
    for view in list(views_features.keys()):
        x,y = views_features[view][2][0], views_features[view][2][1]
        model = GAE(y, views_features_nonzero[view], adj_norm, math.ceil(2*p*y), math.ceil(p*y))
        MagCAE.update({view:model})

    # Loss function and optimizer.
    # weight applied to positive entries so the sparse edges balance the dense zero background.
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    optimizer = tf.keras.optimizers.Adam()

    adj_targ = adj_train + sp.eye(adj_train.shape[0])
    adj_targ = sparse_to_tuple(adj_targ)

    indices= np.array(adj_targ[0])
    values = np.array(adj_targ[1])
    dense_shape = np.array(adj_targ[2])
    sparse_targ = tf.SparseTensor(indices = indices,
                                    values = values,
                                    dense_shape = dense_shape)
    sparse_targ = tf.cast(sparse_targ, dtype=tf.float32)

    adj_targ = tf.sparse.to_dense(sparse_targ)
    adj_targ = tf.reshape(adj_targ,[-1])
    # Train and Evaluate Model
    # Training Loop:
    # In each epoch: views - > view_embedding -> aggregate embedding -> total loss ->  update gradients
    decoder = Decoder(100)

    for epoch in range(epochs):
        loss = 0
        start = time.time()

        with tf.GradientTape() as tape:
            ag_embedding ={}


            for VAE in list(MagCAE.keys()):
                v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
                ag_embedding.update({VAE:v_embedding})

            # aggregate embeddings
            embedding, aggregator = aggregate_embeddings(ag_embedding, agg)
            # reconstruct a_hat
            a_hat = decoder(embedding)
            loss += loss_function(a_hat, adj_targ, pos_weight, norm, attr_sim, embedding, num_nodes, lam, loss_func)

        if agg == "weighted_concat":
            variables = MagCAE['view1'].trainable_variables + MagCAE['view2'].trainable_variables + MagCAE['view3'].trainable_variables + aggregator.trainable_variables

        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        # Evaluate on validate set
        embedding = np.array(embedding)
        roc_cur, ap_cur, _, _ = evaluate(val_edges, val_edges_false, adj_orig, embedding)

        print("Epoch {}: Val_Roc {:.4f}, Val_AP {:.4f}, Time Consumed {:.2f} sec\n".format(epoch+1, roc_cur, ap_cur, time.time()-start))

    print("Training Finished!")
    
    # Evaluation Result on test Edges
    test_embedding= {}
    for VAE in list(MagCAE.keys()):
        v_embedding, a_hat = MagCAE[VAE](views_features[VAE])
        test_embedding.update({VAE:v_embedding})

    # aggregate embeddings
    embedding, aggregator = aggregate_embeddings(test_embedding, agg)
    embedding = np.array(embedding) # embedding is a tensor, convert to np array.

    # reconstruct a_hat
    test_roc, test_ap, fpr, tpr = evaluate(test_edges, test_edges_false, adj_orig, embedding)
    print("MagCAE test result on {}".format(dataset))
    print("Test Roc: {}, Test AP: {}, P: {}, Training Ratio: {}, Lambda: {}.".format(test_roc, test_ap, p, tr, lam))
Esempio n. 26
0
def format_data(data_name, seq_len, time_decay):
    # Load data
    adjs, features = load_data(data_name, time_decay)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_origs = []
    pos_weights = []
    norms = []
    adj_norms = []
    features_sp = []
    features_nonzeros = []

    num_node = np.array(adjs[0]).shape[1]
    feature_dim = np.array(features[0]).shape[1]

    for adj, feature in zip(adjs, features):
        adj_orig = sparse_to_tuple(adj)

        pos_weight = float(num_node * num_node -
                           adj_orig[1].sum()) / adj_orig[1].sum()
        norm = num_node * num_node / float(
            (num_node * num_node - adj_orig[1].sum()) * 2)

        feature = sparse_to_tuple(feature)
        features_nonzero = feature[1].shape[0]

        adj_norm = preprocess_graph(adj)

        adj_origs.append(adj_orig)
        pos_weights.append(pos_weight)
        norms.append(norm)
        features_sp.append(feature)
        features_nonzeros.append(features_nonzero)
        adj_norms.append(adj_norm)

    batch_size = len(adj_origs) - seq_len

    temporal_adj_origs = []
    temporal_pos_weights = []
    temporal_norms = []

    struct_adj_origs = []
    struct_pos_weights = []
    struct_norms = []
    struct_adj_norms = []
    struct_features = []
    struct_features_nonzeros = []

    for i in range(batch_size):
        temporal_adj_origs.append(adj_origs[i + 1:i + 1 + seq_len])
        temporal_pos_weights.append(pos_weights[i + 1:i + 1 + seq_len])
        temporal_norms.append(norms[i + 1:i + 1 + seq_len])

        struct_adj_origs.append(adj_origs[i:i + seq_len])
        struct_pos_weights.append(pos_weights[i:i + seq_len])
        struct_norms.append(norms[i:i + seq_len])
        struct_adj_norms.append(adj_norms[i:i + seq_len])
        struct_features.append(features_sp[i:i + seq_len])
        struct_features_nonzeros.append(features_nonzeros[i:i + seq_len])

    # temporal_adj_origs = adj_origs[1: 1+seq_len]
    # temporal_pos_weights = pos_weights[1: 1+seq_len]
    # temporal_norms = norms[1: 1+seq_len]
    #
    # struct_adj_origs = adj_origs[0: 0+seq_len]
    # struct_pos_weights = pos_weights[0: 0+seq_len]
    # struct_norms = norms[0: 0+seq_len]
    # struct_adj_norms = adj_norms[0: 0+seq_len]
    # struct_features = features_sp[0: 0+seq_len]
    # struct_features_nonzeros = features_nonzeros[0: 0+seq_len]

    feas = {
        'temporal_adj_origs': temporal_adj_origs,
        'temporal_pos_weights': temporal_pos_weights,
        'temporal_norms': temporal_norms,
        'num_node': num_node,
        'feature_dim': feature_dim,
        'batch_size': batch_size,
        'struct_adj_origs': struct_adj_origs,
        'struct_features': struct_features,
        'struct_features_nonzeros': struct_features_nonzeros,
        'struct_adj_norms': struct_adj_norms,
        'struct_pos_weights': struct_pos_weights,
        'struct_norms': struct_norms,
        'adj_norms': adj_norms,
        'features': features_sp
    }

    return feas
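
# Worked example of the windowing above: with T = len(adjs) snapshots and
# seq_len = s, batch_size = T - s. Window i uses snapshots [i, i+s) as the
# structural inputs and the shifted range [i+1, i+1+s) as the temporal targets,
# so each structural sequence is trained to predict the graph one step ahead.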
Esempio n. 27
0
# temp_adj = adj.todense()
# temp_feature = features.todense()

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

adj_train = adj

if FLAGS.features == 0:
    features = sp.identity(features.shape[0])  # featureless
logging.info('preprocessing data')
# Some preprocessing
adj_norm = preprocess_graph(adj)
logging.info('done preprocessing data')
# Define placeholders
placeholders = {
    'features': tf.sparse_placeholder(tf.float32),
    'adj': tf.sparse_placeholder(tf.float32),
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}

num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]
logging.info('create model')
Esempio n. 28
0
Fa_train = sparse_to_tuple(fea_train.tocoo())
Fa_train = tf.SparseTensorValue(Fa_train[0], Fa_train[1], Fa_train[2])

# Define placeholders
placeholders = {
    'Fn': tf.sparse_placeholder(tf.float32,
                                (num_nodes, num_nodes + num_features)),
    'Fa': tf.sparse_placeholder(tf.float32, (num_nodes, num_features)),
    'adj_orig': tf.sparse_placeholder(tf.float32, (num_nodes, num_nodes)),
    'features_orig': tf.sparse_placeholder(tf.float32,
                                           (num_nodes, num_features)),
    'dropout': tf.placeholder_with_default(0., shape=())
}

# Create model
adj_train_mat = preprocess_graph(adj_train)
adj_train_mat = tf.cast(
    tf.SparseTensor(adj_train_mat[0], adj_train_mat[1], adj_train_mat[2]),
    tf.float32)
y_train = tf.cast(y_train, tf.float32)

model = SCVA(placeholders, adj_train_mat, num_features, num_nodes,
             features_nonzero, num_labels, labels_pos, y_train, one_gcn)

pos_weight_u = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm_u = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
pos_weight_a = float(features[2][0] * features[2][1] - len(features[1])) / len(
    features[1])
norm_a = features[2][0] * features[2][1] / float(
    (features[2][0] * features[2][1] - len(features[1])) * 2)
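
# pos_weight_u / pos_weight_a reweight the scarce positive entries (edges, resp.
# non-zero attribute values) against the dense zero background: negatives / positives.
# norm_u / norm_a are the matching GAE rescaling constants, total / (2 * negatives),
# applied to the averaged cross-entropy of each reconstruction term.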
Esempio n. 29
0
def main(args):
    """ Train GAE """
    print("Using {} dataset".format(args.dataset_str))
    # Load data
    np.random.seed(1)
    adj, features = load_data(args.dataset_str)
    N, D = features.shape

    # Store original adjacency matrix (without diagonal entries)
    adj_orig = adj
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)

    # Some preprocessing
    adj_train_norm = preprocess_graph(adj_train)
    adj_train_norm = Variable(make_sparse(adj_train_norm))
    adj_train_labels = Variable(
        torch.FloatTensor(adj_train + sp.eye(adj_train.shape[0]).todense()))
    features = Variable(make_sparse(features))

    n_edges = adj_train_labels.sum()

    data = {
        'adj_norm': adj_train_norm,
        'adj_labels': adj_train_labels,
        'features': features,
    }

    gae = GAE(data,
              n_hidden=32,
              n_latent=16,
              dropout=args.dropout,
              subsampling=args.subsampling)

    optimizer = Adam({"lr": args.lr, "betas": (0.95, 0.999)})

    svi = SVI(gae.model, gae.guide, optimizer, loss="ELBO")

    # Results
    results = defaultdict(list)

    # Full batch training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do ELBO gradient and accumulate loss
        epoch_loss += svi.step()
        # report training diagnostics
        if args.subsampling:
            normalized_loss = epoch_loss / float(2 * n_edges)
        else:
            normalized_loss = epoch_loss / (2 * N * N)

        results['train_elbo'].append(normalized_loss)

        # Training loss
        emb = gae.get_embeddings()
        accuracy, roc_curr, ap_curr = eval_gae(val_edges, val_edges_false,
                                               emb, adj_orig)

        results['accuracy_train'].append(accuracy)
        results['roc_train'].append(roc_curr)
        results['ap_train'].append(ap_curr)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(normalized_loss), "train_acc=",
              "{:.5f}".format(accuracy), "val_roc=", "{:.5f}".format(roc_curr),
              "val_ap=", "{:.5f}".format(ap_curr))

        # Test loss
        if epoch % args.test_freq == 0:
            emb = gae.get_embeddings()
            accuracy, roc_score, ap_score = eval_gae(test_edges,
                                                     test_edges_false, emb,
                                                     adj_orig)
            results['accuracy_test'].append(accuracy)
            results['roc_test'].append(roc_score)
            results['ap_test'].append(ap_score)

    print("Optimization Finished!")

    # Test loss
    emb = gae.get_embeddings()
    accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false, emb,
                                             adj_orig)
    print('Test Accuracy: ' + str(accuracy))
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))

    # Plot
    plot_results(results,
                 args.test_freq,
                 path=args.dataset_str + "_results.png")
Esempio n. 30
0
def train(placeholders, model, opt, adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false, features, sess, name="single_fold"):

    adj = adj_train
    
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() # negatives / positives
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) # total / (2 * negatives)

    print(adj_train.shape)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Some preprocessing. adj_norm is D^(-1/2) x adj x D^(-1/2)
    adj_norm = preprocess_graph(adj)

    # session initialize
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    val_roc_score = []
    best_validation = 0.0

    num_nodes = adj.shape[0]

    edges_for_loss = np.ones((num_nodes*num_nodes), dtype=np.float32)

    ignore_edges = []
    edges_to_ignore = np.concatenate((val_edges, val_edges_false, test_edges, test_edges_false), axis=0)
    for e in edges_to_ignore:
        ignore_edges.append(e[0]*num_nodes+e[1])
    edges_for_loss[ignore_edges] = 0
    num_train = num_nodes * num_nodes - len(ignore_edges)
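
    # edges_for_loss is a flattened N*N indicator that is zero exactly at the
    # validation/test positions, so the reconstruction loss is computed only over
    # entries the model may train on; num_train counts the entries that remain.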

    last_best_epoch = 0

    # Train model
    for epoch in range(FLAGS.epochs):

        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        feed_dict.update({placeholders['is_training']: True})
        feed_dict.update({placeholders['norm']: norm})
        feed_dict.update({placeholders['pos_weight']: pos_weight})
        feed_dict.update({placeholders['edges_for_loss']: edges_for_loss})
        feed_dict.update({placeholders['num_train']: num_train})
        
        avg_x_cost = 0
        
        if model_str == 'dglfrm':
            outs = sess.run([opt.opt_op, opt.cost, opt.accuracy, opt.x_loss, model.a, model.b, model.z_real, model.z_discrete], feed_dict=feed_dict)
            # a, b are global parameters
            a, b = np.log(1 + np.exp(outs[4])), np.log(1 + np.exp(outs[5]))
            a = np.mean(a)
            b = np.mean(b)
            #regularization = round(outs[3], 2)
            regularization = 0
            z_discrete = outs[7]
            z_real = outs[6]
            avg_x_cost = outs[3]
            W = None

        elif model_str == 'dglfrm_b':
            outs = sess.run([opt.opt_op, opt.cost, opt.accuracy, opt.x_loss, model.a, model.b, model.z], feed_dict=feed_dict)
            regularization = 0
            z_discrete = outs[6]
            z_real = None
            avg_x_cost = outs[3]
            W = None
                        
        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]

        adj_rec, z_activated = get_score_matrix(sess, placeholders, feed_dict, model, S=1)
        roc_curr, ap_curr, _  = get_roc_score(adj_rec, val_edges, val_edges_false)
        
        print("Epoch:", '%03d' % (epoch + 1), "cost=", "{:.3f}".format(avg_cost), 
              "x_recon_loss=", "{:.2f}".format(avg_x_cost),
              "val_roc=", "{:.3f}".format(roc_curr), "val_ap=", "{:.3f}".format(ap_curr), 
              'activated_z=', "{:.1f}".format(z_activated), "time=", "{:.2f}".format(time.time() - t))

        roc_curr = round(roc_curr, 3)
        val_roc_score.append(roc_curr)

        # Look-ahead epochs: we may need to train a few more epochs because of the nested stochastic nature of the framework.
        if FLAGS.early_stopping != 0 and roc_curr > best_validation: 
            # save model
            print ('Saving model')
            saver.save(sess=sess, save_path=save_dir+name)
            best_validation = roc_curr
            last_best_epoch = 0

        if FLAGS.early_stopping != 0 and last_best_epoch > FLAGS.early_stopping:
            break
        else:
            last_best_epoch += 1

    print("Optimization Finished!")
    
    val_max_index = np.argmax(val_roc_score)
    
    print ('---------------------------------')

    print('Validation ROC Max: {:.3f} at Epoch: {:04d}'.format(val_roc_score[val_max_index], val_max_index))
            
    qual_file = 'data/qual_' + dataset_str + '_' + model_str

    if model_str == 'dglfrm':
        np.savez(qual_file, z_discrete=np.asarray(z_discrete), z_real=np.asarray(z_real), z_out=np.asarray(np.multiply(np.round(z_discrete), z_real)), adj_rec=adj_rec)
    elif model_str == 'dglfrm_b':
        np.savez(qual_file, z_discrete=np.asarray(z_discrete), adj_rec=adj_rec)
    
    if FLAGS.early_stopping != 0:
        saver.restore(sess=sess, save_path=(save_dir+name))

    adj_score, z_activated = get_score_matrix(sess, placeholders, feed_dict, model)

    return adj_score, z_activated