Python GCNModelAE Beispiele, model.GCNModelAE Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: constructor.py Projekt: zhuyulin1994/GCN_AnomalyDetection

def get_model(model_str, placeholders, num_features, num_nodes, features_nonzero):
    """Build the graph auto-encoder selected by ``model_str``.

    ``'gcn_ae'`` yields ``GCNModelAE(placeholders, num_features,
    features_nonzero)`` and ``'gcn_vae'`` yields ``GCNModelVAE(placeholders,
    num_features, num_nodes, features_nonzero)``. Any other selector yields
    ``None``.
    """
    if model_str == 'gcn_ae':
        return GCNModelAE(placeholders, num_features, features_nonzero)
    if model_str == 'gcn_vae':
        return GCNModelVAE(placeholders, num_features, num_nodes,
                           features_nonzero)
    # Unrecognised selector: nothing is constructed.
    return None

Beispiel #2

0

Datei anzeigen

Datei: train_together.py Projekt: leereborn/gae

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    sess = tf.Session()

    # Create model

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    gae_model = GCNModelAE(placeholders, num_features, features_nonzero, False,
                           FLAGS.bilinear)
    # Optimizer
    with tf.name_scope('optimizer'):
        opt = OptimizerAE(
            preds=gae_model.reconstructions,
            labels=tf.reshape(
                tf.sparse_tensor_to_dense(
                    placeholders[
                        'adj'],  # adj_orig in the original implementation
                    validate_indices=False),
                [-1]),
            pos_weight=pos_weight,
            norm=norm)

    sess = tf.Session()
    # Initialize session

Beispiel #3

0

Datei anzeigen

    'features': tf.sparse_placeholder(tf.float32),
    'adj': tf.sparse_placeholder(tf.float32),
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}

num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]
logging.info('create model')
# Create model
model = None
if model_str == 'gcn_ae':
    model = GCNModelAE(placeholders, num_features, features_nonzero)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(placeholders, num_features, num_nodes,
                        features_nonzero)

pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
logging.info('optimizer')
# Optimizer
with tf.name_scope('optimizer'):
    if model_str == 'gcn_ae':
        opt = OptimizerAE(preds=model.reconstructions,
                          labels=tf.reshape(
                              tf.sparse_tensor_to_dense(
                                  placeholders['adj_orig'],

Beispiel #4

0

Datei anzeigen

Datei: trainGcn.py Projekt: fangyangbit/S-VGAE

def train_gcn(features, adj_train, train_edges, train_edges_false, test_edges,
              test_edges_false):
    """Train a GCN (variational) auto-encoder and return the node embeddings.

    Args:
        features: feature matrix; ``shape[0]`` and ``tocoo()`` are used. It is
            replaced by an identity matrix when ``FLAGS.features`` is 0.
        adj_train: training adjacency matrix.
        train_edges: positive training edges, forwarded to
            ``construct_optimizer_list``.
        train_edges_false: negative training edges, forwarded likewise.
        test_edges: not used by this function.
        test_edges_false: not used by this function.

    Returns:
        The value of ``model.z_mean`` evaluated after training, with dropout
        switched off.

    Raises:
        ValueError: if ``FLAGS.model`` is neither ``'gcn_ae'`` nor
            ``'gcn_vae'``.
    """
    # Settings
    # NOTE(review): the flags are (re)defined on every call; a second call in
    # the same process presumably fails with a duplicate-flag error — confirm.
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.005, 'Initial learning rate.')
    flags.DEFINE_integer('epochs', 200, 'Number of epochs to train.')
    flags.DEFINE_integer('hidden1', 96, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', 48, 'Number of units in hidden layer 2.')
    flags.DEFINE_float('weight_decay', 0.,
                       'Weight for L2 loss on embedding matrix.')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_string('model', 'gcn_vae', 'Model string.')
    flags.DEFINE_integer('features', 1,
                         'Whether to use features (1) or not (0).')

    model_str = FLAGS.model

    # 1-dim index array, used in the cost function to only focus on those
    # interactions with high confidence.
    mask_index = construct_optimizer_list(features.shape[0], train_edges,
                                          train_edges_false)

    adj = adj_train

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float64),
        'adj': tf.sparse_placeholder(tf.float64),
        'adj_orig': tf.sparse_placeholder(tf.float64),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Create model
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, features_nonzero)
    elif model_str == 'gcn_vae':
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)
    else:
        # Fail here with a clear message instead of later on an unbound
        # optimizer.
        raise ValueError("Unknown model string: {!r}".format(model_str))

    # Class re-weighting is disabled: both factors are fixed to 1 and the
    # loss is restricted through ``mask_index`` instead.
    pos_weight = 1
    norm = 1

    # Optimizer
    with tf.name_scope('optimizer'):
        labels = tf.reshape(
            tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                      validate_indices=False), [-1])
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=labels,
                              pos_weight=pos_weight,
                              norm=norm,
                              mask=mask_index)
        else:
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=labels,
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm,
                               mask=mask_index)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # The context manager releases the session's resources even when training
    # raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train model
        for epoch in range(FLAGS.epochs):
            # Construct feed dictionary
            feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                            placeholders)
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})
            # Run single weight update
            outs = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)

            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(outs[1]))

        print("Optimization Finished!")

        # Build a fresh feed dict for inference: dropout must be off so the
        # returned embeddings are not perturbed, and this also works when no
        # training epoch ran.
        feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                        placeholders)
        feed_dict.update({placeholders['dropout']: 0})

        # Embedding for each node
        emb = sess.run(model.z_mean, feed_dict=feed_dict)

    return emb

Beispiel #5

0

Datei anzeigen

    'features': tf.sparse_placeholder(tf.float32),
    'adj': tf.sparse_placeholder(tf.float32),
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}

num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

# Create model
model = None
if model_str == 'gcn_ae':
    model = GCNModelAE(placeholders, num_features, features_nonzero,
                       FLAGS.hidden1, FLAGS.hidden2, FLAGS.hidden3)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(placeholders, num_features, num_nodes,
                        features_nonzero, FLAGS.hidden1, FLAGS.hidden2)

pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

# Optimizer
with tf.name_scope('optimizer'):
    if model_str == 'gcn_ae':
        opt = OptimizerAE(preds=model.reconstructions,
                          labels=tf.reshape(
                              tf.sparse_tensor_to_dense(
                                  placeholders['adj_orig'],

Beispiel #6

0

Datei anzeigen

num_features = adj.shape[1]

# Define placeholders
placeholders = {
    'features':
    tf.placeholder(tf.float32, [args.batch_size, num_nodes, num_features]),
    'adj_norm':
    tf.placeholder(tf.float32, [args.batch_size, num_nodes, num_nodes]),
    'adj_orig':
    tf.placeholder(tf.float32, [args.batch_size, num_nodes, num_nodes]),
    'dropout':
    tf.placeholder_with_default(0., shape=())
}

# Create model
model = GCNModelAE(placeholders, num_features, num_nodes, args)

# Initialize session
sess = tf.Session()

# Train model
saver = tf.train.Saver()
# model_name = "./models/brain_vgae_100_50_autoencoder=False_kl_coefficient=0.001_act=tanh.ckpt"
model_name = "./models/brain_vgae_100_50_autoencoder=True.ckpt"

print("Analyzing " + model_name)

with tf.Session() as sess:
    saver.restore(sess, model_name)

    features_batch = np.zeros([args.batch_size, num_nodes, num_features],

Beispiel #7

0

Datei anzeigen

Datei: train.py Projekt: WINGHARE/trainsource

def _context_pairs(walk, window_size):
    """Return ``(center, contexts)`` skip-gram pairs, one per walk position.

    The contexts of a position are the walk entries at most ``window_size``
    steps away from it (clipped to the walk), excluding the position itself.
    Positions that end up with no context are skipped.
    """
    pairs = []
    for center_pos, center in enumerate(walk):
        first = max(0, center_pos - window_size)
        last = min(len(walk), center_pos + window_size + 1)
        contexts = [
            int(walk[pos]) for pos in range(first, last) if pos != center_pos
        ]
        if contexts:
            pairs.append((int(center), contexts))
    return pairs


def gae_for(args):
    """Train a graph (variational) auto-encoder, optionally with DeepWalk
    skip-gram regularization, and report link-prediction and clustering scores.

    ``args`` fields read here: ``dataset_str``, ``dw``, ``number_walks``,
    ``walk_length``, ``full_number_walks``, ``context``, ``window_size``,
    ``ns``, ``model``, ``hidden1``, ``hidden2``, ``dropout``, ``lr``,
    ``lr_dw``, ``epochs``, ``n_clusters`` and ``plot``.

    Side effects: prints progress through ``print``/``tqdm.write`` and saves
    the embeddings to ``logs/emb_epoch_<N>.npy`` every 10th epoch.
    """
    import os  # local import: only needed to create the embedding dump folder

    print("Using {} dataset".format(args.dataset_str))
    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(
        args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(adj)
    adj = adj_train

    # Before proceeding further, make the structure for doing deepWalk
    if args.dw == 1:
        print('Using deepWalk regularization...')
        G = load_edgelist_from_csr_matrix(adj_orig, undirected=True)
        print("Number of nodes: {}".format(len(G.nodes())))
        num_walks = len(G.nodes()) * args.number_walks
        print("Number of walks: {}".format(num_walks))
        data_size = num_walks * args.walk_length
        print("Data size (walks*length): {}".format(data_size))

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    if args.model == 'gcn_vae':
        model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    else:
        model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.dw == 1:
        sg = SkipGram(args.hidden2, adj.shape[0])
        optimizer_dw = optim.Adam(sg.parameters(), lr=args.lr_dw)

        # Fix the node order once, before training, for the chunked walks.
        # NOTE(review): random.Random() is unseeded here, so this shuffle is
        # presumably not reproducible across runs — confirm that is intended.
        nodes_in_G = list(G.nodes())
        chunks = len(nodes_in_G) // args.number_walks
        random.Random().shuffle(nodes_in_G)

    hidden_emb = None
    # Defined up front so the progress line cannot hit an unbound name when
    # an epoch produces no walks.
    cur_dw_loss = float('nan')
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        z, mu, logvar = model(features, adj_norm)

        # The deepWalk regularization updates run first; the auto-encoder
        # loss is back-propagated afterwards, further below.
        if args.dw == 1:
            sg.train()
            if args.full_number_walks > 0:
                walks = build_deepwalk_corpus(G,
                                              num_paths=args.full_number_walks,
                                              path_length=args.walk_length,
                                              alpha=0,
                                              rand=random.Random(SEED))
            else:
                walks = build_deepwalk_corpus_iter(
                    G,
                    num_paths=args.number_walks,
                    path_length=args.walk_length,
                    alpha=0,
                    rand=random.Random(SEED),
                    chunk=epoch % chunks,
                    nodes=nodes_in_G)
            for walk in walks:
                if args.context == 1:
                    # Every node of the walk is treated as a center word and
                    # paired with its window. The previous code read the loop
                    # index before the loop bound it and crashed here.
                    pairs = _context_pairs(walk, args.window_size)
                else:
                    # first item in the walk is the starting node
                    pairs = [(int(walk[0]),
                              [int(node) for node in walk[1:]])]

                if args.ns == 1:
                    neg_nodes = []
                    # Compare as ints: sampled ids are ints, while walk
                    # entries are only known to be int-convertible.
                    pos_nodes = {int(node) for node in walk}
                    while len(neg_nodes) < args.walk_length - 1:
                        rand_node = random.randint(0, n_nodes - 1)
                        if rand_node not in pos_nodes:
                            neg_nodes.append(rand_node)
                    neg_nodes = torch.from_numpy(np.array(neg_nodes)).long()

                # Do actual prediction, one optimizer step per pair
                for src, contexts in pairs:
                    src_node = torch.from_numpy(np.array([src])).long()
                    tgt_nodes = torch.from_numpy(np.array(contexts)).long()
                    optimizer_dw.zero_grad()
                    log_pos = sg(src_node, tgt_nodes, neg_sample=False)
                    if args.ns == 1:
                        loss_neg = sg(src_node, neg_nodes, neg_sample=True)
                        loss_dw = log_pos + loss_neg
                    else:
                        loss_dw = log_pos
                    loss_dw.backward(retain_graph=True)
                    cur_dw_loss = loss_dw.item()
                    optimizer_dw.step()

        loss = loss_function(preds=model.dc(z),
                             labels=adj_label,
                             mu=mu,
                             logvar=logvar,
                             n_nodes=n_nodes,
                             norm=norm,
                             pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.data.numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges,
                                          val_edges_false)

        if args.dw == 1:
            tqdm.write(
                "Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}"
                .format(epoch + 1, cur_loss, cur_dw_loss, ap_curr,
                        time.time() - t))
        else:
            tqdm.write(
                "Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".
                format(epoch + 1, cur_loss, ap_curr,
                       time.time() - t))

        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            kmeans = KMeans(n_clusters=args.n_clusters,
                            random_state=0).fit(hidden_emb)
            predict_labels = kmeans.predict(hidden_emb)
            cm = clustering_metrics(true_labels, predict_labels)
            cm.evaluationClusterModelFromLabel(tqdm)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                                test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            # Make sure the dump folder exists so a missing directory does
            # not abort a run that is already several epochs in.
            os.makedirs('logs', exist_ok=True)
            np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)

    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)

Beispiel #8

0

Datei anzeigen

    def runner(self):
        """Train the two-network auto-encoder against a discriminator and
        print top-k alignment precision after every epoch.

        Builds the placeholders, discriminator variables, model and both
        optimizers, then alternates ``FLAGS.g_epoch`` auto-encoder updates
        with ``FLAGS.d_epoch`` discriminator updates. After each epoch the
        cosine similarity between the two embedding sets is stored in
        ``self.similar_matrix`` and hit counts against
        ``data/douban_truth.emb`` are printed for several cutoffs.

        Also sets ``self.D_W1``, ``self.D_b1``, ``self.D_W2``, ``self.D_b2``
        and ``self.tmp_count``. Returns nothing.
        """
        model_str = FLAGS.model
        placeholders = [{
            'features':
            tf.sparse_placeholder(tf.float32),
            'adj':
            tf.sparse_placeholder(tf.float32),
            'adj_orig':
            tf.sparse_placeholder(tf.float32),
            'dropout':
            tf.placeholder_with_default(0., shape=()),
            'num_features':
            tf.placeholder(tf.float32),
            'features_nonzero':
            tf.placeholder(tf.float32),
            'pos_weight':
            tf.placeholder(tf.float32),
            'norm':
            tf.placeholder(tf.float32),
            'reward':
            tf.placeholder(tf.float32),
            'D_W1':
            tf.placeholder_with_default(
                tf.zeros([FLAGS.g_hidden2, FLAGS.d_hidden1]),
                shape=[FLAGS.g_hidden2, FLAGS.d_hidden1]),
            'D_W2':
            tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1, 1]),
                                        shape=[FLAGS.d_hidden1, 1]),
            'D_b1':
            tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1]),
                                        shape=[FLAGS.d_hidden1]),
            'D_b2':
            tf.placeholder_with_default(tf.zeros([1]), shape=[1]),
        }, {
            'features': tf.sparse_placeholder(tf.float32),
            'adj': tf.sparse_placeholder(tf.float32),
            'adj_orig': tf.sparse_placeholder(tf.float32),
            'dropout': tf.placeholder_with_default(0., shape=()),
            # NOTE(review): a sparse placeholder here, but a dense one in the
            # first dict — presumably unintended; confirm before changing.
            'num_features': tf.sparse_placeholder(tf.float32),
            'features_nonzero': tf.placeholder(tf.float32),
            'pos_weight': tf.placeholder(tf.float32),
            'norm': tf.placeholder(tf.float32),
            'reward': tf.placeholder(tf.float32)
        }]
        sess = tf.Session()

        # Discriminator parameters
        self.D_W1 = tf.Variable(xavier_init([FLAGS.g_hidden2,
                                             FLAGS.d_hidden1]))
        self.D_b1 = tf.Variable(xavier_init([FLAGS.d_hidden1]))
        self.D_W2 = tf.Variable(xavier_init([FLAGS.d_hidden1, 1]))
        self.D_b2 = tf.Variable(xavier_init([1]))
        d_vars = [self.D_W1, self.D_b1, self.D_W2, self.D_b2]

        print('train for the network embedding...')
        # Load data
        dataset_str1 = 'Douban_offline'  # 1118 nodes
        dataset_str2 = 'Douban_online'  # 3906 nodes
        adj1, features1, fea_num1 = load_data(dataset_str1)
        adj2, features2, fea_num2 = load_data(dataset_str2)
        num_features = [features1.shape[1], features2.shape[1]]

        model = None

        if model_str == 'gcn_ae':
            model = GCNModelAE(placeholders, num_features, sess)
        elif model_str == 'gcn_vae':
            # NOTE(review): num_nodes and features_nonzero are not defined in
            # this method, so this branch looks unusable as written — confirm.
            model = GCNModelVAE(placeholders, num_features, num_nodes,
                                features_nonzero)

        # Optimizer

        with tf.name_scope('optimizer'):
            opt = OptimizerAE(
                preds=[model.reconstructions1, model.reconstructions2],
                labels=[
                    tf.reshape(
                        tf.sparse_tensor_to_dense(placeholders[0]['adj_orig'],
                                                  validate_indices=False),
                        [-1]),
                    tf.reshape(
                        tf.sparse_tensor_to_dense(placeholders[1]['adj_orig'],
                                                  validate_indices=False),
                        [-1])
                ],
                # NOTE(review): both entries are attribute_reconstructions1;
                # the second presumably should be the second network's
                # reconstruction — confirm against the model definition.
                preds_attribute=[
                    model.attribute_reconstructions1,
                    model.attribute_reconstructions1
                ],
                labels_attribute=[
                    tf.sparse_tensor_to_dense(placeholders[0]['features']),
                    tf.sparse_tensor_to_dense(placeholders[1]['features'])
                ],
                pos_weight=[
                    placeholders[0]['pos_weight'],
                    placeholders[1]['pos_weight']
                ],
                norm=[placeholders[0]['norm'], placeholders[1]['norm']],
                fake_logits=model.fake_logits,
                alpha=FLAGS.AX_alpha)

        # Discriminator inputs (created once; an earlier unused duplicate
        # pair of placeholders was dropped).
        real_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
        fake_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])

        real_logits, fake_logits = self.discriminator(real_X, fake_X)
        real_prob = tf.reduce_mean(real_logits)
        fake_prob = tf.reduce_mean(fake_logits)
        D_loss = -real_prob + fake_prob
        dis_optimizer = tf.train.AdamOptimizer(
            learning_rate=FLAGS.learning_rate_dis)  # Adam Optimizer
        opt_dis = dis_optimizer.minimize(D_loss, var_list=d_vars)

        sess.run(tf.global_variables_initializer())
        local_A_1 = adj1
        local_X_1 = features1
        local_A_2 = adj2
        local_X_2 = features2

        adj_norm_1 = preprocess_graph(local_A_1)
        local_X_1 = sparse_to_tuple(local_X_1.tocoo())
        pos_weight_1 = float(local_A_1.shape[0] * local_A_1.shape[0] -
                             local_A_1.sum()) / local_A_1.sum()
        adj_label_1 = local_A_1 + sp.eye(local_A_1.shape[0])
        adj_label_1 = sparse_to_tuple(adj_label_1)
        norm_1 = local_A_1.shape[0] * local_A_1.shape[0] / float(
            (local_A_1.shape[0] * local_A_1.shape[0] - local_A_1.sum()) * 2)

        adj_norm_2 = preprocess_graph(local_A_2)
        local_X_2 = sparse_to_tuple(local_X_2.tocoo())
        pos_weight_2 = float(local_A_2.shape[0] * local_A_2.shape[0] -
                             local_A_2.sum()) / local_A_2.sum()
        adj_label_2 = local_A_2 + sp.eye(local_A_2.shape[0])
        adj_label_2 = sparse_to_tuple(adj_label_2)
        norm_2 = local_A_2.shape[0] * local_A_2.shape[0] / float(
            (local_A_2.shape[0] * local_A_2.shape[0] - local_A_2.sum()) * 2)

        self.tmp_count = {}

        # Evaluation inputs do not change between epochs: read the ground
        # truth once. ndmin=2 keeps a single-row file iterable row by row.
        gnd = np.loadtxt("data/douban_truth.emb", ndmin=2)
        topk = [1, 5, 10, 20, 30, 50]
        max_k = max(topk)

        for epoch in range(FLAGS.epoch):
            for circle_epoch in range(FLAGS.circle_epoch):
                for G_epoch in range(FLAGS.g_epoch):
                    # ------------------------------------------------------------------------------------------
                    # Network 2's data is listed before network 1's, as in
                    # the original call.
                    feed_dict = construct_feed_dict(
                        [adj_norm_2, adj_norm_1], [adj_label_2, adj_label_1],
                        [local_X_2, local_X_1], [pos_weight_2, pos_weight_1],
                        [norm_2, norm_1], placeholders)
                    # Hand the current discriminator weights to the model.
                    feed_dict.update(
                        {placeholders[0]['D_W1']: sess.run(self.D_W1)})
                    feed_dict.update(
                        {placeholders[0]['D_W2']: sess.run(self.D_W2)})
                    feed_dict.update(
                        {placeholders[0]['D_b1']: sess.run(self.D_b1)})
                    feed_dict.update(
                        {placeholders[0]['D_b2']: sess.run(self.D_b2)})

                    # Same fetches as before; the fetched values are not
                    # used afterwards, so they are not bound to names.
                    sess.run(
                        [
                            opt.opt_op, model.embeddings1, model.embeddings2_,
                            opt.cost, model.fake_prob, opt.attribute_cost
                        ],
                        feed_dict=feed_dict)

                for D_epoch in range(FLAGS.d_epoch):
                    feed_dict.update(
                        {placeholders[0]['dropout']: FLAGS.dropout})
                    emb1, emb2 = sess.run(
                        [model.embeddings1, model.embeddings2_],
                        feed_dict=feed_dict)
                    sess.run(
                        [opt_dis, real_prob, fake_prob],
                        feed_dict={
                            real_X: emb1,
                            fake_X: emb2
                        })

            # Evaluate after every epoch
            emb1, emb2 = sess.run([model.embeddings1, model.embeddings2_],
                                  feed_dict=feed_dict)
            final_emb1 = np.array(emb1)
            final_emb2 = np.array(emb2)

            similar_matrix = cosine_similarity(final_emb1, final_emb2)

            self.similar_matrix = similar_matrix

            # Rank each row once at the largest cutoff; the ranking for a
            # smaller cutoff is a prefix of it. Ids are 1-based.
            ranked = {}
            for index in range(similar_matrix.shape[0]):
                row = similar_matrix[index]
                best = heapq.nlargest(max_k, range(len(row)), row.take)
                ranked[index + 1] = [col + 1 for col in best]

            count = {}
            for top in topk:
                self.tmp_count[top] = 0
                hits = 0
                for ele in gnd:
                    # Direct lookup by source id instead of scanning every
                    # ranking; float(3.0) finds the int key 3.
                    candidates = ranked.get(float(ele[0]))
                    if candidates is not None and ele[1] in candidates[:top]:
                        hits += 1
                count[top] = hits

            print(
                f'-----------------------epoch {epoch}------------------------'
            )
            for top in topk:
                print("top", '%02d' % (top), "count=", '%d' % (count[top]),
                      "precision=", "{:.5f}".format(count[top] / len(gnd)))
            print(
                f'-----------------------epoch {epoch}------------------------'
            )

Beispiel #9

0

Datei anzeigen

adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj)

adj = adj_train

adj_norm = preprocess_graph(adj)
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = torch.FloatTensor(adj_label.toarray())

pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

# Model and optimizer
model = GCNModelAE(nfeat=features.shape[1],
                   nhid=args.hidden,
                   nclass=args.nclass,
                   dropout=args.dropout)

optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)

indices = []
losses = []


def train(epoch):
    with torch.autograd.set_detect_anomaly(True):
        t = time.time()
        model.train()
        optimizer.zero_grad()

Beispiel #10

0

Datei anzeigen

        features = sparse_to_tuple(features.tocoo())
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]

        sess = tf.Session()

        # Create model
        model = None
        if FLAGS.multihead_attn:
            model = MultiHeadedGAE(placeholders, num_features,
                                   features_nonzero)
        else:
            if model_str == 'gcn_ae':
                model = GCNModelAE(placeholders, num_features,
                                   features_nonzero, FLAGS.attention,
                                   FLAGS.bilinear)
            elif model_str == 'gcn_vae':
                model = GCNModelVAE(placeholders, num_features, num_nodes,
                                    features_nonzero, FLAGS.attention)

        pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

        # Optimizer
        with tf.name_scope('optimizer'):
            if model_str == 'gcn_ae':
                opt = OptimizerAE(
                    preds=model.reconstructions,
                    labels=tf.reshape(

Beispiel #11

0

Datei anzeigen

Datei: train.py Projekt: ciortanmadalina/graph-sc

def _cluster_and_report(hidden_emb, true_labels, n_clusters):
    """Run K-means on the embeddings and report clustering metrics.

    Args:
        hidden_emb: Node embeddings passed to ``KMeans.fit`` / ``predict``.
        true_labels: Ground-truth labels compared with the K-means assignment.
        n_clusters: Number of K-means clusters.

    Returns:
        The ``clustering_metrics`` object built from the true and predicted
        labels, after ``evaluationClusterModelFromLabel(tqdm)`` was called
        on it.
    """
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)
    return cm


def gae_for(args):
    """Train a graph (variational) auto-encoder and evaluate its embeddings.

    Loads the dataset named by ``args.dataset_str``, holds out validation and
    test edges with ``mask_test_edges``, trains the selected model with Adam,
    and reports link-prediction scores and K-means clustering metrics.
    Every 10th epoch the current embeddings are also saved to
    ``logs/emb_epoch_<epoch>.npy``.

    Args:
        args: Namespace providing ``dataset_str``, ``model``, ``hidden1``,
            ``hidden2``, ``dropout``, ``lr``, ``epochs``, ``n_clusters`` and
            ``plot``.
    """
    import os  # local import: only needed for the embedding dump directory

    print("Using {} dataset".format(args.dataset_str))
    # Only the graph, the features and the true labels are used below.
    adj, features, _y_test, _tx, _ty, _test_mask, true_labels = load_data(args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing: the reconstruction target is the training graph
    # plus the identity matrix, densified into a float32 tensor.
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())

    # Re-weighting terms derived from the training adjacency matrix.
    n_entries = adj.shape[0] * adj.shape[0]
    adj_sum = adj.sum()
    pos_weight = float(n_entries - adj_sum) / adj_sum
    norm = n_entries / float((n_entries - adj_sum) * 2)

    if args.model == 'gcn_vae':
        model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    else:
        model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        z, mu, logvar = model(features, adj_norm)

        loss = loss_function(preds=model.dc(z), labels=adj_label,
                             mu=mu, logvar=logvar, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        # ``detach()`` is the supported replacement for ``.data``; ``cpu()``
        # is a no-op for CPU tensors and keeps the NumPy conversion valid if
        # the model ever lives on another device.
        hidden_emb = mu.detach().cpu().numpy()
        _roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)

        tqdm.write("Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".format(
            epoch + 1, cur_loss,
            ap_curr, time.time() - t))

        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            _cluster_and_report(hidden_emb, true_labels, args.n_clusters)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            # np.save does not create missing directories; without this the
            # first dump fails on a fresh checkout.
            os.makedirs('logs', exist_ok=True)
            np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)

    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    cm = _cluster_and_report(hidden_emb, true_labels, args.n_clusters)

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)

Beispiel #12

0

Datei anzeigen

# Seed NumPy and PyTorch (and CUDA when enabled) from the command-line seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# NOTE(review): pickle.load executes arbitrary code from the file; only use
# this with a trusted 'test1.pkl'.
with open('test1.pkl', 'rb') as f:
    x = pickle.load(f)
    print(x.shape)
adj, features = load_data(x, False)

# NOTE(review): nx.from_numpy_matrix was removed in NetworkX 3.0
# (from_numpy_array is the replacement) -- confirm the pinned version.
# ``G`` is not used within this excerpt.
G = nx.from_numpy_matrix(adj.toarray())
adj_train = preprocess_graph(adj)

# Rebuild the auto-encoder with sizes from ``args`` and restore the saved
# weights, then switch to evaluation mode.
model = GCNModelAE(nfeat=features.shape[1],
                   nhid=args.hidden,
                   nclass=args.ndim,
                   dropout=args.dropout)
model.load_state_dict(torch.load(args.saved_model))
model.eval()

# Forward pass, post-processing by ``try_data`` (defined elsewhere), then
# conversion to a NumPy array detached from the autograd graph.
output = model(features, adj_train)
output = try_data(output)
output = output.detach().numpy()

# Normalization is currently disabled: the raw output is used as-is.
# The StandardScaler variant is kept below for reference.
data = output
# scaler = StandardScaler().fit(output)
# data = scaler.transform(output)

# Convert to pandas
# Starts as an empty dict; it is populated outside this excerpt.
meta_df = {}

Beispiel #13

0

Datei anzeigen

# Load data
adj, features = load_data(dataset_str)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

# Hold out validation/test edges; training continues on the reduced graph.
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj)
adj = adj_train

if args.features == 0:
    # Featureless mode: replace the features with an identity matrix.
    # (Was ``sp.identiy``, a typo that raised AttributeError on this path.)
    features = sp.identity(features.shape[0])  # featureless

adj_norm = preprocess_graph(adj)

num_nodes = adj.shape[0]

# Convert the features to the tuple form returned by ``sparse_to_tuple`` and
# read the feature count and the number of stored entries from it.
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

# Create Model
# ``model`` stays None when ``model_str`` matches neither known model name.
model = None
if model_str == 'gcn_ae':
    model = GCNModelAE(num_features, features_nonzero)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(num_features, num_nodes, features_nonzero)

Beispiel #14

0

Datei anzeigen

Datei: run.py Projekt: seigercom/Networkalignment

}, {
    # 'features': tf.sparse_placeholder(tf.float32),
    'adj': tf.sparse_placeholder(tf.float32),
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'features': tf.sparse_placeholder(tf.float32,shape=tf.constant(A2[2], dtype=tf.int64)),
}]
# Dropout rate is fed at run time through a float32 placeholder.
dropout = tf.placeholder(tf.float32)
# Per-graph sizes, one list entry for each of the two graphs. The indexing
# suggests S*_ori and A* are (coords, values, shape) tuples -- TODO confirm.
num_nodes = [S1_ori[2][0],S2_ori[2][0]]
num_features = [A1[2][1],A2[2][1]]
features_nonzero = [A1[1].shape[0],A2[1].shape[0]]

# Create model
#map_model = Discriminator()
# model = None
# NOTE(review): with the ``model = None`` default commented out above,
# ``model`` is unbound here if ``model_str`` matches neither branch.
if model_str == 'gcn_ae':
    model = GCNModelAE(placeholders, num_features = num_features, features_nonzero = features_nonzero,dropout=dropout,flag=True)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(placeholders, num_features = num_features,num_nodes=num_nodes, features_nonzero = features_nonzero,dropout=dropout)


# Re-weighting terms, computed separately for each graph (N = S.shape[0]):
#   pos_weight = (N*N - sum(S)) / sum(S)
#   norm       = N*N / (2 * (N*N - sum(S)))
pos_weight1 = float(S1.shape[0] * S1.shape[0] - S1.sum()) / S1.sum()
norm1 = S1.shape[0] * S1.shape[0] / float((S1.shape[0] * S1.shape[0] - S1.sum()) * 2)
pos_weight2 = float(S2.shape[0] * S2.shape[0] - S2.sum()) / S2.sum()
norm2 = S2.shape[0] * S2.shape[0] / float((S2.shape[0] * S2.shape[0] - S2.sum()) * 2)

# Collected in the same graph order as the size lists above.
pos_weight = [pos_weight1,pos_weight2]
norm = [norm1,norm2]

# Optimizer
with tf.name_scope('optimizer'):
    if model_str == 'gcn_ae':

Beispiel #15

0

Datei anzeigen

Datei: train.py Projekt: codenameAggie/traffic_graph_VGAE

    'features': tf.sparse_placeholder(tf.float32),
    'adj': tf.sparse_placeholder(tf.float32),
    'adj_orig': tf.sparse_placeholder(tf.float32),
    'dropout': tf.placeholder_with_default(0., shape=())
}
# tf.placeholder
num_nodes = adj.shape[0]

# Convert the sparse feature matrix to the tuple form returned by
# ``sparse_to_tuple``; presumably (coords, values, shape) -- TODO confirm.
# The next two lines index that tuple.
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

# Create model
# ``model`` stays None when ``model_str`` matches neither known model name.
model = None
if model_str == 'gcn_ae':
    model = GCNModelAE(placeholders, num_features, features_nonzero)
elif model_str == 'gcn_vae':
    model = GCNModelVAE(placeholders, num_features, num_nodes,
                        features_nonzero)

# Re-weighting terms derived from ``adj`` (N = adj.shape[0]):
#   pos_weight = (N*N - sum(adj)) / sum(adj)
#   norm       = N*N / (2 * (N*N - sum(adj)))
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float(
    (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

# Optimizer
with tf.name_scope('optimizer'):
    if model_str == 'gcn_ae':
        opt = OptimizerAE(preds=model.reconstructions,
                          labels=tf.sparse_tensor_to_dense(
                              placeholders['adj_orig'],
                              pos_weight=pos_weight,