Exemple #1
0
def train_step(encoder, decoder, inp, targ, targ_lang, enc_hidden, BATCH_SIZE):
    """Run one teacher-forced training step of the encoder/decoder pair.

    The encoder is run once on ``inp``; the decoder is then stepped through
    the target sequence, always being fed the ground-truth token of the
    previous position. The summed per-step loss is differentiated with
    respect to the trainable variables of both networks and applied through
    the module-level ``optimizer`` object (which exposes ``loss_function``
    and ``optimizer`` attributes).

    Args:
        encoder: Callable ``encoder(inp, hidden) -> (output, hidden)``.
        decoder: Callable
            ``decoder(dec_input, hidden, enc_output) -> (predictions, hidden, _)``.
        inp: Input batch passed to the encoder.
        targ: Target batch; indexed as ``targ[:, t]`` per time step.
        targ_lang: Tokenizer-like object providing ``word_index['<start>']``.
        enc_hidden: Initial hidden state for the encoder.
        BATCH_SIZE: Number of sequences in the batch.

    Returns:
        The summed loss divided by ``targ.shape[1]``.
    """
    total_loss = 0
    seq_len = targ.shape[1]

    with tf.GradientTape() as tape:
        enc_output, state = encoder(inp, enc_hidden)

        # Every sequence in the batch starts decoding from the <start> token.
        start_id = targ_lang.word_index['<start>']
        step_input = tf.expand_dims([start_id] * BATCH_SIZE, 1)

        # Walk over the target positions (position 0 is the <start> token).
        for step in range(1, seq_len):
            target_tokens = targ[:, step]

            # Feed the encoder output to the decoder together with its state.
            predictions, state, _ = decoder(step_input, state, enc_output)

            # The loss is accumulated over the whole batch at this position.
            total_loss += optimizer.loss_function(target_tokens, predictions)

            # Teacher forcing: the true token becomes the next decoder input.
            step_input = tf.expand_dims(target_tokens, 1)

    trainable = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(total_loss, trainable)
    optimizer.optimizer.apply_gradients(zip(grads, trainable))
    return total_loss / int(seq_len)
Exemple #2
0
def gae_for(args):
    """Train a graph variational auto-encoder and report link-prediction scores.

    The adjacency matrix and node features are loaded with ``load_data``,
    edges are split into train/validation/test sets with ``mask_test_edges``,
    and a ``GCNModelVAE`` is optimised with Adam for ``args.epochs`` epochs.
    Validation average precision is printed after every epoch; test ROC and
    AP are printed once training has finished.

    Args:
        args: Namespace providing ``dataset_str``, ``hidden1``, ``hidden2``,
            ``dropout``, ``lr`` and ``epochs``.

    Returns:
        None. Results are written to standard output.
    """
    print("Using {} dataset".format(args.dataset_str))
    adj, features = load_data(args.dataset_str)
    n_nodes, feat_dim = features.shape
    print("#nodes={}".format(n_nodes))

    # Store original adjacency matrix (without diagonal entries) for later
    print(
        "Store original adjacency matrix (without diagonal entries) for later..."
    )
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    print("Sample the edges for training and testing...")

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    # Some preprocessing
    print("Some preprocessing...")
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())

    # Class-imbalance terms: ratio of absent to present entries in the
    # training adjacency, and the matching normalisation constant.
    n_entries = adj.shape[0] * adj.shape[0]
    n_present = adj.sum()
    norm = n_entries / float((n_entries - n_present) * 2)
    # Converted to a tensor exactly once. The original did this inside the
    # epoch loop, re-wrapping the already-converted tensor on every epoch.
    pos_weight = torch.Tensor([float(n_entries - n_present) / n_present])

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    hidden_emb = None
    for epoch in range(args.epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar = model(features, adj_norm)
        loss = loss_function(preds=recovered,
                             labels=adj_label,
                             mu=mu,
                             logvar=logvar,
                             n_nodes=n_nodes,
                             norm=norm,
                             pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        # The mean embedding is what gets scored on the validation edges.
        hidden_emb = mu.data.numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges,
                                          val_edges_false)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(cur_loss), "val_ap=", "{:.5f}".format(ap_curr),
              "time=", "{:.5f}".format(time.time() - t))

    print("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    print('Test ROC score: ' + str(roc_score))
    print('Test AP score: ' + str(ap_score))
def gae_for(args):
    """Train the importance-weighted graph VAE and log scores every epoch.

    Data are loaded with ``load_data``, edges are split with
    ``mask_test_edges``, and a ``GCNModelVAE`` is trained on ``args.device``
    with Adam. After each epoch the validation and test ROC/AP scores are
    printed and sent, together with the training loss, to the module-level
    ``writer`` via ``add_scalar``.

    Args:
        args: Namespace providing ``dataset_str``, ``device``, ``K``, ``J``,
            ``noise_dim``, ``hidden1``, ``hidden2``, ``dropout``, ``lr`` and
            ``epochs``.

    Returns:
        None.
    """
    print("Using {} dataset".format(args.dataset_str))
    device = args.device

    adj, features = load_data(args.dataset_str)
    features = features.to(device)
    n_nodes, feat_dim = features.shape

    # Keep a diagonal-free copy of the full graph for scoring later on.
    diagonal = sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]),
                             shape=adj.shape)
    adj_orig = adj - diagonal
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(adj)
    adj = adj_train

    # Normalised training graph fed to the encoder.
    adj_norm = preprocess_graph(adj).to(device)

    # Dense reconstruction target (training graph plus self loops), repeated
    # K times along a new trailing axis.
    dense_label = torch.FloatTensor(
        (adj_train + sp.eye(adj_train.shape[0])).toarray())
    label_tiles = dense_label.unsqueeze(2).repeat(1, 1, args.K).to(device)

    # Class-imbalance weighting derived from the training adjacency.
    n_entries = adj.shape[0] * adj.shape[0]
    n_present = adj.sum()
    pos_weight = torch.tensor(
        float(n_entries - n_present) / n_present).float().to(device=device)
    norm = n_entries / float((n_entries - n_present) * 2)

    model = GCNModelVAE(args.noise_dim + feat_dim, feat_dim, args.hidden1,
                        args.hidden2, args.dropout, args.K, args.J,
                        args.noise_dim, device).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    for epoch in range(args.epochs):
        # Warm-up factor: grows linearly with the epoch and is capped at 1.
        warm_up = torch.min(torch.FloatTensor([epoch / 300, 1])).to(device)

        t = time.time()
        model.train()
        optimizer.zero_grad()

        outputs = model(features, adj_norm)
        reconstruct_iw, log_prior_iw, log_H_iw, psi_iw_vec = outputs
        hidden_emb = psi_iw_vec.data.contiguous().cpu().numpy()

        loss = loss_function(reconstructed_iw=reconstruct_iw,
                             log_prior_iw=log_prior_iw,
                             log_H_iw=log_H_iw,
                             adj_orig_tile=label_tiles,
                             nodes=n_nodes,
                             K=args.K,
                             pos_weight=pos_weight,
                             norm=norm,
                             warm_up=warm_up,
                             device=device)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        val_roc, val_ap = get_roc_score(hidden_emb, adj_orig, val_edges,
                                        val_edges_false)
        test_roc, test_ap = get_roc_score(hidden_emb, adj_orig, test_edges,
                                          test_edges_false)

        print('Epoch:', '%04d --->   ' % (epoch + 1),
              'training_loss = {:.5f}   '.format(cur_loss),
              'val_AP = {:.5f}   '.format(val_ap),
              'val_ROC = {:.5f}   '.format(val_roc),
              'test_AP = {:.5f}   '.format(test_ap),
              'test_ROC = {:.5f}   '.format(test_roc),
              'time = {:.5f}   '.format(time.time() - t))

        # Same tags, in the same order, as individual add_scalar calls.
        scalars = (
            ('Loss/train_loss', cur_loss),
            ('Average Precision/test', test_ap),
            ('Average Precision/val', val_ap),
            ('Area under Roc(AUC)/test', test_roc),
            ('Area under Roc(AUC)/val', val_roc),
        )
        for tag, value in scalars:
            writer.add_scalar(tag, value, epoch)

    print("Optimization Finished!")
Exemple #4
0
def gae_for(args):
    """Train a ``LinTrans`` encoder on smoothed features and track clustering.

    Node features are smoothed by repeatedly multiplying them with the
    normalised adjacency matrices returned by ``preprocess_graph``. Node pairs
    selected by ``update_similarity`` serve as positive/negative training
    samples; every ``args.upd`` epochs the embeddings are recomputed, the
    thresholds and sample sets are refreshed, and the embeddings are evaluated
    with spectral clustering. The metrics of the evaluation whose first
    ``clustering`` return value is highest are reported at the end.

    Args:
        args: Namespace providing ``dataset`` (one of ``'cora'``,
            ``'citeseer'``, ``'pubmed'``, ``'wiki'``), ``dims``,
            ``linlayers``, ``gnnlayers``, ``lr``, ``cuda``, ``bs``,
            ``epochs``, ``upd``, ``upth_st``, ``upth_ed``, ``lowth_st`` and
            ``lowth_ed``.

    Returns:
        None. Progress and final metrics are written through ``tqdm.write``.

    Raises:
        ValueError: If ``args.dataset`` is not one of the supported names.
    """
    print("Using {} dataset".format(args.dataset))

    # Number of clusters used for each supported dataset.
    cluster_counts = {'cora': 7, 'citeseer': 6, 'pubmed': 3, 'wiki': 17}
    if args.dataset not in cluster_counts:
        raise ValueError("Unsupported dataset: {!r} (expected one of {})".format(
            args.dataset, sorted(cluster_counts)))
    n_clusters = cluster_counts[args.dataset]
    Cluster = SpectralClustering(n_clusters=n_clusters,
                                 affinity='precomputed',
                                 random_state=0)

    # A single device for every tensor. The original mixed a conditional
    # `if args.cuda:` with unconditional `.cuda()` calls and left `inx`
    # undefined on CPU.
    device = torch.device('cuda' if args.cuda else 'cpu')

    adj, features, true_labels, idx_train, idx_val, idx_test = load_data(
        args.dataset)
    n_nodes, feat_dim = features.shape
    dims = [feat_dim] + args.dims
    layers = args.linlayers

    # Drop self loops from the adjacency matrix.
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()

    adj_norm_s = preprocess_graph(adj, args.gnnlayers, norm='sym', renorm=True)
    sm_fea_s = sp.csr_matrix(features).toarray()

    print('Laplacian Smoothing...')
    for a in adj_norm_s:
        sm_fea_s = a.dot(sm_fea_s)

    # Baseline: cluster the smoothed features before any training.
    db, best_acc, best_nmi, best_adj = clustering(Cluster, sm_fea_s,
                                                  true_labels)
    best_cl = db

    model = LinTrans(layers, dims)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    sm_fea_s = torch.FloatTensor(sm_fea_s)
    model.to(device)
    inx = sm_fea_s.to(device)

    pos_num = len(adj.indices)
    neg_num = n_nodes * n_nodes - pos_num

    # Per-update step applied to the upper / lower similarity thresholds.
    up_eta = (args.upth_ed - args.upth_st) / (args.epochs / args.upd)
    low_eta = (args.lowth_ed - args.lowth_st) / (args.epochs / args.upd)

    pos_inds, neg_inds = update_similarity(normalize(sm_fea_s.numpy()),
                                           args.upth_st, args.lowth_st,
                                           pos_num, neg_num)
    upth, lowth = update_threshold(args.upth_st, args.lowth_st, up_eta,
                                   low_eta)

    bs = min(args.bs, len(pos_inds))
    pos_inds_dev = torch.LongTensor(pos_inds).to(device)

    print('Start Training...')
    for epoch in tqdm(range(args.epochs)):
        st, ed = 0, bs
        model.train()
        length = len(pos_inds)

        while ed <= length:
            batch = ed - st
            # Pair every positive index with a freshly drawn negative one.
            sampled_neg = torch.LongTensor(
                np.random.choice(neg_inds, size=batch)).to(device)
            sampled_inds = torch.cat((pos_inds_dev[st:ed], sampled_neg), 0)
            t = time.time()
            optimizer.zero_grad()

            # A flat pair index encodes (row, column) of the n_nodes x n_nodes
            # similarity matrix.
            xind = sampled_inds // n_nodes
            yind = sampled_inds % n_nodes
            zx = model(torch.index_select(inx, 0, xind))
            zy = model(torch.index_select(inx, 0, yind))

            batch_label = torch.cat(
                (torch.ones(batch), torch.zeros(batch))).to(device)
            batch_pred = model.dcs(zx, zy)
            loss = loss_function(adj_preds=batch_pred,
                                 adj_labels=batch_label,
                                 n_nodes=batch)

            loss.backward()
            cur_loss = loss.item()
            optimizer.step()

            st = ed
            # Stretch the final batch so it ends exactly at `length` instead
            # of leaving a short remainder behind.
            if ed < length and ed + bs >= length:
                ed = length
            else:
                ed += bs

        if (epoch + 1) % args.upd == 0:
            model.eval()
            # Only the embedding values are needed here, not their gradients.
            with torch.no_grad():
                mu = model(inx)
            hidden_emb = mu.cpu().numpy()

            upth, lowth = update_threshold(upth, lowth, up_eta, low_eta)
            pos_inds, neg_inds = update_similarity(hidden_emb, upth, lowth,
                                                   pos_num, neg_num)
            bs = min(args.bs, len(pos_inds))
            pos_inds_dev = torch.LongTensor(pos_inds).to(device)

            tqdm.write("Epoch: {}, train_loss_gae={:.5f}, time={:.5f}".format(
                epoch + 1, cur_loss,
                time.time() - t))

            db, acc, nmi, adjscore = clustering(Cluster, hidden_emb,
                                                true_labels)

            if db >= best_cl:
                best_cl = db
                best_acc = acc
                best_nmi = nmi
                best_adj = adjscore

    tqdm.write("Optimization Finished!")
    tqdm.write('best_acc: {}, best_nmi: {}, best_adj: {}'.format(
        best_acc, best_nmi, best_adj))
Exemple #5
0
            hidden_emb = None
            t = time.time()
            model.train()
            optimizer.zero_grad()
            features_bs = features_bs.cuda()
            adj_norm = adj_norm.cuda()

            # print('features_bs :')
            # print(features_bs)
            # print('adj_norm :')
            # print(adj_norm)
            recovered, mu, logvar = model(features_bs, adj_norm)
            loss = loss_function(preds=recovered,
                                 labels=adj_label,
                                 mu=mu,
                                 logvar=logvar,
                                 n_nodes=n_nodes,
                                 norm=norm,
                                 pos_weight=pos_weight)

            cpu_loss = loss.cpu()
            cur_loss_list.append(cpu_loss.data.numpy().tolist())
            loss.backward()
            # cur_loss = loss.item()
            optimizer.step()

            hidden_emb = mu.cpu().data.numpy()

        print(cur_loss_list)
        print(len(cur_loss_list))
        # one epoch one mean loss
Exemple #6
0
def _walk_training_pairs(walk, window_size, use_context):
    """Return the ``(source, targets)`` skip-gram pairs for one random walk.

    Without context windows the walk's first node is the single source and
    all following nodes are its targets. With context windows every position
    is treated as a centre node whose targets are the nodes at most
    ``window_size`` steps away in the walk; centres with no neighbour inside
    the walk are skipped.
    """
    if not use_context:
        # First item in the walk is the starting node.
        return [(int(walk[0]), [int(node) for node in walk[1:]])]

    pairs = []
    walk_len = len(walk)
    for center_pos in range(walk_len):
        # Clamp the window so it never leaves the walk.
        lo = max(0, center_pos - window_size)
        hi = min(walk_len, center_pos + window_size + 1)
        context = [int(walk[pos]) for pos in range(lo, hi)
                   if pos != center_pos]
        if context:
            pairs.append((int(walk[center_pos]), context))
    return pairs


def gae_for(args):
    """Train a graph (variational) auto-encoder with optional DeepWalk updates.

    Data are loaded with ``load_data`` and edges are split with
    ``mask_test_edges``. Each epoch runs a forward pass of the GCN model,
    optionally trains a ``SkipGram`` model on random walks over the graph
    (``args.dw == 1``), and then back-propagates the auto-encoder loss. Every
    tenth epoch the embeddings are clustered with KMeans, scored on the test
    edges and saved under ``logs/``. Final test scores and clustering metrics
    are reported once training has finished.

    Args:
        args: Namespace providing ``dataset_str``, ``model``, ``hidden1``,
            ``hidden2``, ``dropout``, ``lr``, ``epochs``, ``n_clusters``,
            ``plot`` and ``dw``; when ``dw == 1`` also ``lr_dw``,
            ``number_walks``, ``full_number_walks``, ``walk_length``,
            ``window_size``, ``context`` and ``ns``.

    Returns:
        None. Progress is reported through ``print`` and ``tqdm.write``.
    """
    print("Using {} dataset".format(args.dataset_str))
    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(
        args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    # Before proceeding further, make the structure for doing deepWalk
    if args.dw == 1:
        print('Using deepWalk regularization...')
        G = load_edgelist_from_csr_matrix(adj_orig, undirected=True)
        print("Number of nodes: {}".format(len(G.nodes())))
        num_walks = len(G.nodes()) * args.number_walks
        print("Number of walks: {}".format(num_walks))
        data_size = num_walks * args.walk_length
        print("Data size (walks*length): {}".format(data_size))

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    if args.model == 'gcn_vae':
        model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    else:
        model = GCNModelAE(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if args.dw == 1:
        sg = SkipGram(args.hidden2, adj.shape[0])
        optimizer_dw = optim.Adam(sg.parameters(), lr=args.lr_dw)

        # Build the node order for the chunked random walks once, up front.
        # NOTE(review): this shuffle uses an unseeded random.Random(), so the
        # order differs between runs - confirm whether SEED was intended.
        nodes_in_G = list(G.nodes())
        chunks = len(nodes_in_G) // args.number_walks
        random.Random().shuffle(nodes_in_G)

        # Keeps the per-epoch log line valid even if no walk yields a pair.
        cur_dw_loss = float('nan')

    hidden_emb = None
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        z, mu, logvar = model(features, adj_norm)

        # DeepWalk regularisation: runs before the auto-encoder loss below is
        # back-propagated.
        if args.dw == 1:
            sg.train()
            if args.full_number_walks > 0:
                walks = build_deepwalk_corpus(G,
                                              num_paths=args.full_number_walks,
                                              path_length=args.walk_length,
                                              alpha=0,
                                              rand=random.Random(SEED))
            else:
                walks = build_deepwalk_corpus_iter(
                    G,
                    num_paths=args.number_walks,
                    path_length=args.walk_length,
                    alpha=0,
                    rand=random.Random(SEED),
                    chunk=epoch % chunks,
                    nodes=nodes_in_G)
            for walk in walks:
                if args.ns == 1:
                    # Draw negatives from the nodes that are not on this walk.
                    neg_nodes = []
                    pos_nodes = set(walk)
                    while len(neg_nodes) < args.walk_length - 1:
                        rand_node = random.randint(0, n_nodes - 1)
                        if rand_node not in pos_nodes:
                            neg_nodes.append(rand_node)
                    neg_nodes = torch.from_numpy(np.array(neg_nodes)).long()

                # One skip-gram update per (source, targets) pair. The
                # original built the context pair from `center_node_pos`
                # before that loop variable existed, which raised on the
                # first walk whenever args.context == 1.
                walk_pairs = _walk_training_pairs(walk, args.window_size,
                                                  args.context == 1)
                for src, targets in walk_pairs:
                    src_node = torch.from_numpy(np.array([src])).long()
                    tgt_nodes = torch.from_numpy(np.array(targets)).long()
                    optimizer_dw.zero_grad()
                    log_pos = sg(src_node, tgt_nodes, neg_sample=False)
                    if args.ns == 1:
                        loss_neg = sg(src_node, neg_nodes, neg_sample=True)
                        loss_dw = log_pos + loss_neg
                    else:
                        loss_dw = log_pos
                    loss_dw.backward(retain_graph=True)
                    cur_dw_loss = loss_dw.item()
                    optimizer_dw.step()

        loss = loss_function(preds=model.dc(z),
                             labels=adj_label,
                             mu=mu,
                             logvar=logvar,
                             n_nodes=n_nodes,
                             norm=norm,
                             pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.data.numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges,
                                          val_edges_false)

        if args.dw == 1:
            tqdm.write(
                "Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}"
                .format(epoch + 1, cur_loss, cur_dw_loss, ap_curr,
                        time.time() - t))
        else:
            tqdm.write(
                "Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".
                format(epoch + 1, cur_loss, ap_curr,
                       time.time() - t))

        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            kmeans = KMeans(n_clusters=args.n_clusters,
                            random_state=0).fit(hidden_emb)
            predict_labels = kmeans.predict(hidden_emb)
            cm = clustering_metrics(true_labels, predict_labels)
            cm.evaluationClusterModelFromLabel(tqdm)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                                test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)

    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)
Exemple #7
0
def gae_for(args):
    """Train a graph (variational) auto-encoder and evaluate its embeddings.

    Data are loaded with ``load_data`` and edges are split with
    ``mask_test_edges``. A ``GCNModelVAE`` (when ``args.model == 'gcn_vae'``)
    or a ``GCNModelAE`` is trained with Adam. Validation AP is reported every
    epoch; every tenth epoch the embeddings are clustered with KMeans, scored
    on the test edges and saved under ``logs/``. Final test scores and
    clustering metrics are reported at the end, with an optional cluster plot
    when ``args.plot == 1``.

    Args:
        args: Namespace providing ``dataset_str``, ``model``, ``hidden1``,
            ``hidden2``, ``dropout``, ``lr``, ``epochs``, ``n_clusters`` and
            ``plot``.

    Returns:
        None. Progress is reported through ``print`` and ``tqdm.write``.
    """
    print("Using {} dataset".format(args.dataset_str))
    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(args.dataset_str)
    n_nodes, feat_dim = features.shape

    # Diagonal-free copy of the full graph, used for scoring later.
    self_loops = sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj_orig = adj - self_loops
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false,
     test_edges, test_edges_false) = mask_test_edges(adj)
    adj = adj_train

    # Normalised training graph and dense reconstruction target.
    adj_norm = preprocess_graph(adj)
    adj_label = torch.FloatTensor((adj_train + sp.eye(adj_train.shape[0])).toarray())

    # Class-imbalance terms derived from the training adjacency.
    total_entries = adj.shape[0] * adj.shape[0]
    present_entries = adj.sum()
    pos_weight = float(total_entries - present_entries) / present_entries
    norm = total_entries / float((total_entries - present_entries) * 2)

    model_cls = GCNModelVAE if args.model == 'gcn_vae' else GCNModelAE
    model = model_cls(feat_dim, args.hidden1, args.hidden2, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    def _report_clusters(embedding):
        """Cluster ``embedding`` with KMeans and report the metrics."""
        fitted = KMeans(n_clusters=args.n_clusters, random_state=0).fit(embedding)
        assigned = fitted.predict(embedding)
        metrics = clustering_metrics(true_labels, assigned)
        metrics.evaluationClusterModelFromLabel(tqdm)
        return metrics

    hidden_emb = None
    for epoch in tqdm(range(args.epochs)):
        started = time.time()
        model.train()
        optimizer.zero_grad()
        z, mu, logvar = model(features, adj_norm)

        loss = loss_function(preds=model.dc(z), labels=adj_label,
                             mu=mu, logvar=logvar, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight)
        loss.backward()
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = mu.data.numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)

        tqdm.write("Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".format(
            epoch + 1, cur_loss, ap_curr, time.time() - started))

        # Only every tenth epoch gets the full evaluation and a snapshot.
        if (epoch + 1) % 10 != 0:
            continue
        tqdm.write("Evaluating intermediate results...")
        _report_clusters(hidden_emb)
        roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
        tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
        np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)

    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    cm = _report_clusters(hidden_emb)

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)
Exemple #8
0
def gae_for(args):
    print("Using {} dataset".format(args.dataset_str))
    # adj, features, y_test, tx, ty, test_maks, true_labels = load_data('cora')
    # print(true_labels)
    # adj, features, y_test, test_maks, true_labels=load_npz('amazon_electronics_photo')
    # print(true_labels)

    # adj=preprocess_high_order_adj( adj, 2, 0.01 )
    # print(adj)

    # if args.dataset_split == 'jknet':
    g, features, true_labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
        args.dataset_str, None, 0.6, 0.2)
    adj = g.adj(scipy_fmt='coo')
    true_labels = true_labels.detach().numpy()
    # print(true_labels)
    # else:
    #     g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels = utils_data.load_data(
    #         args.dataset_str, args.dataset_split)

    args.n_clusters = true_labels.max() + 1
    print(args.n_clusters, "ssssssss")

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    # adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    # adj_norm =  torch.sparse.FloatTensor(sp.coo_matrix(adj))
    # adj_norm=torch.tensor(adj.todense(),dtype=torch.float)
    # print(adj_norm)
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    # adj_label = sparse_to_tuple(adj_label)
    adj_label = torch.FloatTensor(adj_label.toarray())

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    z_x = torch.zeros(features.shape[0], args.hidden1)
    z_w = torch.zeros(features.shape[0], args.hidden2)
    z_shuffle = torch.cat((features, z_x, z_w), axis=1)
    n_nodes, feat_dim = z_shuffle.shape

    model = GCNModelVAE(feat_dim, args.hidden1, args.hidden2, args.dropout,
                        args.n_clusters)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    z_x, mu_x, _, z_w, mu_w, _, _, logvar_px, qz = model(z_shuffle, adj_norm)
    z_shuffle = torch.cat((features, z_x.detach_(), z_w.detach_()), axis=1)
    hidden_emb = None
    for epoch in tqdm(range(args.epochs)):
        t = time.time()
        model.train()
        optimizer.zero_grad()

        # z_shuffle=torch.cat((features,z_x.detach_(),z_w.detach_()),axis=1)
        z_x, mu_x, logvar_x, z_w, mu_w, logvar_w, mu_px, logvar_px, qz = model(
            z_shuffle, adj_norm)

        # print(z_x.shape,"z_x.shape")
        # After back-propagating gae loss, now do the deepWalk regularization

        # mu_x = mu_x.unsqueeze(-1)
        # mu_x = mu_x.expand(-1, args.hidden2)

        logvar_x1 = logvar_x.unsqueeze(-1)
        logvar_x1 = logvar_x1.expand(-1, args.hidden2, args.n_clusters)

        mu_x1 = mu_x.unsqueeze(-1)
        mu_x1 = mu_x1.expand(-1, args.hidden2, args.n_clusters)
        if torch.cuda.is_available():
            mu_x1 = mu_x1.cuda()
            logvar_x1 = logvar_x1.cuda()

        # KLD_W = -0.5 / n_nodes* torch.sum(1 + logvar_w - mu_w.pow(2) - logvar_w.exp())
        # KLD_Z = -torch.sum(qz * torch.log(qz + 1e-10))/n_nodes
        KLD_Z = -0.5 / n_nodes * torch.mean(
            torch.sum(1 + qz * torch.log(qz + 1e-10), 1))
        # print(KLD_Z,"klz")

        # qz = qz.unsqueeze(-1)
        # qz = qz.expand(-1, 1)

        # print(logvar_px.shape,logvar_x1.shape,"hhhhi")
        # KLD_QX_PX = 0.5 / n_nodes* (((logvar_px - logvar_x) + ((logvar_x.exp() + (mu_x - mu_px).pow(2))/logvar_px.exp())) - 1)
        # # print(KLD_QX_PX.shape,qz.shape,"hhhhi")
        # KLD_QX_PX = KLD_QX_PX.unsqueeze(1)
        # qz = qz.unsqueeze(-1)
        # print(KLD_QX_PX.shape,qz.shape,"hhhhi")

        # KLD_QX_PX = KLD_QX_PX.expand(2708, 1, args.hidden2)
        KLD_QX_PX = loss_function(preds=model.dc(z_x),
                                  labels=adj_label,
                                  mu=(mu_x1 - mu_px),
                                  logvar=(logvar_px - logvar_x1),
                                  n_nodes=n_nodes,
                                  norm=norm,
                                  pos_weight=pos_weight)
        KLD_QX_PX = KLD_QX_PX = KLD_QX_PX.expand(n_nodes, 1, args.hidden2)
        E_KLD_QX_PX = torch.sum(
            torch.bmm(KLD_QX_PX,
                      qz.unsqueeze(-1) / n_nodes))
        # print(E_KLD_QX_PX)
        # print(model.dc(z_x).shape,adj_label.shape,"hdhhhhhhd")

        model.train()
        optimizer.zero_grad()
        lbl_1 = torch.ones(n_nodes)
        lbl_2 = torch.zeros(n_nodes)
        lbl = torch.cat((lbl_1, lbl_2))
        idx = np.random.permutation(n_nodes)
        # print(features.shape,z_x.shape,adj_norm.shape)
        shuf_fts = z_shuffle[idx, :]
        # FeatHL=torch.cat((features,shuf_fts),axis=1)
        # _, featHL_dim = FeatHL.shape
        # modelHL = GCNModelVAE(featHL_dim, args.hidden1, args.hidden2, args.dropout,2)
        n_nodes1, feat_dim1 = z_shuffle.shape
        # model1 = GCNModelVAE(feat_dim1, args.hidden1, args.hidden2, args.dropout,args.n_clusters)
        # z_xL1, mu_xL1, logvar_xL1,z_wL1, mu_wL1, logvar_wL1,mu_pxL1, logvar_pxL1,_ = model1(z_shuffle, adj_norm)

        z_xL2, mu_xL2, logvar_xL2, z_wL2, mu_wL2, logvar_wL2, mu_pxL2, logvar_pxL2, qz2 = model(
            shuf_fts, adj_norm)
        KLD_Z2 = 0.5 / n_nodes * torch.mean(
            torch.sum(1 + qz2 * torch.log(qz2 + 1e-10), 1))

        KLD_QX_PX2 = loss_function(preds=model.dc(z_wL2),
                                   labels=adj_label,
                                   mu=mu_wL2,
                                   logvar=logvar_wL2,
                                   n_nodes=n_nodes,
                                   norm=norm,
                                   pos_weight=pos_weight)
        KLD_QX_PX2 = KLD_QX_PX2.expand(n_nodes, 1, args.hidden2)
        E_KLD_QX_PX2 = torch.sum(
            torch.bmm(KLD_QX_PX2,
                      qz2.unsqueeze(-1) / n_nodes))

        lossF = (1.0/loss_function(preds=model.dc(z_xL2), labels=adj_label,
                             mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes,
                             norm=norm, pos_weight=pos_weight))+\
                             (1.0/E_KLD_QX_PX2)+KLD_Z2
        # z_xL2, mu_xL2, logvar_xL2,z_wL2, mu_wL2, logvar_wL2,mu_pxL2, logvar_pxL2,qz2 = model(shuf_fts, adj_norm)
        # lossF = (1.0/loss_function(preds=model.dc(z_xL2), labels=adj_label,
        #                      mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes,
        #                      norm=norm, pos_weight=pos_weight))+\
        #                      (1.0/E_KLD_QX_PX2)+ KLD_Z2+lossF

        # lossF = loss_functionShuffle(preds=model.dc(z_xL2), labels=adj_label,
        #                      mu=mu_xL2, logvar=logvar_xL2, n_nodes=n_nodes,
        #                      norm=norm, pos_weight=pos_weight)+\
        #                      loss_functionShuffle(preds=model.dc(z_wL2), labels=adj_label,
        #                      mu=mu_wL2, logvar=logvar_wL2, n_nodes=n_nodes,
        #                      norm=norm, pos_weight=pos_weight)
        # LossF=Variable(torch.tensor(lossF).type(torch.FloatTensor),requires_grad=True)
        # lossF.backward()

        loss = loss_function(
            preds=model.dc(z_x),
            labels=adj_label,
            mu=mu_x,
            logvar=logvar_x,
            n_nodes=n_nodes,
            norm=norm,
            pos_weight=pos_weight) + loss_function(
                preds=model.dc(z_w),
                labels=adj_label,
                mu=mu_w,
                logvar=logvar_w,
                n_nodes=n_nodes,
                norm=norm,
                pos_weight=pos_weight) + lossF + KLD_Z + E_KLD_QX_PX
        # if lossF<0.02:
        #   break
        # lossF.backward()
        # HL=np.concatenate((mu_xL1.data.numpy(),mu_wL1.data.numpy()),axis=1)
        # HL2=np.concatenate((mu_xL2.data.numpy(),mu_wL2.data.numpy()),axis=1)
        # kmeans = KMeans(n_clusters=2, random_state=0).fit(HL)
        # kmeans2 = KMeans(n_clusters=2, random_state=0).fit(HL2)
        # predict_labels = kmeans.predict(HL)
        # predict_labels2 = kmeans.predict(HL2)
        # pr=np.amax(kmeans.fit_transform(HL), axis=1)
        # pr2=np.amax(kmeans.fit_transform(HL2), axis=1)
        # pr=torch.cat((torch.tensor(pr), torch.tensor(pr2)))
        # b_xent = nn.BCEWithLogitsLoss()
        # lossF = b_xent(torch.FloatTensor(pr),torch.FloatTensor(lbl))

        # print(lossF)
        # print(loss, lossF)
        loss.backward(retain_graph=True)
        cur_loss = loss.item()
        optimizer.step()

        hidden_emb = np.concatenate((mu_x.data.numpy(), mu_w.data.numpy()),
                                    axis=1)
        # hidden_emb=mu_x.data.numpy()
        # print(hidden_emb.shape)
        # roc_curr, ap_curr = get_roc_score(hidden_emb, adj_orig, val_edges, val_edges_false)

        # if args.dw == 1:
        #     tqdm.write("Epoch: {}, train_loss_gae={:.5f}, train_loss_dw={:.5f}, val_ap={:.5f}, time={:.5f}".format(
        #         epoch + 1, cur_loss, cur_dw_loss,
        #         ap_curr, time.time() - t))
        # else:
        #     tqdm.write("Epoch: {}, train_loss_gae={:.5f}, val_ap={:.5f}, time={:.5f}".format(
        #         epoch + 1, cur_loss,
        #         ap_curr, time.time() - t))
        roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                            test_edges_false)
        # # tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
        wandb.log({"roc_score1": roc_score})
        wandb.log({"ap_score1": ap_score})
        if (epoch + 1) % 10 == 0:
            tqdm.write("Evaluating intermediate results...")
            kmeans = KMeans(n_clusters=args.n_clusters,
                            random_state=0).fit(hidden_emb)
            predict_labels = kmeans.predict(hidden_emb)
            # print(np.argmax(kmeans.fit_transform(hidden_emb), axis=1).shape)
            pr = np.amax(kmeans.fit_transform(hidden_emb), axis=1)
            b_xent = nn.BCEWithLogitsLoss()
            print(loss, lossF)
            # lossF = b_xent(torch.FloatTensor(pr),torch.FloatTensor(true_labels))
            cm = clustering_metrics(true_labels, predict_labels)
            cm.evaluationClusterModelFromLabel(tqdm)
            roc_score, ap_score = get_roc_score(hidden_emb, adj_orig,
                                                test_edges, test_edges_false)
            tqdm.write('ROC: {}, AP: {}'.format(roc_score, ap_score))
            # np.save('logs/emb_epoch_{}.npy'.format(epoch + 1), hidden_emb)
            print(loss, lossF)
            print("Kmeans ACC", purity_score(true_labels, predict_labels))
            # roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges, test_edges_false)
            # tqdm.write('Test ROC score: ' + str(roc_score))
            # tqdm.write('Test AP score: ' + str(ap_score))
    tqdm.write("Optimization Finished!")

    roc_score, ap_score = get_roc_score(hidden_emb, adj_orig, test_edges,
                                        test_edges_false)
    tqdm.write('Test ROC score: ' + str(roc_score))
    tqdm.write('Test AP score: ' + str(ap_score))
    kmeans = KMeans(n_clusters=args.n_clusters, random_state=0).fit(hidden_emb)
    predict_labels = kmeans.predict(hidden_emb)
    cm = clustering_metrics(true_labels, predict_labels)
    cm.evaluationClusterModelFromLabel(tqdm)
    print("Kmeans ACC", purity_score(true_labels, predict_labels))

    if args.plot == 1:
        cm.plotClusters(tqdm, hidden_emb, true_labels)
Exemple #9
0
def gae_for(args):
    """Set up a GCNModelSIGVAE on ``args.dataset_str`` and run its training loop.

    The code below loads the graph, masks edges into train/val/test splits,
    builds the model and an Adam optimizer, then trains for ``args.epochs``
    epochs, printing per-epoch statistics and periodically appending test
    ROC/AP scores to ``results.txt``.

    Args:
        args: Configuration object. The attributes read here are
            ``dataset_str``, ``edim``, ``hidden1``, ``hidden2``, ``dropout``,
            ``encsto``, ``gdc``, ``noise_dist``, ``K``, ``J``, ``device``,
            ``lr``, ``epochs`` and ``monit``.

    NOTE(review): this docstring covers the setup and the training loop only;
    it makes no statement about a return value.
    """
    print("Using {} dataset".format(args.dataset_str))
    # Set tensor dtype to float16
    # torch.set_default_tensor_type(torch.HalfTensor)

    adj, features = load_data(args.dataset_str)
    # `features` is unpacked as a rank-3 object; the leading dimension is
    # discarded (presumably a batch axis of size 1 -- TODO confirm).
    _, n_nodes, feat_dim = features.shape

    # Store original adjacency matrix (without diagonal entries) for later
    # NOTE(review): `adj_orig` is not referenced again in the code below.
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Split the edges; from here on `adj` refers to the training adjacency.
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    # Reconstruction target: training adjacency plus the identity
    # (self-loops), converted to a dense float tensor.
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    # adj_label = sparse_to_tuple(adj_label)
    adj_label = torch.FloatTensor(adj_label.toarray())

    # With N = adj.shape[0] and S = adj.sum():
    #   pos_weight = (N*N - S) / S
    #   norm       = N*N / (2 * (N*N - S))
    # Both are handed to loss_function below.
    pos_weight = torch.tensor(
        [float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    model = GCNModelSIGVAE(args.edim,
                           feat_dim,
                           args.hidden1,
                           args.hidden2,
                           args.dropout,
                           encsto=args.encsto,
                           gdc=args.gdc,
                           ndist=args.noise_dist,
                           copyK=args.K,
                           copyJ=args.J,
                           device=args.device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Most recent embedding as a NumPy array; updated every epoch.
    hidden_emb = None

    # Move the model and every tensor used in the loop to the target device.
    model.to(args.device)
    features = features.to(args.device)
    adj_norm = adj_norm.to(args.device)
    adj_label = adj_label.to(args.device)
    pos_weight = pos_weight.to(args.device)

    for epoch in range(args.epochs):
        t = time.time()
        # Re-enter training mode each epoch; the monitoring branch at the
        # bottom of the loop switches the model to eval mode.
        model.train()
        optimizer.zero_grad()
        recovered, mu, logvar, z, z_scaled, eps, rk, snr = model(
            features, adj_norm)
        loss_rec, loss_prior, loss_post = loss_function(preds=recovered,
                                                        labels=adj_label,
                                                        mu=mu,
                                                        logvar=logvar,
                                                        emb=z,
                                                        eps=eps,
                                                        n_nodes=n_nodes,
                                                        norm=norm,
                                                        pos_weight=pos_weight)

        # Warm-up factor: grows linearly from 0 to 1 over the first 300
        # epochs and stays at 1 afterwards.
        WU = np.min([epoch / 300., 1.])
        reg = (loss_post - loss_prior) * WU / (n_nodes**2)

        # NOTE(review): `reg` already contains one factor of WU, so the
        # regulariser is effectively weighted by WU**2 here -- confirm that
        # the double application is intended.
        loss_train = loss_rec + WU * reg
        # loss_train = loss_rec
        loss_train.backward()

        cur_loss = loss_train.item()
        cur_rec = loss_rec.item()
        # cur_rec_bce = loss_rec1.item()
        optimizer.step()

        # Validation scores use the embedding produced by the training-mode
        # forward pass above (i.e. before this epoch's optimizer step).
        hidden_emb = z_scaled.detach().cpu().numpy()
        roc_curr, ap_curr = get_roc_score(hidden_emb, val_edges,
                                          val_edges_false, args.gdc)

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(cur_loss), "rec_loss=", "{:.5f}".format(cur_rec),
              "val_ap=", "{:.5f}".format(ap_curr), "time=",
              "{:.5f}".format(time.time() - t))
        # print(rk.detach().cpu().numpy())

        cur_snr = snr.detach().cpu().numpy()
        print("SNR: ", cur_snr)

        # Every `args.monit` epochs: score the test edges with a fresh
        # eval-mode forward pass and append the result to results.txt.
        if ((epoch + 1) % args.monit == 0):
            model.eval()
            # NOTE(review): this forward pass is not wrapped in
            # torch.no_grad(), so autograd state is presumably still recorded
            # for it -- confirm whether that is needed.
            recovered, mu, logvar, z, z_scaled, eps, rk, _ = model(
                features, adj_norm)
            # Overwrites the training-pass embedding stored earlier in this
            # epoch.
            hidden_emb = z_scaled.detach().cpu().numpy()
            roc_score, ap_score = get_roc_score(hidden_emb, test_edges,
                                                test_edges_false, args.gdc)
            rslt = "Test ROC score: {:.4f}, Test AP score: {:.4f}\n".format(
                roc_score, ap_score)
            print("\n", rslt, "\n")
            # "a+" opens in append mode, so results accumulate across runs.
            with open("results.txt", "a+") as f:
                f.write(rslt)

    print("Optimization Finished!")