Example #1
    def __init__(self, path, dataset, split):
        """Load dataset
           Preprocess features, labels, the normalized adjacency matrix and the train/val/test indices

        Args:
            path (str): file path
            dataset (str): dataset name
            split (str): type of dataset split
        """
        data = Planetoid(root=path, name=dataset, split=split)
        self.feature = data[0].x
        self.edge = data[0].edge_index
        self.label = data[0].y
        self.idx_train = torch.where(data[0].train_mask)[0]
        self.idx_val = torch.where(data[0].val_mask)[0]
        self.idx_test = torch.where(data[0].test_mask)[0]
        self.n_node = data[0].num_nodes
        self.n_edge = data[0].num_edges
        self.n_class = data.num_classes
        self.n_feature = data.num_features
        self.adj = torch.sparse_coo_tensor(self.edge, torch.ones(self.n_edge),
                                           [self.n_node, self.n_node])
        self.adj = torch.add(self.adj, sparse_diag(torch.ones(self.n_node)))
        self.norm_adj = normalize_adj(self.adj, symmetric=True)
        self.adj_train = sparse_select(
            sparse_select(self.adj, 0, self.idx_train), 1, self.idx_train)
        self.norm_adj_train = normalize_adj(self.adj_train, symmetric=True)
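The helpers sparse_diag, sparse_select and normalize_adj used above are not shown in this snippet. A minimal sketch of what a symmetric normalization over a torch sparse COO adjacency could look like (an assumption for illustration, not the project's actual implementation):

import torch

def normalize_adj_sketch(adj, symmetric=True):
    """Hypothetical sketch: D^(-1/2) A D^(-1/2) (symmetric) or D^(-1) A (row-wise) for a sparse COO adjacency."""
    adj = adj.coalesce()
    row, col = adj.indices()
    # Dense degree vector; zero-degree nodes produce inf after pow, which is zeroed out below.
    deg = torch.sparse.sum(adj, dim=1).to_dense()
    if symmetric:
        d_inv_sqrt = deg.pow(-0.5)
        d_inv_sqrt[torch.isinf(d_inv_sqrt)] = 0.0
        values = d_inv_sqrt[row] * adj.values() * d_inv_sqrt[col]
    else:
        d_inv = deg.pow(-1.0)
        d_inv[torch.isinf(d_inv)] = 0.0
        values = d_inv[row] * adj.values()
    return torch.sparse_coo_tensor(adj.indices(), values, adj.size())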
Example #2
File: gcn.py Project: Empythy/GCNN
def gcn(signal_in, weights_hidden, weights_A, biases, hidden_num, node_num, horizon):
    
    signal_in = tf.transpose(signal_in, [1, 0, 2]) # node_num, ?batch, feature_in
    feature_len = signal_in.shape[2] # feature vector length at the node of the input graph

    i = 0
    while i < hidden_num:

        signal_in = tf.reshape(signal_in, [node_num, -1]) # node_num, batch*feature_in
        
        Adj = 0.5*(weights_A['A'+str(i)] + tf.transpose(weights_A['A'+str(i)])) 
        Adj = normalize_adj(Adj)
        Z = tf.matmul(Adj, signal_in) # node_num, batch*feature_in 
        Z = tf.reshape(Z, [-1, int(feature_len)]) # node_num * batch, feature_in
        signal_output = tf.add(tf.matmul(Z, weights_hidden['h'+str(i)]), biases['b'+str(i)])
        signal_output = tf.nn.relu(signal_output) # node_num * batch, hidden_vec
        
        i += 1
        signal_in = signal_output  # the signal for the next layer
        feature_len = signal_in.shape[1] # feature vector length at hidden layers
        #print (feature_len)
    
    final_output = tf.add(tf.matmul(signal_output, weights_hidden['out']), biases['bout'])  # node_num * batch, horizon
    final_output = tf.reshape(final_output, [node_num, -1, horizon]) # node_num, batch, horizon
    final_output = tf.transpose(final_output, [1, 0, 2]) # batch, node_num, horizon
    final_output = tf.reshape(final_output, [-1, node_num*horizon]) # batch, node_num*horizon
 
    return final_output
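A minimal call sketch for gcn() with two hidden layers. The shapes of the weight dictionaries are inferred from the loop above; the row-normalization stand-in for the unshown normalize_adj is an assumption for illustration only:

import tensorflow as tf

# Stand-in for the project's (unshown) normalize_adj: simple row normalization.
def normalize_adj(adj):
    return adj / tf.maximum(tf.reduce_sum(adj, axis=1, keepdims=True), 1e-12)

node_num, feature_in, hidden_vec, horizon, hidden_num, batch = 30, 8, 16, 7, 2, 4
weights_A = {'A%d' % i: tf.Variable(tf.eye(node_num)) for i in range(hidden_num)}
weights_hidden = {'h0': tf.Variable(tf.random.normal([feature_in, hidden_vec])),
                  'h1': tf.Variable(tf.random.normal([hidden_vec, hidden_vec])),
                  'out': tf.Variable(tf.random.normal([hidden_vec, horizon]))}
biases = {'b0': tf.Variable(tf.zeros([hidden_vec])),
          'b1': tf.Variable(tf.zeros([hidden_vec])),
          'bout': tf.Variable(tf.zeros([horizon]))}
signal_in = tf.random.normal([batch, node_num, feature_in])
pred = gcn(signal_in, weights_hidden, weights_A, biases, hidden_num, node_num, horizon)
print(pred.shape)  # (batch, node_num * horizon) -> (4, 210)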
Example #3
    def __init__(self, path, dataset, split, k):
        """Load dataset
           Preprocess features, labels, the normalized adjacency matrix and the train/val/test indices

        Args:
            path (str): file path
            dataset (str): dataset name
            split (str): type of dataset split
            k (int): k-hop aggregation
        """
        data = Planetoid(root=path, name=dataset, split=split)
        self.feature = data[0].x
        self.edge = data[0].edge_index
        self.label = data[0].y
        self.idx_train = torch.where(data[0].train_mask)[0]
        self.idx_val = torch.where(data[0].val_mask)[0]
        self.idx_test = torch.where(data[0].test_mask)[0]
        self.n_node = data[0].num_nodes
        self.n_edge = data[0].num_edges
        self.n_class = data.num_classes
        self.n_feature = data.num_features
        self.adj = torch.sparse_coo_tensor(self.edge, torch.ones(self.n_edge),
                                           [self.n_node, self.n_node])
        self.norm_adj = normalize_adj(self.adj, symmetric=True)
        self.feature_diffused = [self.feature]
        for i in range(k):
            self.feature_diffused.append(
                torch.sparse.mm(self.norm_adj, self.feature_diffused[i]))
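After the loop, feature_diffused holds the k+1 matrices X, A_norm*X, ..., A_norm^k*X. A hypothetical downstream use (not part of the snippet) would be to stack them into a single input matrix:

# Assuming `data` is an instance of the class above:
x_stacked = torch.cat(data.feature_diffused, dim=1)  # shape: [n_node, (k + 1) * n_feature]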
Example #4
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    loss_func_ss = nn.L1Loss()
    early_stopping = 10

    adj_raw = load_adj_raw(args['dataset']).tocsr()
    idx_mask = list(range(node_num))
    adj_mask = adj_raw
    adj_mask[idx_mask, idx_mask] = 0
    adj_mask = sparse_mx_to_torch_sparse_tensor(normalize_adj(adj_mask)).cuda()

    reduced_dim = args['reduced_dimension']
    ss_labels, _, _ = features.svd()
    ss_labels = ss_labels[:, :reduced_dim].cuda()

    net_gcn = net.net_gcn_multitask(embedding_dim=args['embedding_dim'], ss_dim=args['reduced_dimension'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    best_val = 0
    best_val_test = 0
    for epoch in range(500):

        optimizer.zero_grad()
        output, _ = net_gcn(features, adj)
        _, output_ss = net_gcn(features, adj_mask)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func_ss(output_ss, ss_labels) * 1e2
        loss = loss_target + loss_ss * args['loss_weight']
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features, adj, val_test=True)
            # loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

            acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
            if acc_val > best_val:
                best_val = acc_val
                best_val_test = acc_test

    return best_val, best_val_test
Example #5
    def _get_gat_features(self, input_tensor, adj):
        embedded = input_tensor
        adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
        adj = normalize_adj(adj + sp.eye(adj.shape[0]))
        adj = torch.FloatTensor(np.array(adj.todense()))
        features = normalize_features(embedded.detach().numpy())
        features = torch.FloatTensor(np.array(features))

        logits = self.gat(features, adj)

        return logits
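The scipy-flavoured normalize_adj used here (and in several of the later examples) is likewise not shown. A common sketch of a symmetric D^(-1/2) A D^(-1/2) normalization over a scipy sparse matrix, given as an assumption rather than the projects' actual code:

import numpy as np
import scipy.sparse as sp

def normalize_adj_scipy_sketch(adj):
    """Hypothetical sketch: symmetric normalization D^(-1/2) A D^(-1/2) of a scipy sparse matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()
    with np.errstate(divide='ignore'):
        d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0  # zero-degree nodes stay zero
    d_mat = sp.diags(d_inv_sqrt)
    return (d_mat @ adj @ d_mat).tocoo()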
Example #6
def load(dataset):
    basedir = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(basedir, 'data', dataset)

    if not os.path.exists(datadir):
        download(dataset)
        graphs, diff = process(dataset)
        feat, adj, labels = [], [], []

        for idx, graph in enumerate(graphs):
            adj.append(nx.to_numpy_array(graph))
            labels.append(graph.graph['label'])
            feat.append(np.array(list(nx.get_node_attributes(graph, 'feat').values())))

        adj, diff, feat, labels = np.array(adj), np.array(diff), np.array(feat), np.array(labels)

        np.save(f'{datadir}/adj.npy', adj)
        np.save(f'{datadir}/diff.npy', diff)
        np.save(f'{datadir}/feat.npy', feat)
        np.save(f'{datadir}/labels.npy', labels)

    else:
        adj = np.load(f'{datadir}/adj.npy', allow_pickle=True)
        diff = np.load(f'{datadir}/diff.npy', allow_pickle=True)
        feat = np.load(f'{datadir}/feat.npy', allow_pickle=True)
        labels = np.load(f'{datadir}/labels.npy', allow_pickle=True)

    max_nodes = max([a.shape[0] for a in adj])
    feat_dim = feat[0].shape[-1]

    num_nodes = []

    for idx in range(adj.shape[0]):

        num_nodes.append(adj[idx].shape[-1])

        adj[idx] = normalize_adj(adj[idx]).todense()

        diff[idx] = np.hstack(
            (np.vstack((diff[idx], np.zeros((max_nodes - diff[idx].shape[0], diff[idx].shape[0])))),
             np.zeros((max_nodes, max_nodes - diff[idx].shape[1]))))

        adj[idx] = np.hstack(
            (np.vstack((adj[idx], np.zeros((max_nodes - adj[idx].shape[0], adj[idx].shape[0])))),
             np.zeros((max_nodes, max_nodes - adj[idx].shape[1]))))

        feat[idx] = np.vstack((feat[idx], np.zeros((max_nodes - feat[idx].shape[0], feat_dim))))

    adj = np.array(adj.tolist()).reshape(-1, max_nodes, max_nodes)
    diff = np.array(diff.tolist()).reshape(-1, max_nodes, max_nodes)
    feat = np.array(feat.tolist()).reshape(-1, max_nodes, feat_dim)

    return adj, diff, feat, labels, num_nodes
Example #7
def analyze_VGAE(args, placeholders, data, model, model_name, sess):

    adj = data
    adj_norm = normalize_adj(adj)

    # change num_features to features shape for training with features
    num_nodes = data.shape[1]
    num_features = data.shape[1]
    features_batch = np.zeros([args.batch_size, num_nodes, num_features],
                              dtype=np.float32)

    for i in features_batch:
        np.fill_diagonal(i, 1.)

    adj_norm_batch, adj_orig_batch, adj_idx = get_consecutive_batch_VGAE(
        0, args.batch_size, adj, adj_norm)
    features = features_batch
    feed_dict = construct_feed_dict_VGAE(adj_norm_batch, adj_orig_batch,
                                         features, 0.0, placeholders)
    outs = sess.run([model.reconstructions, model.z_mean], feed_dict=feed_dict)

    reconstructions = outs[0].reshape([args.batch_size, 180, 180])
    z_mean = outs[1]
    rc = tf.reduce_mean(tf.square(adj_orig_batch - reconstructions))

    # Visualize first ten full matrices of original,
    # normalized, and reconstructed batches.
    for i in range(10):
        visualize_matrix(adj_orig_batch, i, model_name, 'original_' + str(i))
        visualize_matrix(adj_norm_batch, i, model_name, 'normalized_' + str(i))
        visualize_matrix(reconstructions, i, model_name,
                         'reconstruction_' + str(i))

    idx_all, z_all = [], []
    for i in range(10):
        adj_norm_batch, adj_orig_batch, adj_idx = get_random_batch_VGAE( \
                                                args.batch_size, adj, adj_norm)
        features = features_batch

        # Meaningless placeholder for dropout (0.0)
        feed_dict = construct_feed_dict_VGAE(adj_norm_batch, adj_orig_batch,
                                             features, 0.0, placeholders)
        outs = sess.run([model.reconstructions, model.z_mean],
                        feed_dict=feed_dict)
        idx_all.append(adj_idx)
        z_all.append(outs[1])

    # Visualize Latent Space
    z = np.array(z_all).reshape(-1, 10)
    idx = np.array(idx_all).flatten()
    onehot = np.array([0 if i < 203 else 1 for i in idx_all[0]])
    visualize_latent_space_VGAE(z_all[0], onehot, model_name)
Example #8
def load(dataset):
    datadir = os.path.join('data', dataset)

    if not os.path.exists(datadir):
        os.makedirs(datadir)
        ds = download(dataset)
        adj = nx.to_numpy_array(ds.graph)
        diff = compute_ppr(ds.graph, 0.2)
        feat = ds.features[:]
        labels = ds.labels[:]

        idx_train = np.argwhere(ds.train_mask == 1).reshape(-1)
        idx_val = np.argwhere(ds.val_mask == 1).reshape(-1)
        idx_test = np.argwhere(ds.test_mask == 1).reshape(-1)

        np.save(f'{datadir}/adj.npy', adj)
        np.save(f'{datadir}/diff.npy', diff)
        np.save(f'{datadir}/feat.npy', feat)
        np.save(f'{datadir}/labels.npy', labels)
        np.save(f'{datadir}/idx_train.npy', idx_train)
        np.save(f'{datadir}/idx_val.npy', idx_val)
        np.save(f'{datadir}/idx_test.npy', idx_test)
    else:
        adj = np.load(f'{datadir}/adj.npy')
        diff = np.load(f'{datadir}/diff.npy')
        feat = np.load(f'{datadir}/feat.npy')
        labels = np.load(f'{datadir}/labels.npy')
        idx_train = np.load(f'{datadir}/idx_train.npy')
        idx_val = np.load(f'{datadir}/idx_val.npy')
        idx_test = np.load(f'{datadir}/idx_test.npy')

    if dataset == 'citeseer':
        feat = preprocess_features(feat)

        epsilons = [1e-5, 1e-4, 1e-3, 1e-2]
        avg_degree = np.sum(adj) / adj.shape[0]
        epsilon = epsilons[np.argmin([
            abs(avg_degree - np.argwhere(diff >= e).shape[0] / diff.shape[0])
            for e in epsilons
        ])]

        diff[diff < epsilon] = 0.0
        scaler = MinMaxScaler()
        scaler.fit(diff)
        diff = scaler.transform(diff)

    ori_adj = copy.deepcopy(adj)
    # print(ori_adj)
    adj = normalize_adj(adj + sp.eye(adj.shape[0])).todense()

    return ori_adj, adj, diff, feat, labels, idx_train, idx_val, idx_test
Example #9
def load_planetoid_data(dataset_str):
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for name in names:
        with open("data/planetoid/ind.{}.{}".format(dataset_str, name),
                  'rb') as f:
            if sys.version_info > (3, 0):
                out = pkl.load(f, encoding='latin1')
            else:
                out = pkl.load(f)

            if name == 'graph':
                objects.append(out)
            else:
                out = out.todense() if hasattr(out, 'todense') else out
                objects.append(torch.Tensor(out))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx = parse_index_file(
        "data/planetoid/ind.{}.test.index".format(dataset_str))
    train_idx = torch.arange(y.size(0), dtype=torch.long)
    val_idx = torch.arange(y.size(0), y.size(0) + 500, dtype=torch.long)
    sorted_test_idx = np.sort(test_idx)

    if dataset_str == 'citeseer':
        len_test_idx = max(test_idx) - min(test_idx) + 1
        tx_ext = torch.zeros(len_test_idx, tx.size(1))
        tx_ext[sorted_test_idx - min(test_idx), :] = tx
        ty_ext = torch.zeros(len_test_idx, ty.size(1))
        ty_ext[sorted_test_idx - min(test_idx), :] = ty

        tx, ty = tx_ext, ty_ext

    features = torch.cat([allx, tx], dim=0)
    features[test_idx] = features[sorted_test_idx]

    labels = torch.cat([ally, ty], dim=0).max(dim=1)[1]
    labels[test_idx] = labels[sorted_test_idx]

    edge_list = adj_list_from_dict(graph)
    edge_list = add_self_loops(edge_list, features.size(0))
    adj = normalize_adj(edge_list)

    train_mask = index_to_mask(train_idx, labels.shape[0])
    val_mask = index_to_mask(val_idx, labels.shape[0])
    test_mask = index_to_mask(test_idx, labels.shape[0])

    data = Data(adj, edge_list, features, labels, train_mask, val_mask,
                test_mask)

    return data
Example #10
def load_npz_data(dataset_str, ntrain, seed):
    with np.load('data/npz/' + dataset_str + '.npz',
                 allow_pickle=True) as loader:
        loader = dict(loader)
    adj_mat = sp.csr_matrix(
        (loader['adj_data'], loader['adj_indices'], loader['adj_indptr']),
        shape=loader['adj_shape']).tocoo()
    if dataset_str[:2] == 'ms':
        edge_list = torch.cat(
            (torch.tensor(adj_mat.row).type(torch.int64).view(1, -1),
             torch.tensor(adj_mat.col).type(torch.int64).view(1, -1)),
            dim=0)
    else:
        edge_list1 = torch.cat(
            (torch.tensor(adj_mat.row).type(torch.int64).view(1, -1),
             torch.tensor(adj_mat.col).type(torch.int64).view(1, -1)),
            dim=0)
        edge_list2 = torch.cat(
            (torch.tensor(adj_mat.col).type(torch.int64).view(1, -1),
             torch.tensor(adj_mat.row).type(torch.int64).view(1, -1)),
            dim=0)
        edge_list = torch.cat([edge_list1, edge_list2], dim=1)

    edge_list = add_self_loops(edge_list, loader['adj_shape'][0])
    adj = normalize_adj(edge_list)
    if 'attr_data' in loader:
        feature_mat = sp.csr_matrix(
            (loader['attr_data'], loader['attr_indices'],
             loader['attr_indptr']),
            shape=loader['attr_shape']).todense()
    elif 'attr_matrix' in loader:
        feature_mat = loader['attr_matrix']
    else:
        feature_mat = None
    features = torch.tensor(feature_mat)

    if 'labels_data' in loader:
        labels = sp.csr_matrix(
            (loader['labels_data'], loader['labels_indices'],
             loader['labels_indptr']),
            shape=loader['labels_shape']).todense()
    elif 'labels' in loader:
        labels = loader['labels']
    else:
        labels = None
    labels = torch.tensor(labels).long()
    train_mask, val_mask, test_mask = split_data(labels, ntrain, 500, seed)

    data = Data(adj, edge_list, features, labels, train_mask, val_mask,
                test_mask)
    return data
Example #11
def train_VGAE(model_path, data, sess, saver,
                    placeholders, model, opt, args):

    # Normalize adjacency matrix (i.e. D^(-1/2) A D^(-1/2))
    adj = data
    adj_norm = normalize_adj(adj)

    # CHANGE TO features.shape[1] LATER
    num_nodes = adj.shape[1]
    num_features = adj.shape[1]

    # Use identity matrix for feature-less training
    features_batch = np.zeros([args.batch_size, num_nodes, num_features])
    for i in features_batch:
        np.fill_diagonal(i, 1)

    for epoch in range(args.epochs):
        t = time.time()
        random_batch = get_random_batch_VGAE(args.batch_size, adj, adj_norm)
        adj_norm_batch, adj_orig_batch, adj_idx = random_batch
        feed_dict = construct_feed_dict_VGAE(adj_norm_batch, adj_orig_batch,
                                    features_batch, args.dropout, placeholders)

        if epoch == 0:
            lambd = args.lambd
            feed_dict.update({placeholders['lambd']: lambd})
            [initial] = sess.run([opt.constraint], feed_dict=feed_dict)
            constraint_ma = initial
        else:
            feed_dict.update({placeholders['lambd']: lambd})
            outs = sess.run([opt.opt_op, opt.cost, opt.rc_loss, opt.kl,
                                opt.constraint], feed_dict=feed_dict)
            constraint = outs[4]
            constraint_ma = args.alpha * constraint_ma + (1 - args.alpha) * constraint
            lambd = np.clip(lambd, 0, 1e15)
            lambd *= np.clip(np.exp(constraint_ma), 0.9, 1.1)

            if epoch % 100 == 0:
                _, cost, rc_loss, kl_loss, constraint = outs
                print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                        "{:.5f}".format(cost), "kl_loss=%s" % (kl_loss),
                        "rc_loss=%s" % (rc_loss), "constraint=%s" % (constraint),
                        "lambd=%s" %(str(lambd)), "constraint_ma=%s" % (constraint_ma),
                        "time=", "{:.5f}".format(time.time() - t))

            # Save model every 500 epochs
            if epoch % 500 == 0 and epoch != 0:
                save_path = saver.save(sess, model_path)
                print('saving checkpoint at',save_path)
Example #12
    def node_sampling(self, idx, n_sample):
        """Sampling neighbors per node"""
        edge = []
        for i in idx:
            sample = self.neighbor_list[i]
            n = len(sample)
            if 0 < n_sample < n:
                sample = sample[torch.randperm(n)[:n_sample]]
            edge.append(
                torch.stack([torch.LongTensor([i] * len(sample)), sample]))
        edge = torch.cat(edge, dim=1)
        adj = torch.sparse_coo_tensor(edge, torch.ones(edge.shape[1]),
                                      self.adj.size())
        if self.agg_type == 'gcn':
            adj = torch.add(adj, sparse_diag(torch.ones(self.n_node)))
        norm_adj = normalize_adj(adj, symmetric=False)
        idx = torch.unique(edge)

        return norm_adj, idx
Example #13
def load_wiki_data(ntrain, seed):
    # generate feature matrix
    sp_feat = torch.tensor(np.loadtxt('data/wiki/tfidf.txt')).t()
    indices = sp_feat[:2].long()
    values = sp_feat[2].float()
    features = torch.sparse.FloatTensor(indices, values).to_dense()

    # generate edge list and adj matrix
    edge_list = torch.tensor(np.loadtxt('data/wiki/graph.txt')).long().t()
    edge_list_rev = torch.stack([edge_list[1], edge_list[0]])
    edge_list = torch.cat([edge_list, edge_list_rev], dim=1)
    edge_list = add_self_loops(edge_list, int(edge_list.max() + 1))
    adj = normalize_adj(edge_list)

    # generate labels and masks
    labels = torch.tensor(np.loadtxt('data/wiki/group.txt')).long().t()[1] - 1
    train_mask, val_mask, test_mask = split_data(labels, ntrain, 500, seed)

    data = Data(adj, edge_list, features, labels, train_mask, val_mask,
                test_mask)
    return data
Example #14
def load_geom_data(dataset_str, ntrain, seed):
    # Feature and Label preprocessing
    with open('data/geom_data/{}/out1_node_feature_label.txt'.format(
            dataset_str)) as f:
        feature_labels = f.readlines()
    feat_list = []
    label_list = []
    for fl in feature_labels[1:]:
        id, feat, lab = fl.split('\t')
        feat = list(map(int, feat.split(',')))
        feat_list.append(feat)
        label_list.append(int(lab))
    features = torch.FloatTensor(feat_list)
    labels = torch.tensor(label_list).long()

    # Graph preprocessing
    with open(
            'data/geom_data/{}/out1_graph_edges.txt'.format(dataset_str)) as f:
        edges = f.readlines()
    edge_pairs = []
    G = nx.Graph()
    for e in edges[1:]:
        u, v = map(int, e.split('\t'))
        edge_pairs.append((u, v))
    G.add_edges_from(edge_pairs)
    coo_adj = nx.to_scipy_sparse_matrix(G).tocoo()
    edge_list = torch.from_numpy(
        np.vstack((coo_adj.row, coo_adj.col)).astype(np.int64))
    edge_list = add_self_loops(edge_list, features.size(0))
    adj = normalize_adj(edge_list)

    train_mask, val_mask, test_mask = split_data(labels, ntrain, ntrain * 5,
                                                 seed)

    data = Data(adj, edge_list, features, labels, train_mask, val_mask,
                test_mask)
    return data
Example #15
    categories=args.categories,
    train=False,
    split=.7)

valid_set = shapenet.ShapeNet_Combination(
    [points_set_valid, images_set_valid, meshes_set_valid])
dataloader_val = DataLoader(valid_set,
                            batch_size=args.batchsize,
                            shuffle=False,
                            collate_fn=collate_fn,
                            num_workers=0)

# Model
mesh = kal.rep.TriangleMesh.from_obj('386.obj', enable_adjacency=True)
mesh.cuda()
normalize_adj(mesh)

initial_verts = mesh.vertices.clone()
camera_fov_y = 49.13434207744484 * np.pi / 180.0
cam_proj = perspectiveprojectionnp(camera_fov_y, 1.0)
cam_proj = torch.FloatTensor(cam_proj).cuda()
model = Encoder(4, 5, args.batchsize, 137, mesh.vertices.shape[0]).cuda()
renderer = Dib_Renderer(137, 137, mode='VertexColor')

model.load_state_dict(torch.load('log/{0}/best.pth'.format(args.expid)))

loss_epoch = 0.
f_epoch = 0.
num_batches = 0
num_items = 0
loss_fn = kal.metrics.point.chamfer_distance
Example #16
    def train_pipeline(self, adj, features, labels, idx_train, idx_val,
                       idx_test, *args):

        adj = normalize_adj(adj + sp.eye(adj.shape[0]))

        if sp.issparse(adj):
            adj = adj.todense()

        if sp.issparse(features):
            features = features.todense()

        # With networkx, we no longer need to convert from one-hot encoding...
        # labels = np.where(labels)[1]

        adj = torch.FloatTensor(adj)
        features = torch.FloatTensor(features)
        labels = torch.LongTensor(labels)
        idx_train = torch.LongTensor(idx_train)
        idx_val = torch.LongTensor(idx_val)
        idx_test = torch.LongTensor(idx_test)

        random.seed(self.args.seed)
        np.random.seed(self.args.seed)
        torch.manual_seed(self.args.seed)
        if self.args.cuda:
            torch.cuda.manual_seed(self.args.seed)

        # Model and optimizer
        if self.args.sparse:
            model = SpGAT(
                nfeat=features.shape[1],
                nhid=self.args.hidden,
                nclass=int(labels.max()) + 1,
                dropout=self.args.dropout,
                nheads=self.args.nb_heads,
                alpha=self.args.alpha,
            )
        else:
            model = GAT(
                nfeat=features.shape[1],
                nhid=self.args.hidden,
                nclass=int(labels.max()) + 1,
                dropout=self.args.dropout,
                nheads=self.args.nb_heads,
                alpha=self.args.alpha,
            )
        optimizer = optim.Adam(model.parameters(),
                               lr=self.args.lr,
                               weight_decay=self.args.weight_decay)

        if self.args.cuda:
            model.cuda()
            features = features.cuda()
            adj = adj.cuda()
            labels = labels.cuda()
            idx_train = idx_train.cuda()
            idx_val = idx_val.cuda()
            idx_test = idx_test.cuda()

        features, adj, labels = Variable(features), Variable(adj), Variable(
            labels)

        # TODO: Test if these lines could be written below line 41.
        self.adj = adj
        self.features = features
        self.labels = labels
        self.idx_train = idx_train
        self.idx_val = idx_val
        self.idx_test = idx_test

        def train(epoch):
            t = time.time()
            model.train()
            optimizer.zero_grad()
            output = model(features, adj)
            loss_train = F.nll_loss(output[idx_train], labels[idx_train])
            acc_train = accuracy(output[idx_train], labels[idx_train])
            loss_train.backward()
            optimizer.step()

            if not self.args.fastmode:
                # Evaluate validation set performance separately,
                # deactivates dropout during validation run.
                model.eval()
                output = model(features, adj)

            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = accuracy(output[idx_val], labels[idx_val])
            print(
                "Epoch: {:04d}".format(epoch + 1),
                "loss_train: {:.4f}".format(loss_train.data.item()),
                "acc_train: {:.4f}".format(acc_train.data.item()),
                "loss_val: {:.4f}".format(loss_val.data.item()),
                "acc_val: {:.4f}".format(acc_val.data.item()),
                "time: {:.4f}s".format(time.time() - t),
            )

            return loss_val.data.item()

        # Train model
        t_total = time.time()
        loss_values = []
        bad_counter = 0
        best = self.args.epochs + 1
        best_epoch = 0
        for epoch in range(self.args.epochs):
            loss_values.append(train(epoch))

            torch.save(model.state_dict(), "{}.pkl".format(epoch))
            if loss_values[-1] < best:
                best = loss_values[-1]
                best_epoch = epoch
                bad_counter = 0
            else:
                bad_counter += 1

            if bad_counter == self.args.patience:
                break

            files = glob.glob("*.pkl")
            for file in files:
                epoch_nb = int(file.split(".")[0])
                if epoch_nb < best_epoch:
                    os.remove(file)

        files = glob.glob("*.pkl")
        for file in files:
            epoch_nb = int(file.split(".")[0])
            if epoch_nb > best_epoch:
                os.remove(file)

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

        # Restore best model
        print("Loading {}th epoch".format(best_epoch))
        model.load_state_dict(torch.load("{}.pkl".format(best_epoch)))

        self.model = model

        return model
Example #17
#adj, features, labels, idx_train, idx_val, idx_test = load_data()
data_dir = "../data"
features = torch.FloatTensor(np.load("{}/feats.npy".format(data_dir)))
num_nodes = features.size()[0]
G = nx.Graph()
with open("{}/all.edgelist".format(data_dir)) as ff:
    for i, line in enumerate(ff):
        info = line.split()
        G.add_edge(int(info[0]), int(info[1]))
# add isolated nodes
for i in range(num_nodes):
    G.add_node(i)
adj = nx.to_scipy_sparse_matrix(G)
# build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
adj = normalize_adj(adj + sp.eye(adj.shape[0]))
adj = torch.FloatTensor(np.array(adj.todense()))

train_label = []
test_label = []

train_row = []
train_col1 = []
train_col2 = []
train_data = []

test_row = []
test_col1 = []
test_col2 = []
test_data = []
Example #18
def load(dataset):
    basedir = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(basedir, 'data', dataset)

    if not os.path.exists(datadir):
        # download(dataset)
        graphs, diff = process(dataset)
        feat, adj, labels = [], [], []

        for idx, graph in enumerate(graphs):
            adj.append(nx.to_numpy_array(graph))
            labels.append(graph.graph['label'])
            feat.append(np.array(list(nx.get_node_attributes(graph, 'feat').values())))

        adj, diff, feat, labels = np.array(adj), np.array(diff), np.array(feat), np.array(labels)

        np.save(f'{datadir}/adj.npy', adj)
        np.save(f'{datadir}/diff.npy', diff)
        np.save(f'{datadir}/feat.npy', feat)
        np.save(f'{datadir}/labels.npy', labels)

    else:
        adj = np.load(f'{datadir}/adj.npy', allow_pickle=True)
        diff = np.load(f'{datadir}/diff.npy', allow_pickle=True)
        feat = np.load(f'{datadir}/feat.npy', allow_pickle=True)
        labels = np.load(f'{datadir}/labels.npy', allow_pickle=True)

    # print(adj[0])
    # print(diff[0])
    # print(adj[1])
    # print(diff[1])
    graphs, diff2 = process(dataset)
    n1 = 0
    n2 = 1
    g1 = nx.from_numpy_matrix(diff2[n1])
    g2 = nx.from_numpy_matrix(diff2[n2])
    # plt.subplot(221)
    nx.draw(graphs[n2], node_size = 100)
    # pos = nx.spring_layout(graphs[n1])
    # plt.subplot(222)
    # # nx.draw(g1, node_size=100, edge_vmin=np.min(diff[0]), edge_vmax=np.max(diff[0]), edge_cmap = 'Accent')
    # draw_graph_with_diff_edge(g1, pos=pos)
    # plt.subplot(223)
    # nx.draw(graphs[n2], node_size = 100)
    # pos = nx.spring_layout(graphs[n2])
    # plt.subplot(224)
    # draw_graph_with_diff_edge(g2, pos=pos)
    plt.show()

    max_nodes = max([a.shape[0] for a in adj])
    feat_dim = feat[0].shape[-1]

    num_nodes = []

    for idx in range(adj.shape[0]):

        num_nodes.append(adj[idx].shape[-1])

        adj[idx] = normalize_adj(adj[idx]).todense()

        diff[idx] = np.hstack(
            (np.vstack((diff[idx], np.zeros((max_nodes - diff[idx].shape[0], diff[idx].shape[0])))),
             np.zeros((max_nodes, max_nodes - diff[idx].shape[1]))))

        adj[idx] = np.hstack(
            (np.vstack((adj[idx], np.zeros((max_nodes - adj[idx].shape[0], adj[idx].shape[0])))),
             np.zeros((max_nodes, max_nodes - adj[idx].shape[1]))))

        feat[idx] = np.vstack((feat[idx], np.zeros((max_nodes - feat[idx].shape[0], feat_dim))))

    adj = np.array(adj.tolist()).reshape(-1, max_nodes, max_nodes)
    diff = np.array(diff.tolist()).reshape(-1, max_nodes, max_nodes)
    feat = np.array(feat.tolist()).reshape(-1, max_nodes, feat_dim)

    return adj, diff, feat, labels, num_nodes
Example #19
    elif args.package != "ctf":
        print("Params: lr={:.4f}, epochs={}, weight_decay={:.5f}, patience={}, hidden_size={}, num_layers={}, package={}, dataset={}"\
    .format(args.lr, args.epochs, args.weight_decay, args.patience, args.hidden_size, args.num_layers, args.package, args.dataset))

    if args.dataset == "cora":
        data = load_data("data/cora/cora.pkl")

    adj = data['adj']
    features = data['features']
    y_train = data['y_train']
    y_val = data['y_val']
    y_test = data['y_test']
    train_mask = data['train_index']
    val_mask = data['val_index']
    test_mask = data['test_index']
    adj = normalize_adj(adj)
    features = normalize_features(features)

    if args.package == "numpy":
        features = features.toarray()
        adj = adj.toarray()
    elif args.package == "ctf":
        y_train = ctf.astensor(y_train)
        y_val = ctf.astensor(y_val)
        y_test = ctf.astensor(y_test)

        features = features.toarray()
        adj = adj.toarray()
        adj = ctf.astensor(adj)
        features = ctf.astensor(features)
Example #20
### build knowledge ###

print("@Build knowledge")
MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs = utils.build_knowledge(
    train_instances, validate_instances, test_instances)

print("#Statistic")
NB_ITEMS = len(item_dict)
print(" + Maximum sequence length: ", MAX_SEQ_LENGTH)
print(" + Total items: ", NB_ITEMS)

print("@Build the real adjacency matrix")
real_adj_matrix = utils.build_sparse_adjacency_matrix_v2(
    train_instances, validate_instances, item_dict)
real_adj_matrix = utils.normalize_adj(real_adj_matrix)

##### calculate correlation matrix ######
rmatrix_fpath = output_dir + "/r_matrix_" + str(nb_hop) + "w.npz"
mul = real_adj_matrix
w_mul = real_adj_matrix
coeff = 1.0
for w in range(1, nb_hop):
    coeff *= 0.85
    w_mul *= real_adj_matrix
    w_mul = utils.remove_diag(w_mul)

    w_adj_matrix = utils.normalize_adj(w_mul)
    mul += coeff * w_adj_matrix

real_adj_matrix = mul
Example #21
def load_data_and_gen_samples(train_dataset="HS300_170601-191129",
                              test_dataset="HS300_191202-200529",
                              seq_len_features=60,
                              predict_window=7,
                              gen_sample_interval=1,
                              model='normal',
                              weighted_graph=None,
                              weighted_graph_file=None,
                              hs300_dedicate=False):
    print('Loading {} {} dataset...'.format(train_dataset, test_dataset))
    z_scored_data_dict, features, idx, train_hs300, \
    test_z_scored_data_dict, test_features, test_idx, test_hs300 = init_info(train_dataset, test_dataset)
    for i in range(len(idx)):
        assert idx[i] == test_idx[i]

    if model == 'DA_RNN':
        feature_list, label_list = gen_samples_DA_RNN(
            features, seq_len_features, predict_window, gen_sample_interval,
            hs300_dedicate, train_hs300)
        test_feature_list, test_label_list = gen_samples_DA_RNN(
            test_features, seq_len_features, predict_window,
            gen_sample_interval, hs300_dedicate, test_hs300)
        save_as_pickle(
            [feature_list, label_list, test_feature_list, test_label_list], [
                'features_list', 'labels_list', 'test_features_list',
                'test_labels_list'
            ], 'data/pickle_seq{}_pwin{}_intv_{}_DARNN_ForIJCAI_{}/'.format(
                seq_len_features, predict_window, gen_sample_interval,
                train_dataset.split('_')[0]))
    elif model == 'SFM':
        if not hs300_dedicate:
            generate_samples_SFM(features,
                                 train_dataset.split('_')[0], "train_")
            generate_samples_SFM(test_features,
                                 train_dataset.split('_')[0], "test_")
        else:
            generate_samples_SFM_HS300(train_hs300, "train_")
            generate_samples_SFM_HS300(test_hs300, "test_")

    elif model == 'SFM_multiVar':
        if not hs300_dedicate:
            generate_samples_SFM(features,
                                 train_dataset.split('_')[0],
                                 "train_",
                                 multiVar=True)
            generate_samples_SFM(test_features,
                                 train_dataset.split('_')[0],
                                 "test_",
                                 multiVar=True)
        else:
            generate_samples_SFM_HS300(train_hs300, "train_", multiVar=True)
            generate_samples_SFM_HS300(test_hs300, "test_", multiVar=True)
    elif model == 'GCN' or model == 'TPA_LSTM':
        assert weighted_graph is not None
        assert weighted_graph_file is not None
        idx_map = {j: i for i, j in enumerate(idx)}

        # Some stocks are suspended or otherwise inactive: they appear in the sentiment data but have no price data, so drop these stocks
        ignore_edge_idx = []
        if 'pkl' not in weighted_graph_file:
            with open(weighted_graph_file, 'r') as f:
                reader = csv.reader(f)
                edges_unordered = []
                for edge_idx, edge_info in enumerate(reader):
                    edge_info = edge_info[0].split(' ')
                    if edge_info[0] not in idx_map.keys(
                    ) or edge_info[1] not in idx_map.keys():
                        ignore_edge_idx.append(edge_idx)
                        continue
                    edges_unordered.append([edge_info[0], edge_info[1]])
            edges_unordered = np.array(edges_unordered, dtype=np.dtype(str))

            edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                             dtype=np.int32).reshape(edges_unordered.shape)
            adj = sp.coo_matrix(
                (np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                shape=(features.shape[0], features.shape[0]),
                dtype=np.float32)
            if weighted_graph:
                with open(weighted_graph_file, 'r') as f:
                    weight_edges = csv.reader(f)
                    adj_dok = adj.todok()
                    for edge_idx, weight_edge_info in enumerate(weight_edges):
                        if edge_idx in ignore_edge_idx:
                            continue
                        i, j, w = weight_edge_info[0].split(' ')
                        adj_dok[idx_map[i], idx_map[j]] = int(w)
                adj = adj_dok.tocoo()
                from utils import normalize_adj
                adj = normalize_adj(adj + sp.eye(adj.shape[0]))
        else:
            with open(weighted_graph_file, 'rb') as f:
                adj = pickle.load(f)
                adj = np.array(adj[2])
                adj = sp.coo_matrix(adj,
                                    shape=(adj.shape[0], adj.shape[1]),
                                    dtype=np.float32)
                from utils import normalize_adj
                adj = normalize_adj(adj + sp.eye(adj.shape[0]))

        features_list, labels_list = generate_samples_GCN(
            features, seq_len_features, predict_window, gen_sample_interval,
            model, hs300_dedicate, train_hs300)
        test_features_list, test_labels_list = generate_samples_GCN(
            test_features, seq_len_features, predict_window,
            gen_sample_interval, model, hs300_dedicate, test_hs300)
        from pygcn.pygcn.utils import sparse_mx_to_torch_sparse_tensor
        adj = sparse_mx_to_torch_sparse_tensor(adj)
        save_as_pickle([
            adj, features_list, labels_list, test_features_list,
            test_labels_list
        ], [
            'adj', 'features_list', 'labels_list', 'test_features_list',
            'test_labels_list'
        ], 'data/pickle_seq{}_pwin{}_intv{}_GCN_forIJCAI_{}/'.format(
            seq_len_features, predict_window, gen_sample_interval,
            train_dataset.split('_')[0]))

    elif model == 'InceptionTime':
        features_list, labels_list = generate_samples_GCN(features, seq_len_features, predict_window,
                                                         gen_sample_interval, model, hs300_dedicate, train_hs300) \
            if not hs300_dedicate else generate_samples_InceptionTime_hs300(
            features, seq_len_features, predict_window,
            gen_sample_interval, model, hs300_dedicate, train_hs300)
        test_features_list, test_labels_list = generate_samples_GCN(test_features, seq_len_features, predict_window,
                                                                    gen_sample_interval, model,hs300_dedicate, test_hs300) \
            if not hs300_dedicate else generate_samples_InceptionTime_hs300(
            features, seq_len_features, predict_window,
            gen_sample_interval, model, hs300_dedicate, train_hs300)
        features_list = np.array(features_list)
        labels_list = np.array(labels_list)
        test_features_list = np.array(test_features_list)
        test_labels_list = np.array(test_labels_list)
        print(features_list.shape, labels_list.shape, test_features_list.shape,
              test_labels_list.shape)
        save_path = 'data/InceptionTime_seq{}_pwin{}_intv{}_IJCAI_{}/'.format(
            seq_len_features, predict_window, gen_sample_interval,
            train_dataset.split('_')[0])
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        np.save(save_path + "X_TRAIN.npy", features_list)
        np.save(save_path + "Y_TRAIN.npy", labels_list)
        np.save(save_path + "X_TEST.npy", test_features_list)
        np.save(save_path + "Y_TEST.npy", test_labels_list)
Example #22
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])
    adj = load_adj_raw(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    g = dgl.DGLGraph().to('cuda:%s' % args['cuda'] )
    print(g.device)
    g.add_nodes(node_num)
    adj = adj.tocoo()
    g.add_edges(adj.row, adj.col)

    # adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    loss_func_ss = nn.L1Loss()
    early_stopping = 10

    if args['net'] == 'gin':
        net_gcn = GINNet_ss(args['embedding_dim'], args['reduced_dimension'])
    else:
        net_gcn = GATNet_ss(args['embedding_dim'], args['reduced_dimension'])
        g.add_edges(list(range(node_num)), list(range(node_num)))

    adj_raw = load_adj_raw(args['dataset']).tocsr()
    idx_mask = list(range(node_num))
    adj_mask = adj_raw
    adj_mask[idx_mask, idx_mask] = 0
    adj_mask = sparse_mx_to_torch_sparse_tensor(normalize_adj(adj_mask)).cuda()

    reduced_dim = args['reduced_dimension']
    ss_labels, _, _ = features.svd()
    ss_labels = ss_labels[:, :reduced_dim].cuda()

    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    best_val = 0
    best_val_test = 0
    for epoch in range(400):

        optimizer.zero_grad()
        output, output_ss = net_gcn(g, features, 0, 0)
        loss_target = loss_func(output[idx_train], labels[idx_train])
        loss_ss = loss_func_ss(output_ss, ss_labels) * 1e2
        loss = loss_target + loss_ss * args['loss_weight']
        # print('epoch', epoch, 'loss', loss_target.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            net_gcn.eval()
            output, _ = net_gcn(g, features, 0, 0)
            # loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))
            wandb.log({
                'val_acc': f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            })
            acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
            if acc_val > best_val:
                best_val = acc_val
                best_val_test = acc_test

    return best_val, best_val_test
Example #23
def subgraph_sample(graph, idx_train, idx_val, idx_test):
    '''
    graph: the original graph
    idx_train: the index of training set nodes in the graph
    idx_val: the index of validation set nodes in the graph
    idx_test: the index of test set nodes in the graph
    '''
    size_of_graph = len(graph)

    target_size = 3000
    number_subgraph = 50

    num_train = idx_train.size()[0]
    num_val = idx_val.size()[0]
    num_test = idx_test.size()[0]

    subgraph_set_dict = {}
    print('Target number of subgraph:', number_subgraph)
    for iter in range(number_subgraph):
        #select initial node, and store it in the index_subgraph list
        index_subgraph = [np.random.randint(0, num_train)]
        #the neighbor node set of the initial nodes
        neighbors = graph[index_subgraph[0]]
        len_subgraph = 0
        while(1):
            len_neighbors = len(neighbors)
            if(len_neighbors == 0):  # stuck in a disconnected part of the graph, select a restart node
                while(1):    
                    restart_node = np.random.randint(0, num_train)
                    if(restart_node not in index_subgraph):
                        break
                index_subgraph.append(restart_node)
                neighbors = neighbors + graph[restart_node]
                neighbors = list(set(neighbors) - set(index_subgraph))
            else:
                #select part (half) of the neighbor nodes and insert them into the current subgraph
                if ((target_size - len_subgraph) > (len_neighbors*0.5)):  # check whether we still need that many neighbors
                    neig_random = random.sample(neighbors, max(1, int(0.5*len_neighbors)))
                    neighbors = list(set(neighbors) - set(neig_random))

                    index_subgraph = index_subgraph + neig_random
                    index_subgraph = list(set(index_subgraph))
                    for i in neig_random:
                        neighbors = neighbors + graph[i]
                    neighbors = list(set(neighbors) - set(index_subgraph))
                    len_subgraph = len(index_subgraph)
                else:
                    neig_random = random.sample(neighbors, (target_size - len_subgraph))
                    index_subgraph = index_subgraph + neig_random
                    index_subgraph = list(set(index_subgraph))
                    break

        idx_train_subgraph = []
        idx_val_subgraph = []
        idx_test_subgraph = []
        idx_test_dict = {}
        index_subgraph = list(set(index_subgraph + list(range(num_train, num_train+num_val))))
        #record the new index of nodes in the subgraph
        for i in range(len(index_subgraph)):
            if(index_subgraph[i]<num_train):
                idx_train_subgraph.append(i)
            elif(index_subgraph[i]<(num_train + num_val)):
                idx_val_subgraph.append(i)
            elif(index_subgraph[i]<(num_train + num_val + num_test)):
                idx_test_subgraph.append(i)
                idx_test_dict[i] = index_subgraph[i]

        print(iter + 1, 'th subgraph has been sampled')

        #generate the adjacency matrix of the subgraph
        G = nx.from_dict_of_lists(graph)
        g = G.subgraph(index_subgraph)
        adj = nx.adjacency_matrix(g)
        adj = normalize_adj(adj + sp.eye(adj.shape[0]))

        index_subgraph = torch.LongTensor(index_subgraph)
        idx_train_subgraph = torch.LongTensor(idx_train_subgraph)
        idx_val_subgraph = torch.LongTensor(idx_val_subgraph)
        idx_test_subgraph = torch.LongTensor(idx_test_subgraph)

        #store the information of the generated subgraph:
        #indices of nodes in the original graph G;
        #adjacency matrix;
        #new indices (indices in the subgraph) of nodes belonging to the train, val, and test sets.
        #In this way, we do not have to load the adjacency matrix of the original graph during training.
        subgraph_set_dict[iter] = {'index_subgraph': index_subgraph, 'adj':adj,
                 'idx_train': idx_train_subgraph, 'idx_val': idx_val_subgraph,
                 'idx_test': idx_test_subgraph, 'idx_test_dict':idx_test_dict}

    return subgraph_set_dict
Example #24
        ctx = get_extension_context('cudnn', device_id='0')
        nn.set_default_context(ctx)
    except:
        pass

    print('Loading dataset...')
    G, feature_matrix, labels = load_cora()

    num_nodes = len(G.nodes)
    num_classes = max(labels) + 1

    train_mask, valid_mask, test_mask = get_mask(20, 500, 500, num_nodes,
                                                 num_classes, labels)

    print('Preprocessing data...')
    A_hat = normalize_adj(G)

    print('Building model...')
    A_hat = nn.Variable.from_numpy_array(A_hat)
    X = nn.Variable.from_numpy_array(feature_matrix)
    labels = nn.Variable.from_numpy_array(np.expand_dims(labels, axis=1))
    train_mask = nn.Variable.from_numpy_array(
        np.expand_dims(train_mask, axis=1))
    valid_mask = nn.Variable.from_numpy_array(
        np.expand_dims(valid_mask, axis=1))
    test_mask = nn.Variable.from_numpy_array(np.expand_dims(test_mask, axis=1))

    H = gcn(A_hat, X, num_classes, 0.5)
    H_valid = gcn(A_hat, X, num_classes, 0)

    # Solver / Optimizer
Example #25
def run_get_admm_weight_mask(args, index, wei_percent, seed):

    adj = np.load("./ADMM/admm_{}/adj_{}.npy".format(args['dataset'], index))
    adj = utils.normalize_adj(adj)
    adj = utils.sparse_mx_to_torch_sparse_tensor(adj)

    pruning.setup_seed(seed)
    _, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])
    adj = adj.to_dense()

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    net_gcn = net.net_gcn_baseline(embedding_dim=args['embedding_dim'])
    pruning.add_mask(net_gcn)
    net_gcn = net_gcn.cuda()

    for name, param in net_gcn.named_parameters():
        if 'mask' in name:
            param.requires_grad = False
            print("NAME:{}\tSHAPE:{}\tGRAD:{}".format(name, param.shape,
                                                      param.requires_grad))

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    acc_test = 0.0
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}
    rewind_weight = copy.deepcopy(net_gcn.state_dict())

    for epoch in range(args['total_epoch']):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()

        optimizer.step()
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['test_acc'] = acc_test
                best_val_acc['val_acc'] = acc_val
                best_val_acc['epoch'] = epoch
                best_epoch_mask = pruning.get_final_weight_mask_epoch(
                    net_gcn, wei_percent=wei_percent)

            print(
                "(ADMM Get Mask) Epoch:[{}] Val:[{:.2f}] Test:[{:.2f}] | Best Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
                .format(epoch, acc_val * 100, acc_test * 100,
                        best_val_acc['val_acc'] * 100,
                        best_val_acc['test_acc'] * 100, best_val_acc['epoch']))

    return best_epoch_mask, rewind_weight
Example #26
def run(args, seed):

    setup_seed(seed)
    dataset = args['dataset']
    adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset) 

    idx_unlabeled = list(range(len(idx_train), features.size()[0]))
    # print(len(idx_train), features.size()[0])
    idx_unlabeled = np.random.permutation(idx_unlabeled)
    idx_clean = list(idx_unlabeled[:100])
    idx_adv = list(idx_unlabeled[100:300])

    adj = adj.cuda()

    features = features.cuda()
    labels = labels.cuda()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    loss_func = nn.CrossEntropyLoss()
    loss_func_ss = nn.L1Loss()
    loss_val = []
    early_stopping = 10

    for epoch in range(1000):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss_train = loss_func(output[idx_train], labels[idx_train])
        # print('epoch', epoch, 'loss', loss_train.data)
        loss_train.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping+1):-1]):
            break

    # test
    with torch.no_grad():
        output = net_gcn(features, adj, val_test=True)
        # print('')
        acc = f1_score(labels[idx_test].cpu(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
        # print('test acc', acc)

    #########
    # robust training
    w0 = np.load('./weights/' + dataset + '_w0.npy').transpose()
    w1 = np.load('./weights/' + dataset + '_w1.npy').transpose()

    adj_raw, features_raw, _ = load_data_raw(dataset)

    pseudo_labels = output.argmax(dim=1).cpu().numpy()
    # print(pseudo_labels)
   
    _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, pseudo_labels, w0, w1, True, True, idx_adv, n=2)

    node_num = features.size()[0]
    idx_mask = list(range(node_num))
    adj_mask = adj_per
    adj_mask[idx_mask, idx_mask] = 0
    adj_mask = sparse_mx_to_torch_sparse_tensor(normalize_adj(adj_mask)).cuda()
    dimm = args['partition_num']

    # partition_labels = partition(adj_per, args['partition_num'])
    # partition_labels = partition(adj_raw, args['partition_num'])

    features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
    f_mask, _, _ = features_per.svd()
    partition_labels = f_mask[:, :dimm]

    pseudo_labels = torch.tensor(pseudo_labels).cuda()

    net_gcn = net.net_gcn_2task(embedding_dim=args['embedding_dim'], ss_class_num=args['partition_num'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])

    best_val = 0
    for epoch in range(500):

        optimizer.zero_grad()

        output, output_ss = net_gcn(features, features_per, adj, adj_per)
        output_adv, _ = net_gcn(features_per, features_per, adj_per, adj_per)

        loss_train = loss_func(output[idx_train], labels[idx_train]) + loss_func_ss(output_ss, partition_labels) * args['task_ratio'] * 1e2
        loss_adv_1 = loss_func(output_adv[idx_clean], pseudo_labels[idx_clean])
        loss_adv_2 = loss_func(output_adv[idx_adv], pseudo_labels[idx_adv])

        loss = loss_train + 1 * (loss_adv_1 + loss_adv_2)

        # print('epoch', epoch, 'loss', loss_train.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features, features_per, adj, adj_per, val_test=True)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

            val_a = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
            if val_a > best_val:
                best_val = val_a
                net_gcn_best = net_gcn

        '''
        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping+1):-1]):
            break
        '''

    net_gcn = net_gcn_best
    # test
    with torch.no_grad():
        output, _ = net_gcn(features, features_per, adj, adj_per, val_test=True)
        # print('')
        acc = f1_score(labels[idx_test].cpu(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
        # print('test acc', acc)

    #########
    # attack
    w0 = np.load('./weights/' + dataset + '_w0.npy').transpose()
    w1 = np.load('./weights/' + dataset + '_w1.npy').transpose()

    adj_raw, features_raw, labels_raw = load_data_raw(dataset)

    correct_pred_link = 0
    correct_pred_feat = 0
    correct_pred_link_feat = 0
    n_attack = args['nattack']
    for idxt, n in zip(idx_test, range(1000)):

        # link
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, labels_raw, w0, w1, False, True, pernode, n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output, _ = net_gcn(features_per, features_per, adj_per, adj_per, val_test=True)
            output = output[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_link = correct_pred_link + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_link, n + 1)

        # feat
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, labels_raw, w0, w1, True, False, pernode, n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output, _ = net_gcn(features_per, features_per, adj_per, adj_per, val_test=True)
            output = output[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_feat = correct_pred_feat + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_feat, n + 1)

        # link feat
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, labels_raw, w0, w1, True, True, pernode, n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output, _ = net_gcn(features_per, features_per, adj_per, adj_per, val_test=True)
            output = output[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_link_feat = correct_pred_link_feat + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_link_feat, n + 1)

    adv_acc_link = correct_pred_link / 1000
    adv_acc_feat = correct_pred_feat / 1000
    adv_acc_link_feat = correct_pred_link_feat / 1000

    return acc, adv_acc_link, adv_acc_feat, adv_acc_link_feat