Example 1
def _preprocess_adj(self, normalization, adj, cuda):
    adj_normalizer = fetch_normalization(normalization)
    r_adj = adj_normalizer(adj)
    r_adj = sparse_mx_to_torch_sparse_tensor(r_adj).float()
    if cuda:
        r_adj = r_adj.cuda()
    return r_adj
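Every example on this page funnels through fetch_normalization. For reference, here is a minimal sketch of what that registry typically looks like in SGC-style codebases ("AugNormAdj" assumed to map to the augmented normalized adjacency D^-1/2 (A + I) D^-1/2; the exact module behind these snippets may differ):

import numpy as np
import scipy.sparse as sp

def aug_normalized_adjacency(adj):
    # Symmetrically normalize A + I:  D^{-1/2} (A + I) D^{-1/2}
    adj = sp.coo_matrix(adj + sp.eye(adj.shape[0]))
    row_sum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(row_sum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0  # guard isolated nodes
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return d_mat_inv_sqrt.dot(adj).dot(d_mat_inv_sqrt).tocoo()

def fetch_normalization(norm_type):
    switcher = {"AugNormAdj": aug_normalized_adjacency}
    return switcher[norm_type]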
Example 2
def load_reddit_data(normalization="AugNormAdj",
                     porting_to_torch=True,
                     data_path=datadir):
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ(
        data_path)
    labels = np.zeros(adj.shape[0])
    labels[train_index] = y_train
    labels[val_index] = y_val
    labels[test_index] = y_test
    adj = adj + adj.T + sp.eye(adj.shape[0])
    train_adj = adj[train_index, :][:, train_index]
    degree = np.sum(train_adj, axis=1)

    features = torch.FloatTensor(np.array(features))
    features = (features - features.mean(dim=0)) / features.std(dim=0)
    train_features = torch.index_select(features, 0,
                                        torch.LongTensor(train_index))
    if not porting_to_torch:
        features = features.numpy()
        train_features = train_features.numpy()

    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    train_adj = adj_normalizer(train_adj)

    if porting_to_torch:
        train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
        labels = torch.LongTensor(labels)
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        degree = torch.LongTensor(degree)
        train_index = torch.LongTensor(train_index)
        val_index = torch.LongTensor(val_index)
        test_index = torch.LongTensor(test_index)
    learning_type = "inductive"
    return adj, train_adj, features, train_features, labels, train_index, val_index, test_index, degree, learning_type
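A sketch of how the ten-tuple returned above would typically be consumed (hypothetical usage; assumes the Reddit .npz files and the helper functions are available):

(adj, train_adj, features, train_features, labels,
 train_index, val_index, test_index, degree, learning_type) = load_reddit_data(
    normalization="AugNormAdj", porting_to_torch=True, data_path="data/")
print(learning_type)  # "inductive"
print(adj.shape, features.shape)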
Example 3
def load_reddit_data(data_path="data/",
                     normalization="AugNormAdj",
                     cuda=True,
                     gamma=1.0):
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ(
        data_path)
    labels = np.zeros(adj.shape[0])
    labels[train_index] = y_train
    labels[val_index] = y_val
    labels[test_index] = y_test
    adj = adj + adj.T + sp.eye(adj.shape[0])
    train_adj = adj[train_index, :][:, train_index]
    features = torch.FloatTensor(np.array(features))
    features = (features - features.mean(dim=0)) / features.std(dim=0)
    adj_normalizer = fetch_normalization(normalization)
    if "Aug" in normalization:
        adj = adj_normalizer(adj, gamma)
    else:
        adj = adj_normalizer(adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj).float()
    if "Aug" in normalization:
        train_adj = adj_normalizer(train_adj, gamma)
    else:
        train_adj = adj_normalizer(train_adj)
    train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
    labels = torch.LongTensor(labels)
    if cuda:
        adj = adj.cuda()
        train_adj = train_adj.cuda()
        features = features.cuda()
        labels = labels.cuda()
    return adj, train_adj, features, labels, train_index, val_index, test_index
Example 4
def preprocess_citation_graph(adj, features, normalization="FirstOrderGCN"):
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    adj2 = features * features.T
    adj2 = adj_normalizer(adj2)
    features = row_normalize(features)
    return adj, features, adj2
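Note that for scipy sparse matrices the * operator is matrix multiplication, so features * features.T above builds a node-by-node feature-similarity graph that is then normalized like an adjacency matrix. A tiny self-contained illustration:

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[1.0, 0.0],
                            [1.0, 1.0]]))
S = X * X.T             # sparse * is matmul: S[i, j] = <x_i, x_j>
print(S.toarray())      # [[1. 1.]
                        #  [1. 2.]]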
Example 5
def load_reddit_data(data_path="data/", normalization="AugNormAdj", cuda=True):
    adj, features, y_train, y_val, y_test, train_index, val_index, test_index = loadRedditFromNPZ(
        data_path)

    # normalize features
    features = torch.FloatTensor(features)
    features = (features - features.mean(dim=0)) / features.std(dim=0)

    # all labels
    labels = np.zeros(adj.shape[0])
    labels[train_index] = y_train
    labels[val_index] = y_val
    labels[test_index] = y_test
    labels = torch.LongTensor(labels)

    # get dense adjacency matrix
    # adj = adj + adj.T + sp.eye(adj.shape[0])  # I will be added twice in normalization
    adj = adj + adj.T  # symmetrize; self-loops are added inside the normalizer
    adj_normalizer = fetch_normalization(
        normalization)  # normalization function
    adj = adj_normalizer(adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    # move to cuda
    if cuda:
        adj = adj.cuda()
        features = features.cuda()
        labels = labels.cuda()

    return adj, features, labels, train_index, val_index, test_index
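The sparse_mx_to_torch_sparse_tensor helper used throughout these examples is commonly the pygcn-style converter sketched below (the exact implementation behind these snippets may differ slightly):

import numpy as np
import torch

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    # Convert a scipy sparse matrix to a torch sparse COO tensor.
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)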
Example 6
def preprocess_citation_bigraph(adj, features, normalization="FirstOrderGCN"):
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    adj_cn = features.T
    features = row_normalize(features)
    adj_cn = row_normalize(adj_cn)
    adj_nc = features
    return adj, features, adj_nc, adj_cn
Example 7
def preprocess_citation(adj, features, normalization="FirstOrderGCN", gamma=1):
    adj_normalizer = fetch_normalization(normalization)
    if 'Aug' in normalization:
        adj = adj_normalizer(adj, gamma=gamma)
    else:
        adj = adj_normalizer(adj)
    features = row_normalize(features)
    return adj, features
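row_normalize, called by most of the preprocess_citation variants here, is commonly implemented as row-stochastic scaling (a sketch, assuming the pygcn-style helper):

import numpy as np
import scipy.sparse as sp

def row_normalize(mx):
    # Scale each row of a sparse matrix so that it sums to 1.
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1.0).flatten()
    r_inv[np.isinf(r_inv)] = 0.0  # guard all-zero rows
    return sp.diags(r_inv).dot(mx)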
Example 8
def load_data(dataset_str="cora",
              normalization=[],
              feat_normalize=True,
              cuda=False,
              split="default",
              random_state=None,
              **kwargs):
    """
    Load pickle packed datasets.
    """
    with open(dataf+dataset_str+".graph", "rb") as f:
        graph = pkl.load(f)
    with open(dataf+dataset_str+".X", "rb") as f:
        X = pkl.load(f)
    with open(dataf+dataset_str+".y", "rb") as f:
        y = pkl.load(f)
    if split != "default":
        tr_size, va_size, te_size = [float(i) for i in split.split("_")]
        idx_train, idx_val, idx_test = \
            train_val_test_split(np.arange(len(y)), train_size=tr_size,
                                 val_size=va_size, test_size=te_size,
                                 stratify=y, random_state=random_state) 
    else:
        with open(dataf+dataset_str+".split", "rb") as f:
            split = pkl.load(f)
            idx_train = split['train']
            idx_test = split['test']
            idx_val = split['valid']

    normed_adj = []
    if len(normalization) > 0:
        adj = nx.adjacency_matrix(graph)
        for n in normalization:
            nf = fetch_normalization(n, **kwargs)
            normed_adj.append(nf(adj))

    if feat_normalize:
        X = row_normalize(X)

    X = torch.FloatTensor(X)
    y = torch.LongTensor(y)
    normed_adj = [sparse_mx_to_torch_sparse_tensor(adj).float() \
                  for adj in normed_adj]
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    if cuda:
        X = X.cuda()
        normed_adj = [adj.cuda() for adj in normed_adj]
        y = y.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    return graph, normed_adj, X, y, idx_train, idx_val, idx_test
Example 9
def preprocess_citation(adj, features, normalization, extra=None):
    adj_normalizer = fetch_normalization(normalization, extra)
    adj = adj_normalizer(adj)
    #row_sum = 1 / (np.sqrt(np.array(adj.sum(1))))
    #row_sum = np.array(adj.sum(1))
    #features = row_sum
    #features = features.todense()
    #features = np.concatenate([features, row_sum], axis=1) 
    #features = sp.lil_matrix(features)
    if normalization != "":
        features = row_normalize(features)
    return adj, features
Example 10
def load_coauthor_phy_data(normalization="AugNormAdj",
                           porting_to_torch=True,
                           data_path=datadir):
    data = load_npz_to_sparse_graph(
        os.path.join(data_path, 'coauthor_phy', 'ms_academic_phy.npz'))

    # make labels
    labels = data.labels

    # make adj
    adj = data.adj_matrix
    adj = adj + adj.T + sp.eye(adj.shape[0])
    adj.data = np.ones_like(adj.data)

    all_index = shuffle(np.arange(len(labels)), random_state=42)
    train_index = all_index[:20000]
    val_index = all_index[20000:25000]
    test_index = all_index[25000:]

    train_adj = adj[train_index, :][:, train_index]
    degree = np.sum(train_adj, axis=1)

    # make features
    features = data.attr_matrix.todense()

    features = torch.FloatTensor(np.array(features))
    features = features / torch.sum(features, dim=1, keepdim=True)
    train_features = torch.index_select(features, 0,
                                        torch.LongTensor(train_index))
    if not porting_to_torch:
        features = features.numpy()
        train_features = train_features.numpy()

    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    train_adj = adj_normalizer(train_adj)

    if porting_to_torch:
        train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
        labels = torch.LongTensor(labels)
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        degree = torch.LongTensor(degree)
        train_index = torch.LongTensor(train_index)
        val_index = torch.LongTensor(val_index)
        test_index = torch.LongTensor(test_index)
    learning_type = "inductive"
    return adj, train_adj, features, train_features, labels, train_index, val_index, test_index, degree, learning_type
Example 11
def _load_raw_graph(txt_file):
    graph = {}
    with open(txt_file, 'r') as f:
        cur_idx = 0
        for row in f:
            row = row.strip().split()
            adjs = []
            for j in range(1, len(row)):
                adjs.append(int(row[j]))
            graph[cur_idx] = adjs
            cur_idx += 1
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    normalization = "AugNormAdj"
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    # adj = AmazonAll._sparse_mx_to_torch_sparse_tensor(adj).float()
    return adj
Example 12
    def train(epoch):
        model.train()

        if epoch == 51:
            # halve the learning rate after epoch 50
            for param_group in optimizer.param_groups:
                param_group['lr'] = 0.5 * param_group['lr']

        loss_all = 0
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            if data.edge_attr is None:
                edge_weight = torch.ones((data.edge_index.size(1), ),
                                         dtype=torch.float32,
                                         device=data.edge_index.device)
            else:
                if args.dataset == 'MUTAG':
                    # MUTAG edge_attr is one-hot; use the label index as the weight
                    edge_weight = data.edge_attr.argmax(1)
                else:
                    edge_weight = data.edge_attr
            adj_sp = csr_matrix((edge_weight.cpu().numpy(),
                                 (data.edge_index[0, :].cpu().numpy(),
                                  data.edge_index[1, :].cpu().numpy())),
                                shape=(data.num_nodes, data.num_nodes))
            adj_normalizer = fetch_normalization("AugNormAdj")
            adj_sp_nz = adj_normalizer(adj_sp)
            adj = torch.sparse.FloatTensor(
                torch.LongTensor(np.array([adj_sp_nz.row,
                                           adj_sp_nz.col])).to(device),
                torch.Tensor(adj_sp_nz.data).to(device),
                torch.Size([data.num_nodes, data.num_nodes]))  #normalized adj

            adj_ori = torch.sparse.FloatTensor(
                data.edge_index, edge_weight,
                torch.Size([data.num_nodes, data.num_nodes]))  #original adj
            if data.x is None:
                data.x = torch.sparse.sum(
                    adj_ori, [0]).to_dense().unsqueeze(1).to(device)
            output = model(data.x.T, adj, data.batch)
            loss = F.nll_loss(output, data.y)
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        return loss_all / len(train_dataset)
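torch.sparse.FloatTensor, used above to assemble the normalized adjacency, is deprecated in recent PyTorch releases; a minimal sketch of the equivalent modern construction (synthetic three-node data, assuming PyTorch >= 1.10):

import torch

indices = torch.tensor([[0, 1, 2], [1, 0, 2]], dtype=torch.long)  # COO (row, col)
values = torch.tensor([0.5, 0.5, 1.0])
adj = torch.sparse_coo_tensor(indices, values, size=(3, 3))  # replaces torch.sparse.FloatTensor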
Example 13
def load_reddit_data(normalization="AugNormAdj",
                     porting_to_torch=True,
                     data_path=datadir):
    adj, features, labels, train_index, val_index, test_index = loadRedditFromNPZ(
        data_path)
    # labels = np.zeros(adj.shape[0])
    # labels[train_index]  = y_train
    # labels[val_index]  = y_val
    # labels[test_index]  = y_test
    # the loaded data is symmetric and already contains self-loops, so the following pre-processing is not needed
    # adj = adj + adj.T + sp.eye(adj.shape[0])

    # sample a small subset of the training indices because of limited compute resources
    train_index = shuffle(train_index,
                          random_state=42)[:int(0.1 * len(train_index))]
    train_adj = adj[train_index, :][:, train_index]
    degree = np.sum(train_adj, axis=1)

    features = torch.FloatTensor(np.array(features))
    # features[:, :2] = features[:, :2]/torch.max(features[:, :2], dim=0, keepdim=True)[0]
    # features = features/torch.max(features, dim=0, keepdim=True)[0]
    features = (features - features.mean(dim=0)) / features.std(dim=0)
    train_features = torch.index_select(features, 0,
                                        torch.LongTensor(train_index))
    if not porting_to_torch:
        features = features.numpy()
        train_features = train_features.numpy()

    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    train_adj = adj_normalizer(train_adj)

    if porting_to_torch:
        train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
        labels = torch.LongTensor(labels)
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        degree = torch.LongTensor(degree)
        train_index = torch.LongTensor(train_index)
        val_index = torch.LongTensor(val_index)
        test_index = torch.LongTensor(test_index)
    learning_type = "inductive"
    return adj, train_adj, features, train_features, labels, train_index, val_index, test_index, degree, learning_type
Example 14
    def test(loader):
        model.eval()

        correct = 0
        for data in loader:
            data = data.to(device)
            # edge attributes are ignored at test time; every edge gets unit weight
            edge_weight = torch.ones((data.edge_index.size(1), ),
                                     dtype=torch.float32,
                                     device=data.edge_index.device)
            adj_sp = csr_matrix((edge_weight.cpu().numpy(),
                                 (data.edge_index[0, :].cpu().numpy(),
                                  data.edge_index[1, :].cpu().numpy())),
                                shape=(data.num_nodes, data.num_nodes))
            adj_sp = adj_sp + adj_sp.T
            adj_normalizer = fetch_normalization("AugNormAdj")
            adj_sp_nz = adj_normalizer(adj_sp)
            adj = torch.sparse.FloatTensor(
                torch.LongTensor(np.array([adj_sp_nz.row,
                                           adj_sp_nz.col])).to(device),
                torch.Tensor(adj_sp_nz.data).to(device),
                torch.Size([data.num_nodes, data.num_nodes]))  #normalized adj

            adj_ori = torch.sparse.FloatTensor(
                data.edge_index, edge_weight,
                torch.Size([data.num_nodes, data.num_nodes]))  #original adj

            if data.x is None:
                data.x = torch.sparse.sum(
                    adj_ori, [0]).to_dense().unsqueeze(1).to(device)
            output = model(data.x.T, adj, data.batch)
            pred = output.max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
        return correct / len(loader.dataset)
Example 15
def process_data(data, args, normalization="AugNormAdj", usecuda=True):
    # '''
    #  Graph:
    #    adj, tuple-(9475, 9475)
    #  X is the tweet text (?); Y is the class each user belongs to:
    #    X_train, tuple-(5685, 9467)
    #    Y_train, tuple-(5685,)
    #    X_dev, tuple-(1895, 9467)
    #    Y_dev, tuple-(1895,)
    #    X_test, tuple-(1895, 9467)
    #    Y_test, tuple-(1895,)
    #  Users:
    #    U_train, list-5685
    #    U_dev, list-1895
    #    U_test, list-1895
    #  Median latitude/longitude of the 129 classes:
    #    classLatMedian, dict-129
    #    classLonMedian, dict-129
    #  User locations:
    #    userLocation, dict-9475
    # '''
    adj, X_train, Y_train, X_dev, Y_dev, X_test, Y_test, U_train, U_dev, U_test, \
        classLatMedian, classLonMedian, userLocation, tf_idf_num = data

    '''port to PyTorch and concatenate the matrices'''
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)  # 9475,9475
    adj = sparse_mx_to_torch_sparse_tensor(
        adj).float()  # torch.Size([9475, 9475])

    '''
	context_features
	features = np.vstack((X_train, X_dev, X_test))
	if args.usenormaliza == True:
		print("using feature_normalization1...")
		features = feature_normalization1(features)

	sum(tfidf)*context_features
	features1 = np.vstack((X_train, X_dev, X_test))
	features2 = tf_idf_num
	if args.usenormaliza == True:
		print("using feature_normalization2...")
		features1 = feature_normalization2(features1)
		# features1 = feature_normalization3(features1)#203.5  352.6
	features1 = torch.FloatTensor(features1)
	features2 = torch.FloatTensor(features2)
	features= features1.view(features1.size(1), -1)*features2
	features=features.view(features.size(1), -1)
	'''

    # tf-idf features
    features = np.vstack((X_train, X_dev, X_test))
    if args.usenormaliza:
        # X_train = X_train.todense()
        # X_test = X_test.todense()
        # X_dev = X_dev.todense()
        print("using feature_normalization1...")
        features = feature_normalization1(features)
    features = torch.FloatTensor(features)
    print("features:", features.shape)
    '''get labels'''
    labels = torch.LongTensor(np.hstack((Y_train, Y_dev, Y_test)))  # 1-D, length 9475

    '''get indices of train, val and test'''
    len_train = int(X_train.shape[0])  # 5685
    len_val = int(X_dev.shape[0])  # 1895
    len_test = int(X_test.shape[0])  # 1895
    # build index tensors
    idx_train = torch.LongTensor(range(len_train))  # torch.Size([5685])
    idx_val = torch.LongTensor(
        range(len_train, len_train + len_val))  # [5685, ..., 7579]
    idx_test = torch.LongTensor(
        range(len_train + len_val,
              len_train + len_val + len_test))  # [7580, ..., 9474]

    '''convert to cuda'''
    if usecuda:
        print("converting data to CUDA format...")
        adj = adj.cuda()
        features = features.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    data = (adj, features, labels, idx_train, idx_val, idx_test, U_train, U_dev,
            U_test, classLatMedian, classLonMedian, userLocation)
    return data
Example 16
def load_facebook_page_data(normalization, porting_to_torch, data_path=None):
    edges = pd.read_csv(os.path.join(data_path, 'facebook_page',
                                     'musae_facebook_edges.csv'),
                        header=0,
                        sep=',')
    raw_feats = json.load(
        open(
            os.path.join(data_path, 'facebook_page',
                         'musae_facebook_features.json'), 'r'))
    # make adj
    adj = sp.coo_matrix(
        (np.ones(len(edges)), (edges.values[:, 0], edges.values[:, 1])),
        shape=[len(raw_feats), len(raw_feats)])
    adj = adj.tocsr()
    adj = adj + adj.T + sp.eye(adj.shape[0])
    adj.data = np.ones_like(adj.data)  # binarize after symmetrizing once

    train_adj = adj  # transductive setting
    degree = np.sum(train_adj, axis=1)

    # make features
    feat_set = set()
    for k in raw_feats:
        feat_set = feat_set | set(raw_feats[k])
    feat_dim = len(list(feat_set))
    features = np.zeros(shape=[adj.shape[0], feat_dim])
    for k in raw_feats:
        features[int(k), :][raw_feats[k]] = 1.0

    # make labels
    raw_label_data = pd.read_csv(os.path.join(data_path, 'facebook_page',
                                              'musae_facebook_target.csv'),
                                 header=0,
                                 sep=',')
    raw_labels = raw_label_data['page_type'].unique()
    label_map = pd.Series(data=range(len(raw_labels)), index=raw_labels)
    raw_label_data['label'] = label_map[
        raw_label_data['page_type'].values].values
    labels = raw_label_data['label'].values

    # split data
    if not os.path.exists(
            os.path.join(data_path, 'facebook_page', 'train_index.npy')):
        print('make split data.......')
        train_index = []
        val_index = []
        test_index = []
        for l in range(labels.max() + 1):
            tmp_index = np.where(labels == l)[0]
            tmp_index = shuffle(tmp_index, random_state=42)
            tmp_train = tmp_index[:20]
            tmp_val = tmp_index[20:50]
            tmp_test = tmp_index[50:]

            train_index.append(tmp_train)
            val_index.append(tmp_val)
            test_index.append(tmp_test)
        train_index = shuffle(np.concatenate(train_index), random_state=42)
        val_index = shuffle(np.concatenate(val_index), random_state=42)
        test_index = shuffle(np.concatenate(test_index), random_state=42)

        np.save(os.path.join(data_path, 'facebook_page', 'train_index.npy'),
                train_index)
        np.save(os.path.join(data_path, 'facebook_page', 'val_index.npy'),
                val_index)
        np.save(os.path.join(data_path, 'facebook_page', 'test_index.npy'),
                test_index)
    else:
        print('load split data......')
        train_index = np.load(
            os.path.join(data_path, 'facebook_page', 'train_index.npy'))
        val_index = np.load(
            os.path.join(data_path, 'facebook_page', 'val_index.npy'))
        test_index = np.load(
            os.path.join(data_path, 'facebook_page', 'test_index.npy'))

    # process data
    features = torch.FloatTensor(features)
    features = features / torch.sum(features, dim=1, keepdim=True)
    train_features = features

    if not porting_to_torch:
        features = features.numpy()
        train_features = train_features.numpy()

    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    train_adj = adj_normalizer(train_adj)

    if porting_to_torch:
        train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
        labels = torch.LongTensor(labels)
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        degree = torch.LongTensor(degree)
        train_index = torch.LongTensor(train_index)
        val_index = torch.LongTensor(val_index)
        test_index = torch.LongTensor(test_index)
    learning_type = "transductive"
    return adj, train_adj, features, train_features, labels, train_index, val_index, test_index, degree, learning_type
Example 17
def load_cora_full_data(normalization="AugNormAdj",
                        porting_to_torch=True,
                        data_path=datadir):
    data = load_npz_to_sparse_graph(
        os.path.join(data_path, 'cora_full', 'cora_full.npz'))

    # drop nodes whose class has 55 or fewer labeled examples
    adj = data.adj_matrix
    features = data.attr_matrix.todense()
    labels = data.labels

    mask = []
    count_dict = {}
    for l in labels:
        tmp_index = np.where(labels == l)[0]
        if l not in count_dict:
            count_dict[l] = len(tmp_index)

        if len(tmp_index) > 55:
            mask.append(True)
        else:
            mask.append(False)
    mask = np.array(mask)

    adj = adj[mask, :][:, mask]
    features = features[mask]
    labels = labels[mask]

    # re-assign labels
    label_map = pd.Series(index=np.unique(labels),
                          data=np.arange(len(np.unique(labels))))
    labels = label_map[labels].values

    # make adj
    adj = adj + adj.T + sp.eye(adj.shape[0])
    adj.data = np.ones_like(adj.data)
    train_adj = adj
    degree = np.sum(train_adj, axis=1)

    # make features
    features = torch.FloatTensor(features)
    # features = features / torch.sum(features, dim=1, keepdim=True)
    train_features = features  # transductive setting
    if not porting_to_torch:
        features = features.numpy()
        train_features = train_features.numpy()

    # split data
    if not os.path.exists(
            os.path.join(data_path, 'cora_full', 'train_index.npy')):
        print('make split data......')
        train_index = []
        val_index = []
        test_index = []
        for l in range(labels.max() + 1):
            tmp_index = np.where(labels == l)[0]
            tmp_index = shuffle(tmp_index, random_state=42)
            tmp_train = tmp_index[:20]
            tmp_val = tmp_index[20:50]
            tmp_test = tmp_index[50:]

            train_index.append(tmp_train)
            val_index.append(tmp_val)
            test_index.append(tmp_test)
        train_index = shuffle(np.concatenate(train_index), random_state=42)
        val_index = shuffle(np.concatenate(val_index), random_state=42)
        test_index = shuffle(np.concatenate(test_index), random_state=42)

        np.save(os.path.join(data_path, 'cora_full', 'train_index.npy'),
                train_index)
        np.save(os.path.join(data_path, 'cora_full', 'val_index.npy'),
                val_index)
        np.save(os.path.join(data_path, 'cora_full', 'test_index.npy'),
                test_index)
    else:
        print('load split data......')
        train_index = np.load(
            os.path.join(data_path, 'cora_full', 'train_index.npy'))
        val_index = np.load(
            os.path.join(data_path, 'cora_full', 'val_index.npy'))
        test_index = np.load(
            os.path.join(data_path, 'cora_full', 'test_index.npy'))

    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    train_adj = adj_normalizer(train_adj)

    if porting_to_torch:
        train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
        labels = torch.LongTensor(labels)
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        degree = torch.LongTensor(degree)
        train_index = torch.LongTensor(train_index)
        val_index = torch.LongTensor(val_index)
        test_index = torch.LongTensor(test_index)
    learning_type = "transductive"
    return adj, train_adj, features, train_features, labels, train_index, val_index, test_index, degree, learning_type
Example 18
def preprocess_synthetic(adj, features, normalization, extra=None):
    adj_normalizer = fetch_normalization(normalization, extra)
    adj = adj_normalizer(adj)
    return adj, features
Example 19
def load_amazon_photo_data(normalization="AugNormAdj",
                           porting_to_torch=True,
                           data_path=datadir):
    data = load_npz_to_sparse_graph(
        os.path.join(data_path, 'amazon_photo',
                     'amazon_electronics_photo.npz'))

    # make adj
    adj = data.adj_matrix
    adj = adj + adj.T + sp.eye(adj.shape[0])
    adj.data = np.ones_like(adj.data)
    train_adj = adj
    degree = np.sum(train_adj, axis=1)

    # make features
    features = data.attr_matrix.todense()

    features = torch.FloatTensor(features)
    features = features / torch.sum(features, dim=1, keepdim=True)
    train_features = features  # transductive setting
    if not porting_to_torch:
        features = features.numpy()
        train_features = train_features.numpy()

    # make labels
    labels = data.labels
    # split data
    if not os.path.exists(
            os.path.join(data_path, 'amazon_photo', 'train_index.npy')):
        print('make split data......')
        train_index = []
        val_index = []
        test_index = []
        for l in range(labels.max() + 1):
            tmp_index = np.where(labels == l)[0]
            tmp_index = shuffle(tmp_index, random_state=42)
            tmp_train = tmp_index[:20]
            tmp_val = tmp_index[20:50]
            tmp_test = tmp_index[50:]

            train_index.append(tmp_train)
            val_index.append(tmp_val)
            test_index.append(tmp_test)
        train_index = shuffle(np.concatenate(train_index), random_state=42)
        val_index = shuffle(np.concatenate(val_index), random_state=42)
        test_index = shuffle(np.concatenate(test_index), random_state=42)

        np.save(os.path.join(data_path, 'amazon_photo', 'train_index.npy'),
                train_index)
        np.save(os.path.join(data_path, 'amazon_photo', 'val_index.npy'),
                val_index)
        np.save(os.path.join(data_path, 'amazon_photo', 'test_index.npy'),
                test_index)
    else:
        print('load split data......')
        train_index = np.load(
            os.path.join(data_path, 'amazon_photo', 'train_index.npy'))
        val_index = np.load(
            os.path.join(data_path, 'amazon_photo', 'val_index.npy'))
        test_index = np.load(
            os.path.join(data_path, 'amazon_photo', 'test_index.npy'))

    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    train_adj = adj_normalizer(train_adj)

    if porting_to_torch:
        train_adj = sparse_mx_to_torch_sparse_tensor(train_adj).float()
        labels = torch.LongTensor(labels)
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        degree = torch.LongTensor(degree)
        train_index = torch.LongTensor(train_index)
        val_index = torch.LongTensor(val_index)
        test_index = torch.LongTensor(test_index)
    learning_type = "transductive"
    return adj, train_adj, features, train_features, labels, train_index, val_index, test_index, degree, learning_type
Example 20
def preprocess_citation(adj, features, normalization='AugNormAdj'):
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    features = row_normalize(features)
    return adj, features
Example 21
def preprocess_citation(adj, features, normalization='FirstOrderGCN'):
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    features = row_normalize(features)
    return adj, features
Example 22
def preprocess_adj(adj, normalization="AugNormAdj"):
    adj_normalizer = fetch_normalization(normalization)
    adj = adj_normalizer(adj)
    return adj
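Hypothetical usage of preprocess_adj on a tiny path graph (assumes an AugNormAdj entry in the fetch_normalization registry, as sketched under Example 1):

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[0.0, 1.0, 0.0],
                              [1.0, 0.0, 1.0],
                              [0.0, 1.0, 0.0]]))
adj_norm = preprocess_adj(adj, normalization="AugNormAdj")
print(adj_norm.toarray())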
Example 23
def preprocess_citation(adj, features, normalization="FirstOrderGCN"):
    adj_normalizer = fetch_normalization(normalization)
    #features, Droot = row_normalize(features,adj)
    features = row_normalize(features)
    adj = adj_normalizer(adj)
    return adj, features