def get_product_clusters():
    dataset_name = "ogbn-products"
    dataset = PygNodePropPredDataset(name=dataset_name)
    print('The {} dataset has {} graph'.format(dataset_name, len(dataset)))
    data = dataset[0]
    print(data)

    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train']
    val_idx = split_idx['valid']
    test_idx = split_idx['test']

    train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    train_mask[train_idx] = True
    data['train_mask'] = train_mask

    val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    val_mask[val_idx] = True
    data['valid_mask'] = val_mask

    test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    test_mask[test_idx] = True
    data['test_mask'] = test_mask

    cluster_data = ClusterData(data, num_parts=15000, save_dir="dataset")
    return cluster_data, dataset, data, split_idx
def build_sampler(args, data, save_dir):
    if args.sampler == 'rw-my':
        msg = 'Use GraphSaint randomwalk sampler(mysaint sampler)'
        loader = MySAINTSampler(data, batch_size=args.batch_size,
                                sample_type='random_walk', walk_length=2,
                                sample_coverage=1000, save_dir=save_dir)
    elif args.sampler == 'node-my':
        msg = 'Use random node sampler(mysaint sampler)'
        loader = MySAINTSampler(data, sample_type='node',
                                batch_size=args.batch_size * 3, walk_length=2,
                                sample_coverage=1000, save_dir=save_dir)
    elif args.sampler == 'rw':
        msg = 'Use GraphSaint randomwalk sampler'
        loader = GraphSAINTRandomWalkSampler(data, batch_size=args.batch_size,
                                             walk_length=2, num_steps=5,
                                             sample_coverage=1000,
                                             save_dir=save_dir)
    elif args.sampler == 'node':
        msg = 'Use GraphSaint node sampler'
        loader = GraphSAINTNodeSampler(data, batch_size=args.batch_size * 3,
                                       num_steps=5, sample_coverage=1000,
                                       num_workers=0, save_dir=save_dir)
    elif args.sampler == 'edge':
        msg = 'Use GraphSaint edge sampler'
        loader = GraphSAINTEdgeSampler(data, batch_size=args.batch_size,
                                       num_steps=5, sample_coverage=1000,
                                       save_dir=save_dir, num_workers=0)
    elif args.sampler == 'cluster':
        msg = 'Use cluster sampler'
        cluster_data = ClusterData(data, num_parts=args.num_parts,
                                   save_dir=save_dir)
        loader = ClusterLoader(cluster_data, batch_size=20, shuffle=True,
                               num_workers=0)
    else:
        raise KeyError('Sampler type error')
    return loader, msg
def cluster_data(data, num_clusters, batch_size, shuffle=True, verbose=True):
    """Prepares clusters for batching.

    Parameters
    ----------
    data : torch_geometric.data.Data
        Graph data object.
    num_clusters : int
        The number of clusters to chop the input graph into.
    batch_size : int
        The number of clusters in each batch.
    shuffle : bool, optional
        If True, the ClusterLoader will shuffle clusters, by default True.
    verbose : bool, optional
        If True, prints cluster info, by default True.

    Returns
    -------
    torch_geometric.data.ClusterLoader
        A loader for training.
    """
    clusters = ClusterData(data, num_clusters, recursive=True, save_dir=None)
    if verbose:
        for cluster in clusters:
            print(cluster)
    # Pass `shuffle` through to the loader so the documented argument takes effect.
    return ClusterLoader(clusters, batch_size=batch_size, shuffle=shuffle)
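# A minimal usage sketch for the helper above. The Planetoid/Cora dataset and
# the hyper-parameters are illustrative assumptions, not taken from the
# snippet itself.
from torch_geometric.datasets import Planetoid

planetoid = Planetoid(root='data/Cora', name='Cora')
loader = cluster_data(planetoid[0], num_clusters=32, batch_size=4,
                      shuffle=True, verbose=False)
for batch in loader:
    print(batch)  # each batch merges `batch_size` METIS partitions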
def main():
    parser = argparse.ArgumentParser(description='OGBN-Products (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_partitions', type=int, default=15000)
    parser.add_argument('--num_workers', type=int, default=6)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products')
    splitted_idx = dataset.get_idx_split()
    data = dataset[0]

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in splitted_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    model = SAGE(data.x.size(-1), args.hidden_channels, 47, args.num_layers,
                 args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-products')
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, loader, optimizer, device)
            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}')

            result = test(model, data, evaluator)
            logger.add_result(run, result)

        logger.print_statistics(run)
    logger.print_statistics()
def _make_graph_sampler(self):
    graph = Data(
        edge_index=self.edge_index,
        edge_attr=self.edge_weight,
        n_id=torch.arange(0, self.num_nodes),
        num_nodes=self.num_nodes,
    ).to('cpu')

    cluster_data = ClusterData(
        graph, num_parts=100, recursive=False, save_dir=None)

    cluster_loader = ClusterLoader(cluster_data, batch_size=5, shuffle=True,
                                   num_workers=0)

    return cluster_loader
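# A minimal, self-contained sketch of the pattern the method above enables:
# each cluster batch carries an `n_id` attribute mapping its local nodes back
# to positions in the full graph, so per-partition results can be scattered
# into a full-graph output tensor. The random graph, `node_features`, and the
# GCNConv layer below are illustrative assumptions, not part of the method.
import torch
from torch_geometric.data import Data, ClusterData, ClusterLoader
from torch_geometric.nn import GCNConv

num_nodes, in_dim, out_dim = 100, 16, 32
edge_index = torch.randint(0, num_nodes, (2, 500))
edge_weight = torch.rand(edge_index.size(1))
graph = Data(edge_index=edge_index, edge_attr=edge_weight,
             n_id=torch.arange(num_nodes), num_nodes=num_nodes)

cluster_loader = ClusterLoader(ClusterData(graph, num_parts=10),
                               batch_size=2, shuffle=True)

node_features = torch.randn(num_nodes, in_dim)
gcn = GCNConv(in_dim, out_dim)
out = torch.zeros(num_nodes, out_dim)
for subgraph in cluster_loader:
    # `n_id` holds the original indices of the nodes in this partition.
    out[subgraph.n_id] = gcn(node_features[subgraph.n_id],
                             subgraph.edge_index,
                             subgraph.edge_attr)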
def process_cluster_data(self, data):
    """
    Data processing for ClusterSelfGNN.

    First the data object is clustered according to the number of partitions
    specified by this class. Then we randomly sample a number of clusters and
    merge them together. Finally, data augmentation is applied to each of the
    final clusters. This is a simple strategy motivated by ClusterGCN and
    employed to improve the scalability of SelfGNN.

    :param data: A PyTorch Geometric Data object
    :return: a list of Data objects depending on the final number of clusters.
    """
    data_list = []
    clusters = []
    num_parts, cluster_size = self.num_parts, self.num_parts // self.final_parts

    # Cluster the data
    cd = ClusterData(data, num_parts=num_parts)
    for i in range(1, cd.partptr.shape[0]):
        cls_nodes = cd.perm[cd.partptr[i - 1]:cd.partptr[i]]
        clusters.append(cls_nodes)

    # Randomly merge clusters and apply transformation
    np.random.shuffle(clusters)
    for i in tqdm(range(0, len(clusters), cluster_size), "Processing clusters"):
        end = i + cluster_size if len(clusters) - i > cluster_size else len(clusters)
        cls_nodes = torch.cat(clusters[i:end]).unique()

        x = data.x[cls_nodes]
        y = data.y[cls_nodes]
        train_mask = data.train_mask[cls_nodes]
        dev_mask = data.val_mask[cls_nodes]
        test_mask = data.test_mask[cls_nodes]
        edge_index, edge_attr = subgraph(cls_nodes, data.edge_index,
                                         relabel_nodes=True)

        view1data = Data(edge_index=edge_index, x=x, edge_attr=edge_attr,
                         num_nodes=cls_nodes.shape[0])
        view2data = view1data if self.augumentation is None \
            else self.augumentation(view1data)
        if not hasattr(view2data, "edge_attr") or view2data.edge_attr is None:
            view2data.edge_attr = torch.ones(view2data.edge_index.shape[1])

        # Zero-pad the smaller feature matrix so both views have the same width.
        diff = abs(view2data.x.shape[1] - view1data.x.shape[1])
        if diff > 0:
            smaller_data = view1data if view1data.x.shape[1] < view2data.x.shape[1] \
                else view2data
            smaller_data.x = F.pad(smaller_data.x, pad=(0, diff))

        view1data.x = F.normalize(view1data.x)
        view2data.x = F.normalize(view2data.x)

        new_data = Data(y=y, x=view1data.x, x2=view2data.x,
                        edge_index=view1data.edge_index,
                        edge_index2=view2data.edge_index,
                        edge_attr=view1data.edge_attr,
                        edge_attr2=view2data.edge_attr,
                        train_mask=train_mask, dev_mask=dev_mask,
                        test_mask=test_mask, num_nodes=cls_nodes.shape[0],
                        nodes=cls_nodes)
        data_list.append(new_data)

    print()
    return data_list
def run_sim(cl, lr, layer):
    layer_dict = {'arma': ARMAConv, 'sage': SAGEConv, 'tag': TAGConv}

    mat = load_npz(
        '/gpfs/data/rsingh47/jbigness/data/%s/hic_sparse_vcsqrt_oe_edge_v7.npz' % cl)
    hms = np.load(
        '/gpfs/data/rsingh47/jbigness/data/%s/np_hmods_norm_vcsqrt_oe_edge_v7.npy' % cl)
    labs = np.load(
        '/gpfs/data/rsingh47/jbigness/data/%s/np_nodes_lab_genes_vcsqrt_oe_edge_v7.npy' % cl)
    print('Data Loaded')

    mask = torch.tensor(labs[:, -1]).long()
    loc = {}
    for i in range(labs[:, -1].shape[0]):
        loc[labs[i, -1]] = i

    y = []
    for i in range(mat.shape[0]):
        # Assign the known gene label when node i is in the labelled set, -1 otherwise.
        if i in mask:
            y.append(labs[loc[i], -2])
        else:
            y.append(-1)
    y = torch.tensor(y).long()

    extract = torch_geometric.utils.from_scipy_sparse_matrix(mat)
    G = torch_geometric.data.Data(
        edge_index=extract[0],
        edge_attr=extract[1],
        x=torch.tensor(hms[:mat.shape[0]]).float().reshape(-1, 1, 100, 5),
        y=y)

    cluster_data = ClusterData(G, num_parts=20, recursive=False)
    train_loader = ClusterLoader(cluster_data, batch_size=2, shuffle=False,
                                 num_workers=0)
    print('Data Clustered')

    random.seed(30)
    idx = list(range(labs.shape[0] - 1))
    random.shuffle(idx)
    train_mask = idx[:10000]
    test_mask = idx[10000:]

    net = GCN(94, 500, 400, 100, 50, 2, layer_dict[layer])
    return train_model(net, train_loader, 1500, lr, train_mask, test_mask, mask)
def main():
    parser = argparse.ArgumentParser(description='Link Prediction (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--dataset', type=str, default='ogbl-citation')
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_partitions', type=int, default=15000)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=10)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--negs', type=int, default=1)
    parser.add_argument('--gnn_type', type=str, default='gcn')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name=args.dataset)
    split_edge = dataset.get_edge_split()  # needed by `test()` below
    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)
    print(data.edge_index.shape, data.num_nodes)

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    model = GCN(data.x.size(-1), args.hidden_channels, args.hidden_channels,
                args.num_layers, args.dropout, gnn_type=args.gnn_type).to(device)
    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name=args.dataset)
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(
            list(model.parameters()) + list(predictor.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, predictor, loader, optimizer, device, args.negs)
            tt = time.time()
            print(tt - t0)

            if epoch % args.eval_steps == 0:
                result = test(model, predictor, data, split_edge, evaluator,
                              64 * 4 * args.batch_size, device)
                logger.add_result(run, result)

                if epoch % args.log_steps == 0:
                    train_mrr, valid_mrr, test_mrr = result
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {train_mrr:.4f}, '
                          f'Valid: {valid_mrr:.4f}, '
                          f'Test: {test_mrr:.4f}')

        logger.print_statistics(run)
    logger.print_statistics()
def forward(self, X, edge_index, edge_weight):
    """
    :param X: Input data of shape (batch_size, num_nodes, in_channels)
    :param edge_index: Graph connectivity in COO format with shape (2, num_edges)
    :param edge_weight: Edge feature matrix with shape (num_edges, num_edge_features)
    :return: Output data of shape (batch_size, num_nodes, out_channels)
    """
    if torch.is_tensor(X):
        sz = X.shape

    if self.gcn_partition == 'cluster':
        out = torch.zeros(sz[0], sz[1], self.out_channels, device=X.device)
        graph_data = Data(edge_index=edge_index, edge_attr=edge_weight,
                          train_mask=torch.arange(0, sz[1]),
                          num_nodes=sz[1]).to('cpu')
        cluster_data = ClusterData(graph_data, num_parts=50, recursive=False,
                                   save_dir='data/cluster')
        loader = ClusterLoader(cluster_data, batch_size=5, shuffle=True,
                               num_workers=0)

        for subgraph in loader:
            out[:, subgraph.train_mask] = self.gcn(
                X[:, subgraph.train_mask],
                subgraph.edge_index.to(X.device),
                subgraph.edge_attr.to(X.device))

    elif self.gcn_partition == 'sample':
        # Use NeighborSampler() to iterate over graph nodes in a mini-batch
        # fashion and construct sampled subgraphs (use cpu for the no-CUDA version).
        out = torch.zeros(sz[0], sz[1], self.out_channels, device=X.device)
        graph_data = Data(edge_index=edge_index, num_nodes=sz[1]).to('cpu')
        loader = NeighborSampler(graph_data, size=[10, 5], num_hops=2,
                                 batch_size=120, shuffle=True,
                                 add_self_loops=False)

        for data_flow in loader():
            block1 = data_flow[0]
            t = self.gcn1(X, edge_index[:, block1.e_id], edge_weight[block1.e_id])
            block2 = data_flow[1]
            part_out = self.gcn2(t, edge_index[:, block2.e_id],
                                 edge_weight[block2.e_id])
            out[:, data_flow.n_id] = part_out[:, data_flow.n_id]

    elif self.batch_training:
        if self.adj_available:
            out = self.gcn(X, edge_index, edge_weight)
        else:
            out = self.gcn(X, edge_index)

    else:
        # Currently, conv layers such as GATConv cannot use the `node_dim`
        # argument for batch training. This is a temporary workaround, but it
        # is very slow (costing about 6x more than batch training).
        batch = self.get_batch(X)
        if self.adj_available:
            out = self.gcn(batch.x, edge_index, edge_weight)
        else:
            out = self.gcn(batch.x, edge_index)
        out = out.view(sz[0], sz[1], -1)

    return out
# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

### Test data loader
torch.manual_seed(12345)
cluster_data = ClusterData(data, num_parts=128)  # 1. Create subgraphs.
train_loader = ClusterLoader(cluster_data, batch_size=32,
                             shuffle=True)  # 2. Stochastic partitioning scheme.

print()
total_num_nodes = 0
for step, sub_data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of nodes in the current batch: {sub_data.num_nodes}')
    print(sub_data)
    print()
    total_num_nodes += sub_data.num_nodes

print(f'Iterated over {total_num_nodes} of {data.num_nodes} nodes!')
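# A minimal sketch (not part of the snippet above) of how the ClusterLoader is
# typically consumed for Cluster-GCN style training. `model` is assumed to be
# any node-classification GNN taking (x, edge_index); masking the loss with
# `train_mask` follows the convention used throughout these examples.
import torch.nn.functional as F

def train_one_epoch(model, train_loader, optimizer, device):
    model.train()
    total_loss = total_examples = 0
    for sub_data in train_loader:
        sub_data = sub_data.to(device)
        optimizer.zero_grad()
        out = model(sub_data.x, sub_data.edge_index)
        # Only labelled training nodes inside the current partition contribute.
        loss = F.cross_entropy(out[sub_data.train_mask],
                               sub_data.y[sub_data.train_mask].view(-1))
        loss.backward()
        optimizer.step()
        num_examples = int(sub_data.train_mask.sum())
        total_loss += float(loss) * num_examples
        total_examples += num_examples
    return total_loss / max(total_examples, 1)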
def main():
    parser = argparse.ArgumentParser(description='OGBN-Proteins (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_node_features', action='store_true')
    parser.add_argument('--num_partitions', type=int, default=700)
    parser.add_argument('--num_workers', type=int, default=6)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=128)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=1000)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-proteins')
    splitted_idx = dataset.get_idx_split()
    data = dataset[0]

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in splitted_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    if not args.use_node_features:
        cluster_data.data.x = torch.ones(cluster_data.data.num_nodes, 1)
    else:
        cluster_data.data.x = cluster_data.data.x.to(torch.float)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    model = GIN(cluster_data.data.x.size(-1), data.edge_attr.size(-1),
                args.hidden_channels, 112, args.num_layers,
                args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-proteins')
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, loader, optimizer, device)

            if epoch % args.eval_steps == 0:
                result = test(model, loader, evaluator, device)
                logger.add_result(run, result)

                if epoch % args.log_steps == 0:
                    train_rocauc, valid_rocauc, test_rocauc = result
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {100 * train_rocauc:.2f}%, '
                          f'Valid: {100 * valid_rocauc:.2f}% '
                          f'Test: {100 * test_rocauc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()
def run():
    cluster_data = ClusterData(
        data,
        num_parts=args.num_partitions,
        recursive=False,
        save_dir=dataset.processed_dir,
    )

    loader = ClusterLoader(
        cluster_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )

    model = GCN(
        data.x.size(-1),
        args.hidden_channels,
        args.hidden_channels,
        args.num_layers,
        args.dropout,
    ).to(device)
    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name="ogbl-citation")
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(list(model.parameters()) +
                                     list(predictor.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, loader, optimizer, device)
            print(f"Run: {run + 1:02d}, Epoch: {epoch:02d}, Loss: {loss:.4f}")

            if epoch > 49 and epoch % args.eval_steps == 0:
                result = test(
                    model,
                    predictor,
                    data,
                    split_edge,
                    evaluator,
                    batch_size=64 * 1024,
                    device=device,
                )
                logger.add_result(run, result)

                train_mrr, valid_mrr, test_mrr = result
                print(f"Run: {run + 1:02d}, "
                      f"Epoch: {epoch:02d}, "
                      f"Loss: {loss:.4f}, "
                      f"Train: {train_mrr:.4f}, "
                      f"Valid: {valid_mrr:.4f}, "
                      f"Test: {test_mrr:.4f}")

        logger.print_statistics(run)
    logger.print_statistics()
    edge_attr=edge_type,
    node_type=node_type,
    local_node_idx=local_node_idx,
    num_nodes=node_type.size(0),
)

homo_data.y = node_type.new_full((node_type.size(0), 1), -1)
homo_data.y[local2global["paper"]] = data.y_dict["paper"]

homo_data.train_mask = torch.zeros((node_type.size(0)), dtype=torch.bool)
homo_data.train_mask[local2global["paper"][split_idx["train"]["paper"]]] = True

print(homo_data)

cluster_data = ClusterData(homo_data, num_parts=5000, recursive=True,
                           save_dir=dataset.processed_dir)

train_loader = ClusterLoader(cluster_data, batch_size=500, shuffle=True,
                             num_workers=12)

# Map each feature and node count to its canonical (integer) type key.
x_dict = {}
for key, x in data.x_dict.items():
    x_dict[key2int[key]] = x

num_nodes_dict = {}
for key, N in data.num_nodes_dict.items():
    num_nodes_dict[key2int[key]] = N
if __name__ == '__main__':
    adj, features, labels, mask_train, mask_test, \
        y_test_oneclass, mask_test_oneclass, mask_train1, = load_data(
            '/content/drive/My Drive/NAS-GCN-SAR/data')

    # Some preprocessing of the input data.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    edge_index, edge_weight = from_scipy_sparse_matrix(adj)
    features = torch.from_numpy(features).float()
    labels = torch.from_numpy(labels).float()
    mask_test_oneclass = torch.from_numpy(np.array(mask_test_oneclass)).to(device)
    y_test_oneclass = torch.from_numpy(np.array(y_test_oneclass)).to(device)

    # Data loader
    data = Data(x=features, edge_index=edge_index, y=labels)
    data.train_mask = torch.from_numpy(mask_train)
    data.test_mask = torch.from_numpy(mask_test)

    cluster_data = ClusterData(data, num_parts=1024, recursive=False)
    train_loader = ClusterLoader(cluster_data, batch_size=64, shuffle=True,
                                 num_workers=12)
    subgraph_loader = NeighborSampler(data.edge_index, sizes=[-1],
                                      batch_size=1024, shuffle=False,
                                      num_workers=12)

    main()
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)

val_data_list = [data for data in val_dataset]
for data in val_data_list:
    data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    data.val_mask = torch.ones(data.num_nodes, dtype=torch.bool)
    data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)

test_data_list = [data for data in test_dataset]
for data in test_data_list:
    data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    data.test_mask = torch.ones(data.num_nodes, dtype=torch.bool)

data = Batch.from_data_list(train_data_list + val_data_list + test_data_list)

cluster_data = ClusterData(data, num_parts=50, recursive=False,
                           save_dir=dataset.processed_dir)
loader = ClusterLoader(cluster_data, batch_size=1, shuffle=True, num_workers=0)


# Model structure
class Net(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Net, self).__init__()
        dim = 512
        self.gcn1 = ChebConv(in_channels, dim, K=1)
        self.lin1 = nn.Linear(in_channels, dim)
        self.gcn2 = ChebConv(dim, dim, K=1)
        self.lin2 = nn.Linear(dim, dim)
        self.gcn3 = ChebConv(dim, dim, K=1)
        self.lin3 = nn.Linear(dim, dim)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GENConv
from gcn import add_features_, dataloader
from torch_geometric.data import InMemoryDataset
from torch_geometric.data import ClusterData, ClusterLoader
from sklearn.metrics import mean_squared_error, r2_score

num_epochs = 20

data_, G = dataloader()
data_ = add_features_(data_, G)
dataset = data_
print(dataset)
# dataset = InMemoryDataset.collate(data)

cluster_data = ClusterData(data_, num_parts=50, recursive=False)
test_mask = cluster_data
train_loader = ClusterLoader(cluster_data, batch_size=5, shuffle=True,
                             num_workers=12)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, 32)
        # self.conv2 = GCNConv(16, dataset.num_classes)
        self.conv3 = GCNConv(32, 16)
        self.conv2 = GCNConv(16, dataset.y.shape[1])
def main(args):
    # Set up logging and devices
    args.save_dir = get_save_dir(
        args.save_dir,
        args.name + '-' + args.dataset + '-' + str(args.hidden_dim) + '-' +
        str(args.max_forward_iterations) + '-' + args.reg_loss_type + '-' +
        args.embed_type + '-' + args.init_type,
        training=True)
    log = get_logger(args.save_dir, args.name)
    tboard = SummaryWriter(args.save_dir)
    device, args.gpu_ids = get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get data loader
    log.info('Building dataset...')
    dataset, split_idx, evaluator = load_pyg_dataset(args.dataset)
    data = dataset[0]

    # Attach the original node indices to the data object.
    data['orig_node_idx'] = torch.arange(data.x.shape[0])

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in split_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)
    dataset_loader = CustomClusterLoader(
        cluster_data, batch_size=args.batch_size, shuffle=args.data_shuffle,
        num_workers=args.num_workers,
        normalize_adj_matrix=args.normalize_adj_matrix)

    num_nodes = data.num_nodes

    # If the node features are a zero tensor with feature dimension one,
    # re-create them here as a (num_nodes, num_nodes) sparse identity matrix.
    if data.num_node_features == 1 and torch.equal(
            data['x'], torch.zeros(data.num_nodes, data.num_node_features)):
        node_features = sp.identity(data.num_nodes / len(dataset_loader))
        node_features = sparse_mx_to_torch_sparse_tensor(node_features).float()
        data.x = node_features

    # Get model
    log.info('Building model...')
    # Create the model, optimizer and checkpoint
    model_class = str_to_attribute(sys.modules['models'], args.name)
    model = model_class(data.x.shape[-1], dataset.num_classes, args, log,
                        orig_num_nodes=num_nodes)
    model = DataParallelWrapper(model)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model = load_model(model, args.load_path, args.gpu_ids)
    else:
        # Reset parameters only if not loading from checkpoint
        model.reset_parameters()
    model = model.to(device)
    model.train()

    # Get optimizer and scheduler
    parameters = [p for p in model.parameters() if p.requires_grad]
    if args.optimizer == 'Adam':
        optimizer = optim.Adam(parameters, args.learning_rate,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        optimizer = optim.SGD(parameters, args.learning_rate,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer == 'Adadelta':
        optimizer = optim.Adadelta(parameters, args.learning_rate,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'Adamax':
        optimizer = optim.Adamax(parameters, args.learning_rate,
                                 weight_decay=args.weight_decay)

    # Get saver
    saver = CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints,
                            metric_name=args.metric_name,
                            maximize_metric=args.maximize_metric, log=log)

    # Train
    log.info('Training...')
    with tqdm.tqdm(total=args.num_epochs) as progress_bar:
        for epoch in range(args.num_epochs):
            # Train and display the stats
            train_results = train(model, dataset_loader, optimizer, device,
                                  evaluator, args)

            # Log the metrics
            train_log_message = ''.join('{} - {}; '.format(k, v)
                                        for k, v in train_results.items())

            # Visualize in TensorBoard
            for k, v in train_results.items():
                tboard.add_scalar(f'train/{k}', v, epoch)

            # Evaluate, display the stats and save the model
            dev_results = evaluate(model, dataset_loader, device, evaluator, args)

            # Save the model
            saver.save(epoch, model, dev_results[args.metric_name], device)

            # Log the metrics
            dev_log_message = ''.join('{} - {}; '.format(k, v)
                                      for k, v in dev_results.items())

            # Visualize in TensorBoard
            for k, v in dev_results.items():
                tboard.add_scalar(f'eval/{k}', v, epoch)

            log.info(f'Epoch: {epoch} - Training - {train_log_message} - '
                     f'Evaluating - {dev_log_message}')

            progress_bar.update(1)
            progress_bar.set_postfix(eval_loss=dev_results['loss'])
val_data.edge_index = torch_geometric.utils.subgraph(data.val_mask,
                                                     data.edge_index,
                                                     relabel_nodes=True)[0]
val_data.mask = data.val_mask
print(val_data)

test_data = Data()
test_data.x = data.x[data.test_mask]
test_data.y = data.y[data.test_mask]
test_data.edge_index = torch_geometric.utils.subgraph(data.test_mask,
                                                      data.edge_index,
                                                      relabel_nodes=True)[0]
test_data.mask = data.test_mask
print(test_data)

train_data = ClusterData(train_data, num_parts=1500, recursive=False,
                         save_dir="data/Reddit/train")
val_data = ClusterData(val_data, num_parts=20, recursive=False,
                       save_dir="data/Reddit/val")
test_data = ClusterData(test_data, num_parts=1, recursive=False,
                        save_dir="data/Reddit/test")

train_loader = ClusterLoader(train_data, batch_size=20, shuffle=True,
                             num_workers=8)
val_loader = ClusterLoader(val_data,
def main(args):
    # Set up logging and devices
    args.save_dir = get_save_dir(args.save_dir, 'test', training=True)
    log = get_logger(args.save_dir, 'test')
    tboard = SummaryWriter(args.save_dir)
    device, args.gpu_ids = get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get data loader
    log.info('Building dataset...')
    # Download and process data at './dataset/xxx'
    dataset = PygNodePropPredDataset(name=args.dataset, root='dataset/')
    evaluator = Evaluator(name=args.dataset)
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in split_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)
    dataset_loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                                   shuffle=args.data_shuffle,
                                   num_workers=args.num_workers)

    # Get model
    log.info('Building model...')
    model = load_full_model(args.load_path, args.gpu_ids)
    model = nn.DataParallel(model)
    model = model.to(device)
    model.eval()

    # Test
    log.info('Testing...')

    # Evaluate, display the stats and save the model
    dev_results = test(model, dataset_loader, device, evaluator)

    # Log the metrics
    dev_log_message = ''.join('{} - {}; '.format(k, v)
                              for k, v in dev_results.items())
    log.info(f'Testing - {dev_log_message}')
def main():
    parser = argparse.ArgumentParser(description='OGBN-Products (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_partitions', type=int, default=15000)
    parser.add_argument('--num_workers', type=int, default=12)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--step-size', type=float, default=8e-3)
    parser.add_argument('-m', type=int, default=3)
    parser.add_argument('--test-freq', type=int, default=5)
    parser.add_argument('--attack', type=str, default='flag')
    parser.add_argument('--amp', type=float, default=2)
    args = parser.parse_args()

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in split_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    subgraph_loader = NeighborSampler(data.edge_index, sizes=[-1],
                                      batch_size=1024, shuffle=False,
                                      num_workers=args.num_workers)

    model = SAGE(data.x.size(-1), args.hidden_channels, dataset.num_classes,
                 args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-products')

    vals, tests = [], []
    for run in range(args.runs):
        best_val, final_test = 0, 0

        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

        for epoch in range(1, args.epochs + 1):
            loss, acc = train_flag(model, loader, optimizer, device, args)

            if epoch > 19 and epoch % args.test_freq == 0 or epoch == args.epochs:
                result = test(model, data, evaluator, subgraph_loader, device)
                train, val, tst = result
                if val > best_val:
                    best_val = val
                    final_test = tst

        print(f'Run{run} val:{best_val}, test:{final_test}')
        vals.append(best_val)
        tests.append(final_test)

    print('')
    print(f"Average val accuracy: {np.mean(vals)} ± {np.std(vals)}")
    print(f"Average test accuracy: {np.mean(tests)} ± {np.std(tests)}")
data = dataset[0]
dataset_test(data)

if args.multi_gpu:  # Unit test: GPU number verification
    # Prepare model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = parse_model_name(args.model, dataset)
    model = DataParallel(model)
    model = model.to(device)

    # Split graph into subgraphs
    if args.subgraph_scheme == 'cluster':
        # Split data into subgraphs using cluster methods
        data_list = list(ClusterData(data, num_parts=args.num_parts))
    elif args.subgraph_scheme == 'neighbor':
        data_list = list(
            NeighborSubgraphLoader(data, batch_size=args.neighbor_batch_size))
        print(f'Using neighbor sampling | number of subgraphs: {len(data_list)}')

    # Run the model for each batch size setup
    batch_sizes = np.array(list(range(1, 65))) * 4
    batch_running_time = []
    for batch_size in batch_sizes:
        batch_size = int(batch_size)
        loader = DataListLoader(data_list, batch_size=batch_size,
def main():
    parser = argparse.ArgumentParser(description='OGBL-Citation (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_partitions', type=int, default=15000)
    parser.add_argument('--num_workers', type=int, default=12)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=10)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name='ogbl-citation')
    split_edge = dataset.get_edge_split()
    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    # We randomly pick some training samples that we want to evaluate on:
    torch.manual_seed(12345)
    idx = torch.randperm(split_edge['train']['source_node'].numel())[:86596]
    split_edge['eval_train'] = {
        'source_node': split_edge['train']['source_node'][idx],
        'target_node': split_edge['train']['target_node'][idx],
        'target_node_neg': split_edge['valid']['target_node_neg'],
    }

    model = GCN(data.x.size(-1), args.hidden_channels, args.hidden_channels,
                args.num_layers, args.dropout).to(device)
    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbl-citation')
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(
            list(model.parameters()) + list(predictor.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, loader, optimizer, device)
            print(f'Run: {run + 1:02d}, Epoch: {epoch:02d}, Loss: {loss:.4f}')

            if epoch > 49 and epoch % args.eval_steps == 0:
                result = test(model, predictor, data, split_edge, evaluator,
                              batch_size=64 * 1024, device=device)
                logger.add_result(run, result)

                train_mrr, valid_mrr, test_mrr = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {train_mrr:.4f}, '
                      f'Valid: {valid_mrr:.4f}, '
                      f'Test: {test_mrr:.4f}')

        logger.print_statistics(run)
    logger.print_statistics()
def test_cluster_gcn():
    adj = torch.tensor([
        [1, 1, 1, 0, 1, 0],
        [1, 1, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 1],
    ])

    x = torch.Tensor([[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
    edge_index = adj.nonzero(as_tuple=False).t()
    edge_attr = torch.arange(edge_index.size(1))
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
    data.num_nodes = 6

    cluster_data = ClusterData(data, num_parts=2, log=False)

    assert cluster_data.partptr.tolist() == [0, 3, 6]
    assert cluster_data.perm.tolist() == [0, 2, 4, 1, 3, 5]
    assert cluster_data.data.x.tolist() == [
        [0, 0],
        [2, 2],
        [4, 4],
        [1, 1],
        [3, 3],
        [5, 5],
    ]
    assert cluster_data.data.adj.to_dense().tolist() == [
        [0, 2, 3, 1, 0, 0],
        [8, 9, 10, 0, 0, 0],
        [14, 15, 16, 0, 0, 0],
        [4, 0, 0, 5, 6, 7],
        [0, 0, 0, 11, 12, 13],
        [0, 0, 0, 17, 18, 19],
    ]

    data = cluster_data[0]
    assert data.num_nodes == 3
    assert data.x.tolist() == [[0, 0], [2, 2], [4, 4]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]
    assert data.edge_attr.tolist() == [0, 2, 3, 8, 9, 10, 14, 15, 16]

    data = cluster_data[1]
    assert data.num_nodes == 3
    assert data.x.tolist() == [[1, 1], [3, 3], [5, 5]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]
    assert data.edge_attr.tolist() == [5, 6, 7, 11, 12, 13, 17, 18, 19]

    loader = ClusterLoader(cluster_data, batch_size=1)
    iterator = iter(loader)

    data = next(iterator)
    assert data.x.tolist() == [[0, 0], [2, 2], [4, 4]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]

    data = next(iterator)
    assert data.x.tolist() == [[1, 1], [3, 3], [5, 5]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]

    torch.manual_seed(1)
    loader = ClusterLoader(cluster_data, batch_size=2, shuffle=True)
    data = next(iter(loader))
    assert data.num_nodes == 6
    assert data.x.tolist() == [
        [0, 0],
        [2, 2],
        [4, 4],
        [1, 1],
        [3, 3],
        [5, 5],
    ]
    assert to_dense_adj(data.edge_index).squeeze().tolist() == [
        [1, 1, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]

    torch.manual_seed(2)
    loader = ClusterLoader(cluster_data, batch_size=2, shuffle=True)
    data = next(iter(loader))
    assert data.num_nodes == 6
    assert data.x.tolist() == [
        [1, 1],
        [3, 3],
        [5, 5],
        [0, 0],
        [2, 2],
        [4, 4],
    ]
    assert to_dense_adj(data.edge_index).squeeze().tolist() == [
        [1, 1, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]

    loader = ClusterLoader(cluster_data, batch_size=1, shuffle=True)
    data = next(iter(loader))
    assert data.num_nodes == 3
def main():
    parser = argparse.ArgumentParser(description='OGBN-Products (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_partitions', type=int, default=15000)
    parser.add_argument('--num_workers', type=int, default=12)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in split_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    subgraph_loader = NeighborSampler(data.edge_index, sizes=[-1],
                                      batch_size=1024, shuffle=False,
                                      num_workers=args.num_workers)

    model = GCN(data.x.size(-1), args.hidden_channels, dataset.num_classes,
                args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-products')
    logger = Logger(args.runs, args)
    logger_orig = Logger(args.runs, args)

    adj = process_adj(data)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

        best_valid = 0
        best_out = None
        for epoch in range(1, 1 + args.epochs):
            loss, train_acc = train(model, loader, optimizer, device)
            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Approx Train Acc: {train_acc:.4f}')

            if epoch > 19 and epoch % args.eval_steps == 0:
                out, result = test(model, data, evaluator, subgraph_loader,
                                   device)
                logger_orig.add_result(run, result)

                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()
    logger_orig.print_statistics()
def process_cluster_data(self, data):
    """
    Augmented view data generation based on clustering.

    :param data: A PyTorch Geometric Data object
    :return: a list of Data objects, one per merged cluster
    """
    data_list = []
    clusters = []
    num_parts, cluster_size = self.num_parts, self.num_parts // self.final_parts

    # Cluster the data
    cd = ClusterData(data, num_parts=num_parts)
    for i in range(1, cd.partptr.shape[0]):
        cls_nodes = cd.perm[cd.partptr[i - 1]:cd.partptr[i]]
        clusters.append(cls_nodes)

    # Randomly merge clusters and apply transformation
    np.random.shuffle(clusters)
    for i in range(0, len(clusters), cluster_size):
        end = i + cluster_size if len(clusters) - i > cluster_size else len(clusters)
        cls_nodes = torch.cat(clusters[i:end]).unique()
        sys.stdout.write(
            f'\rProcessing cluster {i + 1}/{len(clusters)} with {self.final_parts} nodes')
        sys.stdout.flush()

        x = data.x[cls_nodes]
        y = data.y[cls_nodes]
        train_mask = data.train_mask[cls_nodes]
        dev_mask = data.val_mask[cls_nodes]
        test_mask = data.test_mask[cls_nodes]
        edge_index, edge_attr = subgraph(cls_nodes, data.edge_index,
                                         relabel_nodes=True)

        # Use a separate name here so the full `data` object is not clobbered
        # across loop iterations.
        cluster_graph = Data(edge_index=edge_index, x=x, edge_attr=edge_attr,
                             num_nodes=cls_nodes.shape[0])
        view1data, view2data = self.augumentation(cluster_graph)
        if not hasattr(view1data, "edge_attr") or view1data.edge_attr is None:
            view1data.edge_attr = torch.ones(view1data.edge_index.shape[1])
        if not hasattr(view2data, "edge_attr") or view2data.edge_attr is None:
            view2data.edge_attr = torch.ones(view2data.edge_index.shape[1])

        # Zero-pad the smaller feature matrix so both views have the same width.
        diff = abs(view2data.x.shape[1] - view1data.x.shape[1])
        if diff > 0:
            smaller_data = view1data if view1data.x.shape[1] < view2data.x.shape[1] \
                else view2data
            smaller_data.x = F.pad(smaller_data.x, pad=(0, diff))

        view1data.x = F.normalize(view1data.x)
        view2data.x = F.normalize(view2data.x)
        print(view1data)
        print(view2data)

        new_data = Data(y=y, x1=view1data.x, x2=view2data.x,
                        edge_index1=view1data.edge_index,
                        edge_index2=view2data.edge_index,
                        edge_attr1=view1data.edge_attr,
                        edge_attr2=view2data.edge_attr,
                        train_mask=train_mask, dev_mask=dev_mask,
                        test_mask=test_mask, num_nodes=cls_nodes.shape[0],
                        nodes=cls_nodes)
        data_list.append(new_data)

    print()
    return data_list
def test_cluster_gcn():
    adj = torch.tensor([
        [1, 1, 1, 0, 1, 0],
        [1, 1, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 1],
    ])

    edge_index = adj.nonzero().t()
    x = torch.Tensor([[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
    data = Data(edge_index=edge_index, x=x, num_nodes=6)

    cluster_data = ClusterData(data, num_parts=2, log=False)

    assert cluster_data.partptr.tolist() == [0, 3, 6]
    assert cluster_data.perm.tolist() == [0, 2, 4, 1, 3, 5]
    assert cluster_data.data.x.tolist() == [
        [0, 0],
        [2, 2],
        [4, 4],
        [1, 1],
        [3, 3],
        [5, 5],
    ]
    assert cluster_data.data.adj.to_dense().tolist() == [
        [1, 1, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]

    data = cluster_data[0]
    assert data.x.tolist() == [[0, 0], [2, 2], [4, 4]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]

    data = cluster_data[1]
    assert data.x.tolist() == [[1, 1], [3, 3], [5, 5]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]

    loader = ClusterLoader(cluster_data, batch_size=1)
    it = iter(loader)

    data = next(it)
    assert data.x.tolist() == [[0, 0], [2, 2], [4, 4]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]

    data = next(it)
    assert data.x.tolist() == [[1, 1], [3, 3], [5, 5]]
    assert data.edge_index.tolist() == [[0, 0, 0, 1, 1, 1, 2, 2, 2],
                                        [0, 1, 2, 0, 1, 2, 0, 1, 2]]

    torch.manual_seed(1)
    loader = ClusterLoader(cluster_data, batch_size=2, shuffle=True)
    data = next(iter(loader))
    assert data.x.tolist() == [
        [0, 0],
        [2, 2],
        [4, 4],
        [1, 1],
        [3, 3],
        [5, 5],
    ]
    assert to_dense_adj(data.edge_index).squeeze().tolist() == [
        [1, 1, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]

    torch.manual_seed(2)
    loader = ClusterLoader(cluster_data, batch_size=2, shuffle=True)
    data = next(iter(loader))
    assert data.x.tolist() == [
        [1, 1],
        [3, 3],
        [5, 5],
        [0, 0],
        [2, 2],
        [4, 4],
    ]
    assert to_dense_adj(data.edge_index).squeeze().tolist() == [
        [1, 1, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
labels = torch.from_numpy(labels).float()
mask_test_oneclass = torch.from_numpy(np.array(mask_test_oneclass))
y_test_oneclass = torch.from_numpy(np.array(y_test_oneclass))

# Data loader
data = Data(x=features, edge_index=edge_index, y=labels)
data.train_mask = torch.from_numpy(mask_train)
data.test_mask = torch.from_numpy(mask_test)
data.mask_test_oneclass = mask_test_oneclass
data.y_test_oneclass = y_test_oneclass

total_test_oneclass = []
for i in range(args.classes):
    total_test_oneclass.append(mask_test_oneclass[i].sum())

cluster_data = ClusterData(data, num_parts=2000, recursive=False,
                           save_dir='./data')
train_loader = ClusterLoader(cluster_data, batch_size=150, shuffle=True,
                             num_workers=12)
subgraph_loader = NeighborSampler(data.edge_index, sizes=[-1],
                                  batch_size=1024, shuffle=False,
                                  num_workers=12)

# Select the network architecture here.
# eval() evaluates a string expression and returns its value.
genotype = eval("genotypes.%s" % args.arch)

model = Network(args.init_channels, args.classes,