def get_dataset(name, root, use_sparse_tensor):
    path = osp.join(osp.dirname(osp.realpath(__file__)), root, name)
    transform = T.ToSparseTensor() if use_sparse_tensor else None

    if name == 'ogbn-mag':
        if transform is None:
            transform = T.ToUndirected(merge=True)
        else:
            transform = T.Compose([T.ToUndirected(merge=True), transform])
        dataset = OGB_MAG(root=path, preprocess='metapath2vec',
                          transform=transform)
    elif name == 'ogbn-products':
        dataset = PygNodePropPredDataset('ogbn-products', root=path,
                                         transform=transform)
    elif name == 'Reddit':
        dataset = Reddit(root=path, transform=transform)

    return dataset[0], dataset.num_classes
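# Example call (a sketch; the 'data' root directory is an arbitrary choice,
# and any of the three dataset names handled above works the same way):
data, num_classes = get_dataset('Reddit', 'data', use_sparse_tensor=True)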
def create_hetero_mock_data(n_count, feature_dict):
    _x_dict = {
        'author': torch.FloatTensor(
            np.random.uniform(0, 1, (n_count, feature_dict['author']))),
        'paper': torch.FloatTensor(
            np.random.uniform(0, 1, (n_count, feature_dict['paper']))),
    }
    _edge_index_dict = {
        ('author', 'writes', 'paper'):
        torch.LongTensor(get_edge_array(n_count)),
    }

    data = HeteroData()
    data['author'].x = _x_dict['author']
    data['paper'].x = _x_dict['paper']
    edge_type = ('author', 'writes', 'paper')
    data[edge_type].edge_index = _edge_index_dict[edge_type]
    data = T.ToUndirected()(data)

    return data.x_dict, data.edge_index_dict, data.metadata()
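# `get_edge_array` is not defined in this excerpt. A minimal sketch matching
# its usage above (a random (2, E) author->paper edge index with entries in
# [0, n_count); the `avg_degree` parameter is an assumption) could look like:
def get_edge_array(n_count, avg_degree=10):
    # First row: source ('author') indices; second row: destination
    # ('paper') indices, both drawn uniformly at random.
    return np.random.randint(0, n_count, size=(2, n_count * avg_degree))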
import os.path as osp

from ogb.nodeproppred import Evaluator, PygNodePropPredDataset

import torch_geometric.transforms as T
from torch_geometric.nn import LabelPropagation

root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'OGB')
dataset = PygNodePropPredDataset(
    'ogbn-arxiv', root, transform=T.Compose([
        T.ToUndirected(),
        T.ToSparseTensor(),
    ]))
split_idx = dataset.get_idx_split()
evaluator = Evaluator(name='ogbn-arxiv')
data = dataset[0]

model = LabelPropagation(num_layers=3, alpha=0.9)
out = model(data.y, data.adj_t, mask=split_idx['train'])
y_pred = out.argmax(dim=-1, keepdim=True)

val_acc = evaluator.eval({
    'y_true': data.y[split_idx['valid']],
    'y_pred': y_pred[split_idx['valid']],
})['acc']
test_acc = evaluator.eval({
    'y_true': data.y[split_idx['test']],
    'y_pred': y_pred[split_idx['test']],
})['acc']
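# Reporting the two accuracies is the natural final step (this print is an
# assumed addition, not part of the original excerpt):
print(f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')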
def __init__(self, root: str):
    super().__init__()
    self.root = root
    self.transform = T.ToUndirected(merge=False)
parser.add_argument('--use_weighted_loss', action='store_true',
                    help='Whether to use weighted MSE loss.')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/MovieLens')
dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
data = dataset[0].to(device)

# Add user node features for message passing:
data['user'].x = torch.eye(data['user'].num_nodes, device=device)
del data['user'].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Remove "reverse" label.

# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)(data)

# We have an unbalanced dataset with many labels for rating 3 and 4, and
# very few for 0 and 1. Therefore we use a weighted MSE loss.
if args.use_weighted_loss:
    weight = torch.bincount(train_data['user', 'movie'].edge_label)
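    # A sketch: convert the counts to inverse-frequency class weights (this
    # continuation and `weighted_mse_loss` below are assumptions, not part
    # of the original excerpt):
    weight = weight.max() / weight
else:
    weight = None


def weighted_mse_loss(pred, target, weight=None):
    # Plain MSE, with each squared error optionally rescaled by the weight
    # of its target rating class:
    weight = 1. if weight is None else weight[target].to(pred.dtype)
    return (weight * (pred - target.to(pred.dtype)).pow(2)).mean()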
import os.path as osp

import torch
import torch.nn.functional as F
from ogb.nodeproppred import PygNodePropPredDataset

import torch_geometric.transforms as T
from torch_geometric.nn import MaskLabel, TransformerConv
from torch_geometric.utils import index_to_mask

root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'OGB')
dataset = PygNodePropPredDataset('ogbn-arxiv', root, T.ToUndirected())


class UniMP(torch.nn.Module):
    def __init__(self, in_channels, num_classes, hidden_channels, num_layers,
                 heads, dropout=0.3):
        super().__init__()

        self.label_emb = MaskLabel(num_classes, in_channels)

        self.convs = torch.nn.ModuleList()
        self.norms = torch.nn.ModuleList()
        for i in range(1, num_layers + 1):
            if i < num_layers:
                out_channels = hidden_channels // heads
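# How the label embedding is typically consumed (sketched in comments, since
# the constructor above is truncated; `label_mask` and the 0.5 ratio are
# illustrative choices):
#
#     def forward(self, x, y, edge_index, label_mask):
#         x = self.label_emb(x, y, label_mask)  # Inject known labels into x.
#         ...
#
# where during training only a random subset of training labels is revealed:
#
#     label_mask = MaskLabel.ratio_mask(train_mask, ratio=0.5)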
import argparse
import os.path as osp

import torch
from torch.nn import ReLU
from tqdm import tqdm

import torch_geometric.transforms as T
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import HGTLoader, NeighborLoader
from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero

parser = argparse.ArgumentParser()
parser.add_argument('--use_hgt_loader', action='store_true')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB')
transform = T.ToUndirected(merge=True)
dataset = OGB_MAG(path, preprocess='metapath2vec', transform=transform)

# Already send node features/labels to GPU for faster access during sampling:
data = dataset[0].to(device, 'x', 'y')

train_input_nodes = ('paper', data['paper'].train_mask)
val_input_nodes = ('paper', data['paper'].val_mask)
kwargs = {'batch_size': 1024, 'num_workers': 6, 'persistent_workers': True}

if not args.use_hgt_loader:
    train_loader = NeighborLoader(data, num_neighbors=[10] * 2, shuffle=True,
                                  input_nodes=train_input_nodes, **kwargs)
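# The matching HGT branch (a sketch; the `num_samples=[1024] * 4` fan-out is
# an illustrative choice, not fixed by the excerpt above):
else:
    train_loader = HGTLoader(data, num_samples=[1024] * 4, shuffle=True,
                             input_nodes=train_input_nodes, **kwargs)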
def run(args: argparse.Namespace) -> None:
    for dataset_name in args.datasets:
        print(f'Dataset: {dataset_name}')
        root = osp.join(args.root, dataset_name)

        if dataset_name == 'mag':
            transform = T.ToUndirected(merge=True)
            dataset = OGB_MAG(root=root, transform=transform)
            train_idx = ('paper', dataset[0]['paper'].train_mask)
            eval_idx = ('paper', None)
            neighbor_sizes = args.hetero_neighbor_sizes
        else:
            dataset = PygNodePropPredDataset(f'ogbn-{dataset_name}', root)
            split_idx = dataset.get_idx_split()
            train_idx = split_idx['train']
            eval_idx = None
            neighbor_sizes = args.homo_neighbor_sizes

        data = dataset[0].to(args.device)

        for num_neighbors in neighbor_sizes:
            print(f'Training sampling with {num_neighbors} neighbors')
            for batch_size in args.batch_sizes:
                train_loader = NeighborLoader(
                    data,
                    num_neighbors=num_neighbors,
                    input_nodes=train_idx,
                    batch_size=batch_size,
                    shuffle=True,
                    num_workers=args.num_workers,
                )
                runtimes = []
                num_iterations = 0
                for _ in range(args.runs):  # Avoid shadowing `run()`.
                    start = default_timer()
                    for batch in tqdm.tqdm(train_loader):
                        num_iterations += 1
                    stop = default_timer()
                    runtimes.append(round(stop - start, 3))
                average_time = round(sum(runtimes) / args.runs, 3)
                print(f'batch size={batch_size}, '
                      f'iterations={num_iterations}, '
                      f'runtimes={runtimes}, average runtime={average_time}')

        print('Evaluation sampling with all neighbors')
        for batch_size in args.eval_batch_sizes:
            subgraph_loader = NeighborLoader(
                data,
                num_neighbors=[-1],
                input_nodes=eval_idx,
                batch_size=batch_size,
                shuffle=False,
                num_workers=args.num_workers,
            )
            runtimes = []
            num_iterations = 0
            for _ in range(args.runs):
                start = default_timer()
                for batch in tqdm.tqdm(subgraph_loader):
                    num_iterations += 1
                stop = default_timer()
                runtimes.append(round(stop - start, 3))
            average_time = round(sum(runtimes) / args.runs, 3)
            print(f'batch size={batch_size}, iterations={num_iterations}, '
                  f'runtimes={runtimes}, average runtime={average_time}')
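# A sketch of the argument parser `run()` expects; the flag names are
# inferred from the attribute accesses above, while every default here is an
# illustrative assumption:
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--datasets', nargs='+', default=['arxiv', 'mag'])
    parser.add_argument('--root', default='data')
    parser.add_argument('--device', default='cpu')
    parser.add_argument('--runs', type=int, default=3)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--batch_sizes', nargs='+', type=int,
                        default=[512, 1024])
    parser.add_argument('--eval_batch_sizes', nargs='+', type=int,
                        default=[4096])
    # Fan-out lists are lists of lists, which are awkward to pass on the
    # command line, so they are left as Python defaults here:
    parser.add_argument('--homo_neighbor_sizes',
                        default=[[10, 5], [15, 10, 5]])
    parser.add_argument('--hetero_neighbor_sizes', default=[[5], [10, 5]])
    run(parser.parse_args())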
else:
    x = torch.tensor(
        tmp_event[['charge_log10', 'time', 'dom_x', 'dom_y',
                   'dom_z']].values, dtype=torch.float)  # Features.
    pos = torch.tensor(tmp_event[['dom_x', 'dom_y', 'dom_z']].values,
                       dtype=torch.float)  # Position.

query = ('SELECT energy_log10, time, position_x, position_y, position_z, '
         'direction_x, direction_y, direction_z, azimuth, zenith '
         'FROM truth WHERE event_no = {}'.format(event_no))
y = pd.read_sql(query, con)
y = torch.tensor(y.values, dtype=torch.float)  # Target.

dat = Data(x=x, edge_index=None, edge_attr=None, y=y, pos=pos)

# Define edges by k-NN with k=6. Make sure `.pos` is not scaled, i.e.
# (x, y, z) must not have been rescaled to (ax, by, cz).
# T.KNNGraph(loop=True)(dat)
T.KNNGraph(k=6, loop=False, force_undirected=False)(dat)
dat.adj_t = None
T.ToUndirected()(dat)
T.AddSelfLoops()(dat)
(row, col) = dat.edge_index
dat.edge_index = torch.stack([col, row], dim=0)  # Flip edge direction.

data_list.append(dat)

if (i + 1) % subdivides == 0:
    data, slices = InMemoryDataset.collate(data_list)
    torch.save((data, slices), destination + '/{}k_{}{}.pt'.format(
        subdivides // 1000, save_filename, subset))
    subset += 1
    data_list = []  # Does this free up the memory?

if i % 500 == 0:
    print('{}: Completed {}/{}'.format(datetime.now(), i, N))
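# Reading a saved shard back follows the usual InMemoryDataset pattern (a
# sketch; the `ShardDataset` name and its `path` argument are hypothetical):
class ShardDataset(InMemoryDataset):
    def __init__(self, path):
        super().__init__()
        # `InMemoryDataset.collate` above produced exactly this pair:
        self.data, self.slices = torch.load(path)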