Example #1
import os.path as osp

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.datasets import OGB_MAG, Reddit


def get_dataset(name, root, use_sparse_tensor):
    path = osp.join(osp.dirname(osp.realpath(__file__)), root, name)
    transform = T.ToSparseTensor() if use_sparse_tensor else None
    if name == 'ogbn-mag':
        if transform is None:
            transform = T.ToUndirected(merge=True)
        else:
            transform = T.Compose([T.ToUndirected(merge=True), transform])
        dataset = OGB_MAG(root=path,
                          preprocess='metapath2vec',
                          transform=transform)
    elif name == 'ogbn-products':
        dataset = PygNodePropPredDataset('ogbn-products',
                                         root=path,
                                         transform=transform)
    elif name == 'Reddit':
        dataset = Reddit(root=path, transform=transform)

    return dataset[0], dataset.num_classes
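A quick usage sketch for the function above (dataset name and root are illustrative):

data, num_classes = get_dataset('Reddit', '../data', use_sparse_tensor=False)
print(data, num_classes)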
Example #2
import numpy as np
import torch

import torch_geometric.transforms as T
from torch_geometric.data import HeteroData


def create_hetero_mock_data(n_count, feature_dict):
    _x_dict = {
        'author':
        torch.FloatTensor(
            np.random.uniform(0, 1, (n_count, feature_dict['author']))),
        'paper':
        torch.FloatTensor(
            np.random.uniform(0, 1, (n_count, feature_dict['paper'])))
    }
    _edge_index_dict = {
        ('author', 'writes', 'paper'):
        torch.LongTensor(get_edge_array(n_count))
    }

    data = HeteroData()
    data['author'].x = _x_dict['author']
    data['paper'].x = _x_dict['paper']
    edge_type = ('author', 'writes', 'paper')
    data[edge_type].edge_index = _edge_index_dict[edge_type]
    data = T.ToUndirected()(data)

    return data.x_dict, data.edge_index_dict, data.metadata()
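get_edge_array is not defined in this snippet; a minimal sketch of a plausible implementation, assuming it returns a random author-to-paper index array in COO format (name and default edge count are hypothetical):

import numpy as np


def get_edge_array(n_count, num_edges=None):
    # Hypothetical helper: sample (2, num_edges) random (author, paper) pairs.
    num_edges = num_edges or 2 * n_count
    return np.random.randint(0, n_count, size=(2, num_edges))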
Example #3
import os.path as osp

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
import torch_geometric.transforms as T
from torch_geometric.nn import LabelPropagation

root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'OGB')
dataset = PygNodePropPredDataset('ogbn-arxiv',
                                 root,
                                 transform=T.Compose([
                                     T.ToUndirected(),
                                     T.ToSparseTensor(),
                                 ]))
split_idx = dataset.get_idx_split()
evaluator = Evaluator(name='ogbn-arxiv')
data = dataset[0]

model = LabelPropagation(num_layers=3, alpha=0.9)
out = model(data.y, data.adj_t, mask=split_idx['train'])

y_pred = out.argmax(dim=-1, keepdim=True)

val_acc = evaluator.eval({
    'y_true': data.y[split_idx['valid']],
    'y_pred': y_pred[split_idx['valid']],
})['acc']
test_acc = evaluator.eval({
    'y_true': data.y[split_idx['test']],
    'y_pred': y_pred[split_idx['test']],
})['acc']
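The snippet computes but never reports the metrics; a one-line follow-up (using the variables above) completes it:

print(f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')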
Example #4
    def __init__(self, root: str):
        super().__init__()
        self.root = root
        self.transform = T.ToUndirected(merge=False)
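For context, a minimal sketch (not from the snippet) of what merge controls on heterogeneous data: with merge=True, reverse edges of a relation that connects a node type to itself are folded into the existing edge type; with merge=False, they always become a separate 'rev_*' relation:

import torch

import torch_geometric.transforms as T
from torch_geometric.data import HeteroData

data = HeteroData()
data['paper'].num_nodes = 3
data['paper', 'cites', 'paper'].edge_index = torch.tensor([[0, 1], [1, 2]])

merged = T.ToUndirected(merge=True)(data.clone())
split = T.ToUndirected(merge=False)(data.clone())
print(merged.edge_types)  # [('paper', 'cites', 'paper')], reversed edges appended
print(split.edge_types)   # adds ('paper', 'rev_cites', 'paper') as its own type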
Example #5
import argparse
import os.path as osp

import torch

import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens

parser = argparse.ArgumentParser()
parser.add_argument('--use_weighted_loss', action='store_true',
                    help='Whether to use weighted MSE loss.')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/MovieLens')
dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
data = dataset[0].to(device)

# Add user node features for message passing:
data['user'].x = torch.eye(data['user'].num_nodes, device=device)
del data['user'].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Remove "reverse" label.

# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)(data)

# We have an unbalanced dataset with many labels for rating 3 and 4, and very
# few for 0 and 1. Therefore we use a weighted MSE loss.
if args.use_weighted_loss:
    weight = torch.bincount(train_data['user', 'movie'].edge_label)
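The snippet cuts off after the class counts; a sketch of the usual continuation (assumed, not shown in the listing): invert the counts so rare ratings weigh more, and plug them into a weighted MSE:

if args.use_weighted_loss:
    weight = torch.bincount(train_data['user', 'movie'].edge_label)
    weight = weight.max() / weight  # Rare ratings get larger weights.
else:
    weight = None


def weighted_mse_loss(pred, target, weight=None):
    # Scale each squared error by the weight of its true rating class.
    weight = 1. if weight is None else weight[target].to(pred.dtype)
    return (weight * (pred - target.to(pred.dtype)).pow(2)).mean()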
Example #6
import os.path as osp

import torch
import torch.nn.functional as F
from ogb.nodeproppred import PygNodePropPredDataset

import torch_geometric.transforms as T
from torch_geometric.nn import MaskLabel, TransformerConv
from torch_geometric.utils import index_to_mask

root = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'OGB')
dataset = PygNodePropPredDataset('ogbn-arxiv', root, T.ToUndirected())


class UniMP(torch.nn.Module):
    def __init__(self,
                 in_channels,
                 num_classes,
                 hidden_channels,
                 num_layers,
                 heads,
                 dropout=0.3):
        super().__init__()

        self.label_emb = MaskLabel(num_classes, in_channels)

        self.convs = torch.nn.ModuleList()
        self.norms = torch.nn.ModuleList()
        for i in range(1, num_layers + 1):
            if i < num_layers:
                out_channels = hidden_channels // heads
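The listing truncates inside the constructor loop; a plausible completion (assumed; the beta attention and LayerNorm choices follow the usual UniMP setup, not the snippet):

        for i in range(1, num_layers + 1):
            if i < num_layers:
                out_channels = hidden_channels // heads
                concat = True
            else:
                # The last layer predicts classes without concatenating heads:
                out_channels = num_classes
                concat = False
            conv = TransformerConv(in_channels, out_channels, heads,
                                   concat=concat, beta=True, dropout=dropout)
            self.convs.append(conv)
            in_channels = hidden_channels
            if i < num_layers:
                self.norms.append(torch.nn.LayerNorm(hidden_channels))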
Example #7
import argparse
import os.path as osp

import torch
from torch.nn import ReLU
from tqdm import tqdm

import torch_geometric.transforms as T
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import HGTLoader, NeighborLoader
from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero

parser = argparse.ArgumentParser()
parser.add_argument('--use_hgt_loader', action='store_true')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB')
transform = T.ToUndirected(merge=True)
dataset = OGB_MAG(path, preprocess='metapath2vec', transform=transform)

# Already send node features/labels to GPU for faster access during sampling:
data = dataset[0].to(device, 'x', 'y')

train_input_nodes = ('paper', data['paper'].train_mask)
val_input_nodes = ('paper', data['paper'].val_mask)
kwargs = {'batch_size': 1024, 'num_workers': 6, 'persistent_workers': True}

if not args.use_hgt_loader:
    train_loader = NeighborLoader(data,
                                  num_neighbors=[10] * 2,
                                  shuffle=True,
                                  input_nodes=train_input_nodes,
                                  **kwargs)
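The snippet ends before the HGT branch; a sketch of the corresponding HGTLoader setup, assuming the same inputs as the NeighborLoader call above (the sample sizes are illustrative):

else:
    train_loader = HGTLoader(data, num_samples=[1024] * 4, shuffle=True,
                             input_nodes=train_input_nodes, **kwargs)
    val_loader = HGTLoader(data, num_samples=[1024] * 4,
                           input_nodes=val_input_nodes, **kwargs)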
Example #8
import argparse
import os.path as osp
from timeit import default_timer

import tqdm

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.datasets import OGB_MAG
from torch_geometric.loader import NeighborLoader


def run(args: argparse.Namespace) -> None:
    for dataset_name in args.datasets:
        print(f"Dataset: {dataset_name}")
        root = osp.join(args.root, dataset_name)

        if dataset_name == 'mag':
            transform = T.ToUndirected(merge=True)
            dataset = OGB_MAG(root=root, transform=transform)
            train_idx = ('paper', dataset[0]['paper'].train_mask)
            eval_idx = ('paper', None)
            neighbor_sizes = args.hetero_neighbor_sizes
        else:
            dataset = PygNodePropPredDataset(f'ogbn-{dataset_name}', root)
            split_idx = dataset.get_idx_split()
            train_idx = split_idx['train']
            eval_idx = None
            neighbor_sizes = args.homo_neighbor_sizes

        data = dataset[0].to(args.device)

        for num_neighbors in neighbor_sizes:
            print(f'Training sampling with {num_neighbors} neighbors')
            for batch_size in args.batch_sizes:
                train_loader = NeighborLoader(
                    data,
                    num_neighbors=num_neighbors,
                    input_nodes=train_idx,
                    batch_size=batch_size,
                    shuffle=True,
                    num_workers=args.num_workers,
                )
                runtimes = []
                num_iterations = 0
                for run in range(args.runs):
                    start = default_timer()
                    for batch in tqdm.tqdm(train_loader):
                        num_iterations += 1
                    stop = default_timer()
                    runtimes.append(round(stop - start, 3))
                average_time = round(sum(runtimes) / args.runs, 3)
                print(f'batch size={batch_size}, iterations={num_iterations}, '
                      f'runtimes={runtimes}, average runtime={average_time}')

        print('Evaluation sampling with all neighbors')
        for batch_size in args.eval_batch_sizes:
            subgraph_loader = NeighborLoader(
                data,
                num_neighbors=[-1],
                input_nodes=eval_idx,
                batch_size=batch_size,
                shuffle=False,
                num_workers=args.num_workers,
            )
            runtimes = []
            num_iterations = 0
            for run in range(args.runs):
                start = default_timer()
                for batch in tqdm.tqdm(subgraph_loader):
                    num_iterations += 1
                stop = default_timer()
                runtimes.append(round(stop - start, 3))
            average_time = round(sum(runtimes) / args.runs, 3)
            print(f'batch size={batch_size}, iterations={num_iterations}, '
                  f'runtimes={runtimes}, average runtime={average_time}')
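run() expects a populated argument namespace; a minimal sketch of a matching parser (argument names are inferred from the attributes used above; defaults are assumptions):

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--datasets', nargs='+', default=['arxiv'])
    parser.add_argument('--root', default='data')
    parser.add_argument('--device', default='cpu')
    parser.add_argument('--runs', type=int, default=3)
    parser.add_argument('--num_workers', type=int, default=0)
    parser.add_argument('--batch_sizes', nargs='+', type=int, default=[1024])
    parser.add_argument('--eval_batch_sizes', nargs='+', type=int,
                        default=[4096])
    # Fixed per-layer neighbor counts for brevity; the real benchmark would
    # parse these from the command line:
    parser.add_argument('--homo_neighbor_sizes', default=[[10, 5]])
    parser.add_argument('--hetero_neighbor_sizes', default=[[10, 5]])
    run(parser.parse_args())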
Example #9
    else:
        # Per-hit input features (log charge, time, DOM position) and the
        # spatial positions used to build the k-NN graph:
        x = torch.tensor(
            tmp_event[['charge_log10', 'time', 'dom_x', 'dom_y',
                       'dom_z']].values, dtype=torch.float)
        pos = torch.tensor(tmp_event[['dom_x', 'dom_y', 'dom_z']].values,
                           dtype=torch.float)

    # Regression targets for this event from the truth table:
    query = ("SELECT energy_log10, time, position_x, position_y, position_z, "
             "direction_x, direction_y, direction_z, azimuth, zenith "
             "FROM truth WHERE event_no = {}").format(event_no)
    y = pd.read_sql(query, con)
    y = torch.tensor(y.values, dtype=torch.float)

    dat = Data(x=x, edge_index=None, edge_attr=None, y=y, pos=pos)
    
    # Define edges via k-NN with k=6. Make sure .pos is NOT scaled, i.e. use
    # raw x, y, z coordinates (not ax, by, cz), or the neighborhoods change:
    dat = T.KNNGraph(k=6, loop=False, force_undirected=False)(dat)
    dat.adj_t = None
    dat = T.ToUndirected()(dat)
    dat = T.AddSelfLoops()(dat)
    # Swap the source/target rows of edge_index:
    row, col = dat.edge_index
    dat.edge_index = torch.stack([col, row], dim=0)
    
    data_list.append(dat)

    if (i + 1) % subdivides == 0:
        data, slices = InMemoryDataset.collate(data_list)
        torch.save((data, slices),
                   destination + '/{}k_{}{}.pt'.format(
                       subdivides // 1000, save_filename, subset))
        subset += 1
        # Rebinding drops the references, so the collated graphs can be
        # garbage-collected:
        data_list = []

    if i % 500 == 0:
        print("{}: Completed {}/{}".format(datetime.now(), i, N))
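To consume the saved shards later, a (data, slices) pair can be loaded back into an InMemoryDataset; a minimal sketch (class name and path handling are hypothetical):

import torch
from torch_geometric.data import InMemoryDataset


class EventShard(InMemoryDataset):
    # Hypothetical reader for a single saved (data, slices) shard file.
    def __init__(self, shard_path):
        super().__init__(root=None)
        self.data, self.slices = torch.load(shard_path)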