def read_tu_data(folder, prefix):
    """Read a TU-format benchmark dataset from ``folder``.

    Args:
        folder (str): Directory holding the raw ``{prefix}_*.txt`` files.
        prefix (str): Dataset name used as filename prefix.

    Returns:
        tuple: ``(data, slices)`` as produced by :func:`split`, where
        ``data`` bundles all graphs and ``slices`` indexes each example.
    """
    # Discover which optional files exist: '<prefix>_<name>.txt' -> '<name>'.
    paths = glob.glob(osp.join(folder, '{}_*.txt'.format(prefix)))
    available = ['_'.join(p.split('/')[-1].split('_')[1:])[:-4] for p in paths]

    # File indices are one-based; shift everything to zero-based.
    edge_index = read_file(folder, prefix, 'A', torch.long).t() - 1
    batch = read_file(folder, prefix, 'graph_indicator', torch.long) - 1

    node_attr = node_lab = None
    if 'node_attributes' in available:
        node_attr = read_file(folder, prefix, 'node_attributes')
    if 'node_labels' in available:
        node_lab = read_file(folder, prefix, 'node_labels', torch.long)
        node_lab = one_hot(node_lab - node_lab.min(dim=0)[0])
    x = cat([node_attr, node_lab])

    edge_feat = edge_lab = None
    if 'edge_attributes' in available:
        edge_feat = read_file(folder, prefix, 'edge_attributes')
    if 'edge_labels' in available:
        edge_lab = read_file(folder, prefix, 'edge_labels', torch.long) - 1
        edge_lab = one_hot(edge_lab - edge_lab.min(dim=0)[0])
    edge_attr = cat([edge_feat, edge_lab])

    y = None
    if 'graph_attributes' in available:  # Regression problem.
        y = read_file(folder, prefix, 'graph_attributes')
    if 'graph_labels' in available:  # Classification problem.
        y = read_file(folder, prefix, 'graph_labels', torch.long)
        y -= y.min(dim=0)[0]  # Remap labels to start at zero.

    # Prefer the feature matrix for the node count; fall back to edge indices.
    num_nodes = x.size(0) if x is not None else edge_index.max().item() + 1
    edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
    edge_index, edge_attr = coalesce(edge_index, edge_attr, num_nodes,
                                     num_nodes)

    graph = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    return split(graph, batch)
def read_tu_files(path, prefix, graph_indicator=False, graph_attributes=False,
                  graph_labels=False, node_attributes=False, node_labels=False,
                  edge_attributes=False, edge_labels=False):
    """Read the raw ``.txt`` files of a TU-format dataset.

    Args:
        path (str): Directory holding the raw files.
        prefix (str): Dataset name used as filename prefix.
        graph_indicator (bool): Whether a graph-indicator file exists.
        graph_attributes (bool): Graph-level regression targets exist.
        graph_labels (bool): Graph-level classification targets exist.
        node_attributes (bool): Continuous node features exist.
        node_labels (bool): Categorical node labels exist (one-hot encoded).
        edge_attributes (bool): Continuous edge features exist.
        edge_labels (bool): Categorical edge labels exist.

    Returns:
        The result of :func:`compute_slices` over the assembled dataset.
    """
    file_path = filename(prefix, 'A', path)
    # File indices are one-based; shift to zero-based.
    edge_index = read_txt(file_path, sep=',', out=Long()) - 1
    edge_index, perm = coalesce(edge_index.t())

    x = tmp1 = tmp2 = None
    if node_attributes:
        file_path = filename(prefix, 'node_attributes', path)
        tmp1 = read_txt(file_path, sep=',')
    if node_labels:
        file_path = filename(prefix, 'node_labels', path)
        tmp2 = one_hot(read_txt(file_path, sep=',', out=Long()) - 1)
    x = cat(tmp1, tmp2)

    edge_attr = tmp1 = tmp2 = None
    if edge_attributes:
        file_path = filename(prefix, 'edge_attributes', path)
        tmp1 = read_txt(file_path, sep=',')[perm]  # Reorder to coalesced edges.
    if edge_labels:
        file_path = filename(prefix, 'edge_labels', path)
        tmp2 = read_txt(file_path, sep=',')[perm] - 1
    edge_attr = cat(tmp1, tmp2)

    y = None
    if graph_attributes:  # Regression problem.
        file_path = filename(prefix, 'graph_attributes', path)
        y = read_txt(file_path, sep=',')
    if graph_labels:  # Classification problem.
        file_path = filename(prefix, 'graph_labels', path)
        y = read_txt(file_path, sep=',', out=Long()) - 1

    dataset = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

    if graph_indicator:
        file_path = filename(prefix, 'graph_indicator', path)
        graph_indicator = read_txt(file_path, out=Long()) - 1
    else:
        # Bug fix: `x` is `None` whenever neither `node_attributes` nor
        # `node_labels` is given, so `x.size(0)` raised AttributeError here.
        # Fall back to the node count implied by the zero-based edge indices.
        num_nodes = x.size(0) if x is not None else edge_index.max().item() + 1
        graph_indicator = Long(num_nodes).fill_(0)

    return compute_slices(dataset, graph_indicator)
def parse_sdf(src):
    """Parse the lines of an SDF molecule block into a graph object.

    Args:
        src (sequence of str): SDF lines starting at the counts line.

    Returns:
        Data: Graph with one-hot atom types ``x``, 3D coordinates ``pos``,
        zero-based ``edge_index`` and zero-based bond types ``edge_attr``.
    """
    counts_line = src[0].split()
    num_atoms, num_bonds = int(counts_line[0]), int(counts_line[1])

    atom_block = src[1:num_atoms + 1]
    pos = parse_txt(atom_block, end=3)  # First three columns: coordinates.
    # Column 3 holds the element symbol; map it to an integer id.
    x = parse_txt(atom_block, lambda sym: elems[sym], start=3, end=4,
                  out=Long())
    x = one_hot(x, len(elems))

    bond_block = src[1 + num_atoms:1 + num_atoms + num_bonds]
    edge_index = parse_txt(bond_block, end=2, out=Long()) - 1  # Zero-based.
    edge_index, perm = coalesce(edge_index.t())
    edge_attr = parse_txt(bond_block, start=2, end=3, out=Long())[perm] - 1

    # Bug fix: every other `Data(...)` call in this file passes the keyword
    # `edge_index=`; `edge=` appears to be a typo and would leave the edge
    # indices unattached under the expected attribute name.
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, pos=pos)
def mean_iou(pred, target, num_classes, batch=None):
    r"""Computes the mean Intersection over Union score.

    Args:
        pred (LongTensor): The predictions.
        target (LongTensor): The targets.
        num_classes (int): The number of classes.
        batch (LongTensor): The assignment vector which maps each pred-target
            pair to an example.

    :rtype: :class:`Tensor`
    """
    # Work on one-hot encodings so `&` / `|` yield per-class masks.
    pred_mask = one_hot(pred, num_classes, dtype=torch.long)
    target_mask = one_hot(target, num_classes, dtype=torch.long)

    if batch is None:
        intersection = (pred_mask & target_mask).sum(dim=0).to(torch.float)
        union = (pred_mask | target_mask).sum(dim=0).to(torch.float)
    else:
        # Aggregate per example instead of over the whole tensor.
        intersection = scatter_add(pred_mask & target_mask, batch,
                                   dim=0).to(torch.float)
        union = scatter_add(pred_mask | target_mask, batch,
                            dim=0).to(torch.float)

    score = intersection / union
    # A class absent from both pred and target gives 0/0 = NaN; count it as
    # a perfect match.
    score[torch.isnan(score)] = 1
    return score.mean(dim=-1)
def parse_sdf(src):
    """Parse a raw SDF string into a graph ``Data`` object."""
    lines = src.split('\n')[3:]  # Drop the three-line SDF header.
    counts = lines[0].split()
    n_atoms, n_bonds = int(counts[0]), int(counts[1])

    atoms = lines[1:n_atoms + 1]
    pos = parse_txt_array(atoms, end=3)  # Columns 0-2: 3D coordinates.
    # Column 3 holds the element symbol; encode it as a one-hot vector.
    atom_type = torch.tensor([elems[entry.split()[3]] for entry in atoms])
    x = one_hot(atom_type, len(elems))

    bonds = lines[1 + n_atoms:1 + n_atoms + n_bonds]
    endpoints = parse_txt_array(bonds, end=2, dtype=torch.long).t() - 1
    a, b = endpoints
    # Make the graph undirected by adding both edge directions.
    edge_index = torch.stack(
        [torch.cat([a, b], dim=0), torch.cat([b, a], dim=0)], dim=0)
    bond_type = parse_txt_array(bonds, start=2, end=3) - 1  # Zero-based.
    edge_attr = torch.cat([bond_type, bond_type], dim=0)
    edge_index, edge_attr = coalesce(edge_index, edge_attr, n_atoms)

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, pos=pos)
# Select which regression target column to train on (int, via --target).
parser.add_argument('--target', default=0)
args = parser.parse_args()
target = int(args.target)
print('---- Target: {} ----'.format(target))

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-2-3-QM9')
dataset = QM9(
    path,
    transform=T.Compose([MyTransform(), T.Distance()]),
    pre_transform=MyPreTransform(),
    pre_filter=MyFilter())

# Remap isomorphism-type ids to consecutive integers, then one-hot encode:
# `torch.unique(..., True, True)[1]` returns the inverse indices of the
# sorted unique values, i.e. a dense relabeling of `iso_type_2`.
dataset.data.iso_type_2 = torch.unique(dataset.data.iso_type_2, True, True)[1]
num_i_2 = dataset.data.iso_type_2.max().item() + 1
dataset.data.iso_type_2 = one_hot(dataset.data.iso_type_2, num_classes=num_i_2)

# Same dense relabeling + one-hot encoding for the 3-tuple isomorphism types.
dataset.data.iso_type_3 = torch.unique(dataset.data.iso_type_3, True, True)[1]
num_i_3 = dataset.data.iso_type_3.max().item() + 1
dataset.data.iso_type_3 = one_hot(dataset.data.iso_type_3, num_classes=num_i_3)

dataset = dataset.shuffle()

# Normalize targets to mean = 0 and std = 1.
# NOTE(review): the statistics are computed over `dataset.data.y[tenpercent:]`
# but `shuffle()` may only permute an index mapping rather than reorder the
# underlying `data.y` storage — verify the slice actually excludes the test
# split's targets.
tenpercent = int(len(dataset) * 0.1)
mean = dataset.data.y[tenpercent:].mean(dim=0)
std = dataset.data.y[tenpercent:].std(dim=0)
dataset.data.y = (dataset.data.y - mean) / std

# First 10% -> test, next 10% -> validation (the remainder is for training).
test_dataset = dataset[:tenpercent]
val_dataset = dataset[tenpercent:2 * tenpercent]
def process(self):
    """Parse the raw RDF graph and task files into a single ``Data`` object.

    Builds a relational graph with one forward and one inverse edge per RDF
    triple, computes per-edge normalization constants, attaches the
    train/test node labels, and saves the collated result to
    ``self.processed_paths[0]``.
    """
    graph_file, task_file, train_file, test_file = self.raw_paths

    g = rdf.Graph()
    with gzip.open(graph_file, 'rb') as f:
        g.parse(file=f, format='nt')

    freq_ = Counter(g.predicates())

    def freq(rel):
        return freq_[rel] if rel in freq_ else 0

    # Order relations by descending frequency; collect all entities as nodes.
    relations = sorted(set(g.predicates()), key=lambda rel: -freq(rel))
    subjects = set(g.subjects())
    objects = set(g.objects())
    nodes = list(subjects.union(objects))

    relations_dict = {rel: i for i, rel in enumerate(list(relations))}
    nodes_dict = {node: i for i, node in enumerate(nodes)}

    # Each triple contributes a forward edge (type 2*rel) and an inverse
    # edge (type 2*rel + 1).
    edge_list = []
    for s, p, o in g.triples((None, None, None)):
        src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
        edge_list.append([src, dst, 2 * rel])
        edge_list.append([dst, src, 2 * rel + 1])

    edge_list = sorted(edge_list, key=lambda x: (x[0], x[1], x[2]))
    edge = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
    edge_index, edge_type = edge[:2], edge[2]

    # edge_norm[e] = 1 / (# edges of the same type leaving the same source).
    oh = one_hot(edge_type, 2 * len(relations), dtype=torch.float)
    deg = scatter_add(oh, edge_index[0], dim=0, dim_size=len(nodes))
    index = edge_type + torch.arange(len(edge_list)) * 2 * len(relations)
    edge_norm = 1 / deg[edge_index[0]].view(-1)[index]

    if self.name == 'am':
        # NOTE(review): 'cateogory' presumably mirrors a typo in the task
        # file's own column header — confirm before "fixing" it.
        label_header = 'label_cateogory'
        nodes_header = 'proxy'
    elif self.name == 'aifb':
        label_header = 'label_affiliation'
        nodes_header = 'person'
    elif self.name == 'mutag':
        label_header = 'label_mutagenic'
        nodes_header = 'bond'
    elif self.name == 'bgs':
        label_header = 'label_lithogenesis'
        nodes_header = 'rock'

    labels_df = pd.read_csv(task_file, sep='\t')
    labels_set = set(labels_df[label_header].values.tolist())
    labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
    # Bug fix: `np.unicode` was deprecated in NumPy 1.20 and removed in
    # NumPy >= 1.24; on Python 3 it was a plain alias for `str`.
    nodes_dict = {str(key): val for key, val in nodes_dict.items()}

    train_labels_df = pd.read_csv(train_file, sep='\t')
    train_indices, train_labels = [], []
    for nod, lab in zip(train_labels_df[nodes_header].values,
                        train_labels_df[label_header].values):
        train_indices.append(nodes_dict[nod])
        train_labels.append(labels_dict[lab])

    train_idx = torch.tensor(train_indices, dtype=torch.long)
    train_y = torch.tensor(train_labels, dtype=torch.long)

    test_labels_df = pd.read_csv(test_file, sep='\t')
    test_indices, test_labels = [], []
    for nod, lab in zip(test_labels_df[nodes_header].values,
                        test_labels_df[label_header].values):
        test_indices.append(nodes_dict[nod])
        test_labels.append(labels_dict[lab])

    test_idx = torch.tensor(test_indices, dtype=torch.long)
    test_y = torch.tensor(test_labels, dtype=torch.long)

    data = Data(edge_index=edge_index)
    data.edge_type = edge_type
    data.edge_norm = edge_norm
    data.train_idx = train_idx
    data.train_y = train_y
    data.test_idx = test_idx
    data.test_y = test_y

    data, slices = self.collate([data])
    torch.save((data, slices), self.processed_paths[0])
def __call__(self, data):
    """Replace node features with a one-hot encoding of each node's degree."""
    source_nodes = data.edge_index[0]
    deg = degree(source_nodes, data.num_nodes, dtype=torch.long)
    # 136 classes — presumably an upper bound on the node degree in this
    # dataset; NOTE(review): confirm against the data.
    data.x = one_hot(deg, 136, torch.float)
    return data
import torch
from torch.autograd import Variable
from torch import nn
import torch.nn.functional as F
from torch_geometric.datasets import Cora
from torch_geometric.nn.modules import RandomWalk
from torch_geometric.utils import one_hot, softmax, degree  # noqa

# Load Cora; use the GPU when available.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data')
dataset = Cora(osp.join(path, 'Cora'))
data = dataset[0].cuda() if torch.cuda.is_available() else dataset[0]

edge_index, target = data.index, data.target
row, col = edge_index
num_nodes, num_features = data.input.size()
num_classes = target.max() + 1
# NOTE(review): this rebinding shadows the imported `one_hot` function, so
# any later call to `one_hot(...)` in this script would fail.
one_hot = one_hot(target, num_classes)
# Per-edge weight: reciprocal of the node degree (counted over `row`),
# gathered at each edge's `col` endpoint.
edge_attr = Variable((1 / degree(row, num_nodes))[col])

# Train on the first 20*num_classes nodes, validate on the next 500, and
# test on the last 1000 nodes.
train_mask = torch.arange(0, 20 * num_classes).long()
val_mask = torch.arange(train_mask.size(0), train_mask.size(0) + 500).long()
test_mask = torch.arange(num_nodes - 1000, num_nodes).long()
# Zero out label information outside the training set.
# NOTE(review): the slice starts at `train_mask[-1]` — the index of the LAST
# training node — so that row is zeroed as well; looks like an off-by-one
# (`train_mask[-1] + 1` or `train_mask.size(0)` was probably intended).
one_hot[train_mask[-1]:, :] = 0
one_hot, target = Variable(one_hot), Variable(target)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(num_features, 16)