Example #1
def read_tu_data(folder, prefix):
    # Collect all '<prefix>_*.txt' files and derive the attribute names
    # (e.g., 'node_attributes', 'graph_labels') from the file names.
    files = glob.glob(osp.join(folder, '{}_*.txt'.format(prefix)))
    names = ['_'.join(osp.basename(f).split('_')[1:])[:-4] for f in files]

    edge_index = read_file(folder, prefix, 'A', torch.long).t() - 1
    batch = read_file(folder, prefix, 'graph_indicator', torch.long) - 1

    node_attributes, node_labels = None, None
    if 'node_attributes' in names:
        node_attributes = read_file(folder, prefix, 'node_attributes')
    if 'node_labels' in names:
        node_labels = read_file(folder, prefix, 'node_labels', torch.long)
        node_labels = one_hot(node_labels - node_labels.min(dim=0)[0])
    x = cat([node_attributes, node_labels])

    edge_attributes, edge_labels = None, None
    if 'edge_attributes' in names:
        edge_attributes = read_file(folder, prefix, 'edge_attributes')
    if 'edge_labels' in names:
        edge_labels = read_file(folder, prefix, 'edge_labels', torch.long) - 1
        edge_labels = one_hot(edge_labels - edge_labels.min(dim=0)[0])
    edge_attr = cat([edge_attributes, edge_labels])

    y = None
    if 'graph_attributes' in names:  # Regression problem.
        y = read_file(folder, prefix, 'graph_attributes')
    if 'graph_labels' in names:  # Classification problem.
        y = read_file(folder, prefix, 'graph_labels', torch.long)
        y -= y.min(dim=0)[0]

    num_nodes = edge_index.max().item() + 1 if x is None else x.size(0)
    edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
    edge_index, edge_attr = coalesce(edge_index, edge_attr, num_nodes,
                                     num_nodes)

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    data, slices = split(data, batch)

    return data, slices
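
A minimal call sketch for the function above (the path and dataset name are assumptions; TU datasets store their files as <prefix>_A.txt, <prefix>_graph_indicator.txt, and so on):

data, slices = read_tu_data('data/MUTAG/raw', 'MUTAG')  # hypothetical path
print(data.num_nodes, data.edge_index.size(1))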
Example #2
def read_tu_files(path,
                  prefix,
                  graph_indicator=False,
                  graph_attributes=False,
                  graph_labels=False,
                  node_attributes=False,
                  node_labels=False,
                  edge_attributes=False,
                  edge_labels=False):

    file_path = filename(prefix, 'A', path)
    edge_index = read_txt(file_path, sep=',', out=Long()) - 1
    edge_index, perm = coalesce(edge_index.t())

    tmp1 = tmp2 = None
    if node_attributes:
        file_path = filename(prefix, 'node_attributes', path)
        tmp1 = read_txt(file_path, sep=',')
    if node_labels:
        file_path = filename(prefix, 'node_labels', path)
        tmp2 = one_hot(read_txt(file_path, sep=',', out=Long()) - 1)
    x = cat(tmp1, tmp2)

    tmp1 = tmp2 = None
    if edge_attributes:
        file_path = filename(prefix, 'edge_attributes', path)
        tmp1 = read_txt(file_path, sep=',')[perm]
    if edge_labels:
        file_path = filename(prefix, 'edge_labels', path)
        tmp2 = read_txt(file_path, sep=',')[perm] - 1
    edge_attr = cat(tmp1, tmp2)

    y = None
    if graph_attributes:  # Regression problem.
        file_path = filename(prefix, 'graph_attributes', path)
        y = read_txt(file_path, sep=',')
    if graph_labels:  # Classification problem.
        file_path = filename(prefix, 'graph_labels', path)
        y = read_txt(file_path, sep=',', out=Long()) - 1

    dataset = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

    if graph_indicator:
        file_path = filename(prefix, 'graph_indicator', path)
        graph_indicator = read_txt(file_path, out=Long()) - 1
    else:
        graph_indicator = Long(x.size(0)).fill_(0)

    return compute_slices(dataset, graph_indicator)
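
A hedged call sketch for this older, flag-based variant (path and flag choices are assumptions; each boolean tells the reader to load the matching <prefix>_*.txt file):

out = read_tu_files('data/MUTAG', 'MUTAG', graph_indicator=True,
                    graph_labels=True, node_labels=True)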
Example #3
def parse_sdf(src):
    counts_line = src[0].split()
    num_atoms, num_bonds = int(counts_line[0]), int(counts_line[1])

    atom_block = src[1:num_atoms + 1]
    pos = parse_txt(atom_block, end=3)
    x = parse_txt(atom_block, lambda x: elems[x], start=3, end=4, out=Long())
    x = one_hot(x, len(elems))

    bond_block = src[1 + num_atoms:1 + num_atoms + num_bonds]
    edge_index = parse_txt(bond_block, end=2, out=Long()) - 1
    edge_index, perm = coalesce(edge_index.t())
    edge_attr = parse_txt(bond_block, start=2, end=3, out=Long())[perm] - 1

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, pos=pos)
Example #4
def mean_iou(pred, target, num_classes, batch=None):
    r"""Computes the mean Intersection over Union (IoU) score.

    Args:
        pred (LongTensor): The predictions.
        target (LongTensor): The targets.
        num_classes (int): The number of classes.
        batch (LongTensor, optional): The assignment vector which maps each
            pred-target pair to an example. (default: :obj:`None`)

    :rtype: :class:`Tensor`
    """
    pred = one_hot(pred, num_classes, dtype=torch.long)
    target = one_hot(target, num_classes, dtype=torch.long)

    if batch is not None:
        i = scatter_add(pred & target, batch, dim=0).to(torch.float)
        u = scatter_add(pred | target, batch, dim=0).to(torch.float)
    else:
        i = (pred & target).sum(dim=0).to(torch.float)
        u = (pred | target).sum(dim=0).to(torch.float)

    iou = i / u
    iou[torch.isnan(iou)] = 1  # 0/0: class absent in both pred and target.
    iou = iou.mean(dim=-1)
    return iou
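
A toy invocation with made-up values, to show the expected call shape:

import torch

pred = torch.tensor([0, 1, 1, 2])
target = torch.tensor([0, 1, 2, 2])
print(mean_iou(pred, target, num_classes=3))  # per-class IoU 1.0, 0.5, 0.5 -> mean ~0.67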
Example #5
def parse_sdf(src):
    src = src.split('\n')[3:]
    num_atoms, num_bonds = [int(item) for item in src[0].split()[:2]]

    atom_block = src[1:num_atoms + 1]
    pos = parse_txt_array(atom_block, end=3)
    x = torch.tensor([elems[item.split()[3]] for item in atom_block])
    x = one_hot(x, len(elems))

    bond_block = src[1 + num_atoms:1 + num_atoms + num_bonds]
    row, col = parse_txt_array(bond_block, end=2, dtype=torch.long).t() - 1
    # Add each bond in both directions so the graph is undirected.
    row, col = torch.cat([row, col], dim=0), torch.cat([col, row], dim=0)
    edge_index = torch.stack([row, col], dim=0)
    edge_attr = parse_txt_array(bond_block, start=2, end=3) - 1
    edge_attr = torch.cat([edge_attr, edge_attr], dim=0)
    edge_index, edge_attr = coalesce(edge_index, edge_attr, num_atoms)

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, pos=pos)
    return data
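
A hedged usage sketch (the file name is an assumption; parse_sdf expects the raw text of a single molecule's SDF block, including its three header lines):

with open('molecule.sdf') as f:  # hypothetical file
    data = parse_sdf(f.read())
print(data.num_nodes, data.edge_index.size(1))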
Example #6
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--target', default=0)
args = parser.parse_args()
target = int(args.target)

print('---- Target: {} ----'.format(target))

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-2-3-QM9')
# MyTransform, MyPreTransform, and MyFilter are user-defined classes from
# the full example script; their definitions are not part of this snippet.
dataset = QM9(
    path,
    transform=T.Compose([MyTransform(), T.Distance()]),
    pre_transform=MyPreTransform(),
    pre_filter=MyFilter())

dataset.data.iso_type_2 = torch.unique(dataset.data.iso_type_2, True, True)[1]
num_i_2 = dataset.data.iso_type_2.max().item() + 1
dataset.data.iso_type_2 = one_hot(dataset.data.iso_type_2, num_classes=num_i_2)

dataset.data.iso_type_3 = torch.unique(dataset.data.iso_type_3, True, True)[1]
num_i_3 = dataset.data.iso_type_3.max().item() + 1
dataset.data.iso_type_3 = one_hot(dataset.data.iso_type_3, num_classes=num_i_3)

dataset = dataset.shuffle()

# Normalize targets to mean = 0 and std = 1.
tenpercent = int(len(dataset) * 0.1)
mean = dataset.data.y[tenpercent:].mean(dim=0)
std = dataset.data.y[tenpercent:].std(dim=0)
dataset.data.y = (dataset.data.y - mean) / std

test_dataset = dataset[:tenpercent]
val_dataset = dataset[tenpercent:2 * tenpercent]
Example #7
    def process(self):
        graph_file, task_file, train_file, test_file = self.raw_paths

        g = rdf.Graph()
        with gzip.open(graph_file, 'rb') as f:
            g.parse(file=f, format='nt')

        freq_ = Counter(g.predicates())

        def freq(rel):
            return freq_[rel] if rel in freq_ else 0

        relations = sorted(set(g.predicates()), key=lambda rel: -freq(rel))
        subjects = set(g.subjects())
        objects = set(g.objects())
        nodes = list(subjects.union(objects))

        relations_dict = {rel: i for i, rel in enumerate(list(relations))}
        nodes_dict = {node: i for i, node in enumerate(nodes)}

        edge_list = []
        for s, p, o in g.triples((None, None, None)):
            src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
            edge_list.append([src, dst, 2 * rel])
            edge_list.append([dst, src, 2 * rel + 1])

        edge_list = sorted(edge_list, key=lambda x: (x[0], x[1], x[2]))
        edge = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        edge_index, edge_type = edge[:2], edge[2]

        oh = one_hot(edge_type, 2 * len(relations), dtype=torch.float)
        deg = scatter_add(oh, edge_index[0], dim=0, dim_size=len(nodes))
        index = edge_type + torch.arange(len(edge_list)) * 2 * len(relations)
        edge_norm = 1 / deg[edge_index[0]].view(-1)[index]

        if self.name == 'am':
            label_header = 'label_cateogory'  # (sic) matches the column name in the dataset's TSV file
            nodes_header = 'proxy'
        elif self.name == 'aifb':
            label_header = 'label_affiliation'
            nodes_header = 'person'
        elif self.name == 'mutag':
            label_header = 'label_mutagenic'
            nodes_header = 'bond'
        elif self.name == 'bgs':
            label_header = 'label_lithogenesis'
            nodes_header = 'rock'

        labels_df = pd.read_csv(task_file, sep='\t')
        labels_set = set(labels_df[label_header].values.tolist())
        labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
        # np.unicode was an alias for str and has been removed in NumPy >= 2.0.
        nodes_dict = {str(key): val for key, val in nodes_dict.items()}

        train_labels_df = pd.read_csv(train_file, sep='\t')
        train_indices, train_labels = [], []
        for nod, lab in zip(train_labels_df[nodes_header].values,
                            train_labels_df[label_header].values):
            train_indices.append(nodes_dict[nod])
            train_labels.append(labels_dict[lab])

        train_idx = torch.tensor(train_indices, dtype=torch.long)
        train_y = torch.tensor(train_labels, dtype=torch.long)

        test_labels_df = pd.read_csv(test_file, sep='\t')
        test_indices, test_labels = [], []
        for nod, lab in zip(test_labels_df[nodes_header].values,
                            test_labels_df[label_header].values):
            test_indices.append(nodes_dict[nod])
            test_labels.append(labels_dict[lab])

        test_idx = torch.tensor(test_indices, dtype=torch.long)
        test_y = torch.tensor(test_labels, dtype=torch.long)

        data = Data(edge_index=edge_index)
        data.edge_type = edge_type
        data.edge_norm = edge_norm
        data.train_idx = train_idx
        data.train_y = train_y
        data.test_idx = test_idx
        data.test_y = test_y

        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])
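
This looks like the process step of PyTorch Geometric's Entities RDF datasets (AIFB, MUTAG, BGS, AM). Under that assumption, it is triggered indirectly by instantiating the dataset class, roughly:

from torch_geometric.datasets import Entities
dataset = Entities('data/Entities', 'aifb')  # hypothetical root path
data = dataset[0]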
Example #8
class DegreeOneHot(object):  # hypothetical name; the enclosing class line is missing from the snippet
    def __call__(self, data):
        # Replace node features with one-hot encoded node degrees
        # (136 is assumed to be the maximum degree + 1 for the target dataset).
        data.x = degree(data.edge_index[0], data.num_nodes, dtype=torch.long)
        data.x = one_hot(data.x, 136, torch.float)
        return data
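
A hedged sketch of plugging such a transform into a dataset (dataset name and path are assumptions; torch_geometric.transforms.OneHotDegree provides equivalent behavior out of the box):

from torch_geometric.datasets import TUDataset
dataset = TUDataset('data/IMDB-BINARY', name='IMDB-BINARY',
                    pre_transform=DegreeOneHot())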
import os.path as osp

import torch
from torch.autograd import Variable
from torch import nn
import torch.nn.functional as F
from torch_geometric.datasets import Cora
from torch_geometric.nn.modules import RandomWalk
from torch_geometric.utils import one_hot, softmax, degree  # noqa

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data')
dataset = Cora(osp.join(path, 'Cora'))
data = dataset[0].cuda() if torch.cuda.is_available() else dataset[0]
edge_index, target = data.index, data.target
row, col = edge_index
num_nodes, num_features = data.input.size()
num_classes = target.max() + 1
one_hot = one_hot(target, num_classes)  # note: shadows the imported one_hot function from here on
edge_attr = Variable((1 / degree(row, num_nodes))[col])

train_mask = torch.arange(0, 20 * num_classes).long()
val_mask = torch.arange(train_mask.size(0), train_mask.size(0) + 500).long()
test_mask = torch.arange(num_nodes - 1000, num_nodes).long()

one_hot[train_mask[-1] + 1:, :] = 0  # zero out the labels of all non-training nodes

one_hot, target = Variable(one_hot), Variable(target)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(num_features, 16)