Exemple #1
0
# Build a fully-connected random graph: `num_nodes` nodes, each carrying a
# single random scalar feature; the label marks the node(s) with the minimum.
nodes = np.random.rand(num_nodes, 1)

# All ordered pairs (i, j) -> dense edge_index of shape (2, num_nodes ** 2).
edge_index = np.reshape(
    np.meshgrid(np.arange(num_nodes), np.arange(num_nodes)), (2, -1))
# edge_index = torch.tensor([[0,1], [1,2]])
x = torch.tensor(nodes)
# x = torch.tensor([[1], [2], [3]])
edge_index = torch.tensor(edge_index, dtype=torch.long)
# Binary target: True only where the feature attains its minimum.
y = torch.zeros(num_nodes, dtype=torch.bool)
y[nodes.argmin(0)] = True

print(x)
print(y)
print(edge_index)
data = Data(x=x, edge_index=edge_index, y=y)

import networkx as nx  # fixed typo: was "newtorkx", which raises ImportError
import matplotlib.pyplot as plt
from torch_geometric.utils import to_networkx


def visualize(h, color, epoch=None, loss=None):
    plt.figure(figsize=(7, 7))
    plt.xticks([])
    plt.yticks([])

    if torch.is_tensor(h):
        h = h.detach().cpu().numpy()
        plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap='Set2')
        if epoch is not None and loss is not None:
Exemple #2
0
    def process(self):
        """Parse the raw RDF graph plus the train/test label TSVs and save a
        single collated graph to ``self.processed_paths[0]``.

        Nodes are the union of RDF subjects and objects; every triple yields
        two directed edges (forward relation ``2 * rel`` and inverse relation
        ``2 * rel + 1``).
        """
        graph_file, task_file, train_file, test_file = self.raw_paths

        g = rdf.Graph()
        with gzip.open(graph_file, 'rb') as f:
            g.parse(file=f, format='nt')

        freq_ = Counter(g.predicates())

        def freq(rel):
            # Frequency of a relation, 0 for unseen ones.
            return freq_[rel] if rel in freq_ else 0

        # Most frequent relations first so they get the smallest ids.
        relations = sorted(set(g.predicates()), key=lambda rel: -freq(rel))
        subjects = set(g.subjects())
        objects = set(g.objects())
        nodes = list(subjects.union(objects))

        relations_dict = {rel: i for i, rel in enumerate(list(relations))}
        nodes_dict = {node: i for i, node in enumerate(nodes)}

        edge_list = []
        for s, p, o in g.triples((None, None, None)):
            src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
            edge_list.append([src, dst, 2 * rel])
            edge_list.append([dst, src, 2 * rel + 1])  # inverse edge

        edge_list = sorted(edge_list, key=lambda x: (x[0], x[1], x[2]))
        edge = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        edge_index, edge_type = edge[:2], edge[2]

        # Dataset-specific column headers of the task TSV files.
        if self.name == 'am':
            # NOTE(review): 'label_cateogory' looks misspelled but appears to
            # match the raw file's header — confirm against the data before
            # changing it.
            label_header = 'label_cateogory'
            nodes_header = 'proxy'
        elif self.name == 'aifb':
            label_header = 'label_affiliation'
            nodes_header = 'person'
        elif self.name == 'mutag':
            label_header = 'label_mutagenic'
            nodes_header = 'bond'
        elif self.name == 'bgs':
            label_header = 'label_lithogenesis'
            nodes_header = 'rock'

        labels_df = pd.read_csv(task_file, sep='\t')
        labels_set = set(labels_df[label_header].values.tolist())
        labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
        # Re-key by plain strings so lookups from the CSVs below succeed.
        # Fixed: `np.unicode` was removed in NumPy 2.0 (it was an alias of
        # `str`), so use `str` directly.
        nodes_dict = {str(key): val for key, val in nodes_dict.items()}

        train_labels_df = pd.read_csv(train_file, sep='\t')
        train_indices, train_labels = [], []
        for nod, lab in zip(train_labels_df[nodes_header].values,
                            train_labels_df[label_header].values):
            train_indices.append(nodes_dict[nod])
            train_labels.append(labels_dict[lab])

        train_idx = torch.tensor(train_indices, dtype=torch.long)
        train_y = torch.tensor(train_labels, dtype=torch.long)

        test_labels_df = pd.read_csv(test_file, sep='\t')
        test_indices, test_labels = [], []
        for nod, lab in zip(test_labels_df[nodes_header].values,
                            test_labels_df[label_header].values):
            test_indices.append(nodes_dict[nod])
            test_labels.append(labels_dict[lab])

        test_idx = torch.tensor(test_indices, dtype=torch.long)
        test_y = torch.tensor(test_labels, dtype=torch.long)

        data = Data(edge_index=edge_index)
        data.edge_type = edge_type
        data.train_idx = train_idx
        data.train_y = train_y
        data.test_idx = test_idx
        data.test_y = test_y

        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])
Exemple #3
0
    # Load the flight-volume matrix; invert `distances` so that larger
    # values mean "closer".
    n_flights = load_pickle(SOURCE_PATH / 'dataset/timeseries/data/travel_matrix.pkl')
    distances = 1 - distances

    # Normalise cases to [0, 1]; keep `norm` to undo the scaling when printing.
    norm = np.max(cases)
    cases = cases / np.max(cases)
    # Edge weights combine (inverted) distance and flight volume, rescaled.
    edges = distances / np.max(distances) + n_flights / np.max(n_flights)
    edges /= np.max(edges)

    labels = torch.FloatTensor(cases[:, 0])      # first column: target
    features = torch.FloatTensor(cases[:, 1:])   # remaining columns: inputs
    edge_attr = torch.FloatTensor(edges[np.nonzero(edges)].reshape(-1, 1))
    # Fixed: edge_index must be an integer tensor — PyG layers index node
    # tensors with it, which fails for a FloatTensor.
    edge_index = torch.LongTensor(np.argwhere(edges != 0).transpose())
    # edges[edges <= 0.2] = 0
    edges = torch.FloatTensor(edges)
    idx = torch.tensor(list(range(features.size(0))))
    data = Data(x=features, edge_index=edge_index, edge_attr=edges, y=labels, idx=idx)
    model = GNN(data.x.size(1), 1).to(device)
    model.train()
    data = data.to(device)
    loss = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    for epoch in range(epochs):
        optimizer.zero_grad()
        pred = model(data)
        # print(pred.size())
        cost = loss(pred, data.y)
        print(cost)
        cost.backward()
        optimizer.step()
    # Report de-normalised predictions vs. ground truth from the last epoch.
    for x, y in zip(pred, data.y):
        print(int(x.item() * norm), int(y.item() * norm))
Exemple #4
0
def get_messages(ogn, trainloader, n_msg=250):
    """Collect message-function inputs/outputs from an OGN-style model.

    Iterates mini-batches from ``trainloader``, feeds edge features through
    ``ogn.msg_fnc``, keeps only the message component with the largest
    standard deviation, and returns shuffled DataFrames suitable for fitting
    symbolic approximations of the message and node functions.

    :param ogn: model exposing ``msg_fnc`` and a graph-level ``__call__``.
    :param trainloader: neighbour-sampling loader exposing ``data.x``,
        ``data.y``, ``batch_size`` and yielding subgraphs when called.
    :param n_msg: maximum number of mini-batches to process.
    :return: dict with shuffled ``'node_function'`` and ``'msg_function'``
        DataFrames.
    """
    print(
        "Warning: this function assumes that only a single message component dominates",
        flush=True)
    all_msg_input = []
    all_msgs = []
    all_msg_sums = []
    all_nodes = []
    all_outputs = []

    X = trainloader.data.x
    y = trainloader.data.y
    batch = trainloader.batch_size

    i = 0  # mini-batch counter

    for subgraph in trainloader():

        # Neighbour nodes are concatenated after the target nodes, so shift
        # the source indices of the sampled block by the target count.
        n_offset = len(subgraph.n_id)
        cur_len = n_offset
        cur_edge_index = subgraph.blocks[0].edge_index.clone()
        cur_edge_index[0] += n_offset
        g = Data(x=torch.cat(
            (X[subgraph.n_id], X[subgraph.blocks[0].n_id])).cuda(),
                 y=torch.cat(
                     (y[subgraph.n_id], y[subgraph.blocks[0].n_id])).cuda(),
                 edge_index=cur_edge_index.cuda())

        # Edge feature: relative first-3 components (presumably positions —
        # TODO confirm) plus the remaining attributes of both endpoints.
        s1 = g.x[g.edge_index[0]]
        s2 = g.x[g.edge_index[1]]
        msg_input = torch.cat([s1[:, :3] - s2[:, :3], s1[:, 3:], s2[:, 3:]],
                              dim=1)

        raw_msg = ogn.msg_fnc(msg_input)
        msg_input = msg_input.detach().cpu().numpy()
        all_msg_input.append(msg_input)

        # Keep only the message channel with the highest std across edges.
        best_msg_idx = np.argmax(raw_msg.std(0).detach().cpu().numpy())
        best_msgs = raw_msg[:, best_msg_idx].detach().cpu().numpy()
        all_msgs.append(best_msgs)

        # Per target node: sum of incoming messages on the best channel.
        associated_sum_message = np.array([
            raw_msg[np.argwhere(g.edge_index[1].detach().cpu().numpy() == i).
                    T].sum(0)[best_msg_idx].detach().cpu().numpy()
            for i in range(batch)
        ])
        all_msg_sums.append(associated_sum_message)
        node = g.x[list(range(batch))]
        output = ogn(g)
        all_nodes.append(node.detach().cpu().numpy())
        all_outputs.append(output.detach().cpu().numpy())

        i += 1
        if i > n_msg:
            break

    all_msg_input = np.concatenate(all_msg_input)
    all_msgs = np.concatenate(all_msgs)
    all_msg_sums = np.concatenate(all_msg_sums)
    all_nodes = np.concatenate(all_nodes)
    all_outputs = np.concatenate(all_outputs)

    #plt.scatter(
    #    x=np.arange(raw_msg.std(0).shape[0]),
    #    y=np.log10(np.sort(raw_msg.std(0).detach().cpu().numpy())),
    #    s=3
    #)

    # Column names assume 3-D relative position plus velocity+mass for both
    # endpoints — TODO confirm against the model's actual feature layout.
    msg_func_data = pd.DataFrame({
        **{
            'dx dy dz vx1 vy1 vz1 M1 vx2 vy2 vz2 M2'.split(' ')[i]: all_msg_input[:, i]
            for i in range(all_msg_input.shape[1])
        },
        **{
            'message': all_msgs
        }
    })

    node_func_data = pd.DataFrame({
        **{
            'x y z vx vy vz M'.split(' ')[i]: all_nodes[:, i]
            for i in range(7)
        },
        **{
            'message': all_msg_sums,
            'output': all_outputs[:, 0]
        }
    })

    # Shuffle rows so any downstream sub-sampling is unbiased.
    idx_node = np.arange(node_func_data.shape[0])
    np.random.shuffle(idx_node)

    idx_msg = np.arange(msg_func_data.shape[0])
    np.random.shuffle(idx_msg)

    return {
        'node_function':
        node_func_data.iloc[idx_node],  #.iloc[:5000].to_csv('node_func.csv');
        'msg_function':
        msg_func_data.iloc[idx_msg]  #.iloc[:5000].to_csv('msg_func.csv')
    }
Exemple #5
0
    def process(self):
        """Process PascalVOC keypoint annotations into train/test sets.

        For every (image, object) pair in the split files this reads the
        bounding box and keypoints, crops/rescales the object to 256x256,
        extracts VGG16 relu4_2/relu5_1 features at the keypoint pixels, and
        saves the collated train and test sets to ``self.processed_paths``.
        """
        from PIL import Image
        import torchvision.transforms as T
        import torchvision.models as models

        splits = np.load(osp.join(self.raw_dir, 'splits.npz'),
                         allow_pickle=True)
        category_idx = self.categories.index(self.category)
        train_split = list(splits['train'])[category_idx]
        test_split = list(splits['test'])[category_idx]

        image_path = osp.join(self.raw_dir, 'images', 'JPEGImages')
        info_path = osp.join(self.raw_dir, 'images', 'Annotations')
        annotation_path = osp.join(self.raw_dir, 'annotations')

        labels = {}  # keypoint name -> contiguous class id

        vgg16_outputs = []

        def hook(module, x, y):
            vgg16_outputs.append(y)

        # Capture two intermediate VGG16 feature maps via forward hooks.
        vgg16 = models.vgg16(pretrained=True).to(self.device)
        vgg16.eval()
        vgg16.features[20].register_forward_hook(hook)  # relu4_2
        vgg16.features[25].register_forward_hook(hook)  # relu5_1

        transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        train_set, test_set = [], []
        for i, name in enumerate(chain(train_split, test_split)):
            filename = '_'.join(name.split('/')[1].split('_')[:-1])
            idx = int(name.split('_')[-1].split('.')[0]) - 1

            # Fixed: the f-string placeholder was lost ('(unknown).xml');
            # the annotation XML is named after the image `filename`.
            path = osp.join(info_path, f'{filename}.xml')
            obj = minidom.parse(path).getElementsByTagName('object')[idx]

            trunc = obj.getElementsByTagName('truncated')[0].firstChild.data
            occ = obj.getElementsByTagName('occluded')
            occ = '0' if len(occ) == 0 else occ[0].firstChild.data
            diff = obj.getElementsByTagName('difficult')[0].firstChild.data

            # Skip truncated, occluded or difficult objects.
            if bool(int(trunc)) or bool(int(occ)) or bool(int(diff)):
                continue

            if self.category == 'person' and int(filename[:4]) > 2008:
                continue

            xmin = float(obj.getElementsByTagName('xmin')[0].firstChild.data)
            xmax = float(obj.getElementsByTagName('xmax')[0].firstChild.data)
            ymin = float(obj.getElementsByTagName('ymin')[0].firstChild.data)
            ymax = float(obj.getElementsByTagName('ymax')[0].firstChild.data)
            box = (xmin, ymin, xmax, ymax)

            dom = minidom.parse(osp.join(annotation_path, name))
            keypoints = dom.getElementsByTagName('keypoint')
            poss, ys = [], []
            for keypoint in keypoints:
                label = keypoint.attributes['name'].value
                if label not in labels:
                    labels[label] = len(labels)
                ys.append(labels[label])
                x = float(keypoint.attributes['x'].value)
                y = float(keypoint.attributes['y'].value)
                poss += [x, y]
            y = torch.tensor(ys, dtype=torch.long)
            pos = torch.tensor(poss, dtype=torch.float).view(-1, 2)

            if pos.numel() == 0:
                continue  # These examples do not make any sense anyway...

            # Add a small offset to the bounding because some keypoints lay
            # outside the bounding box intervals.
            box = (min(pos[:, 0].min().floor().item(), box[0]) - 16,
                   min(pos[:, 1].min().floor().item(), box[1]) - 16,
                   max(pos[:, 0].max().ceil().item(), box[2]) + 16,
                   max(pos[:, 1].max().ceil().item(), box[3]) + 16)

            # Rescale keypoints.
            pos[:, 0] = (pos[:, 0] - box[0]) * 256.0 / (box[2] - box[0])
            pos[:, 1] = (pos[:, 1] - box[1]) * 256.0 / (box[3] - box[1])

            # Fixed: same lost placeholder as above ('(unknown).jpg').
            path = osp.join(image_path, f'{filename}.jpg')
            with open(path, 'rb') as f:
                img = Image.open(f).convert('RGB').crop(box)
                img = img.resize((256, 256), resample=Image.BICUBIC)

            img = transform(img)

            data = Data(img=img, pos=pos, y=y, name=filename)

            if i < len(train_split):
                train_set.append(data)
            else:
                test_set.append(data)

        data_list = list(chain(train_set, test_set))
        imgs = [data.img for data in data_list]
        loader = DataLoader(imgs, self.batch_size, shuffle=False)
        for i, batch_img in enumerate(loader):
            vgg16_outputs.clear()

            with torch.no_grad():
                vgg16(batch_img.to(self.device))

            # Upsample both feature maps to image resolution so keypoint
            # pixel coordinates can index them directly.
            out1 = F.interpolate(vgg16_outputs[0], (256, 256),
                                 mode='bilinear',
                                 align_corners=False)
            out2 = F.interpolate(vgg16_outputs[1], (256, 256),
                                 mode='bilinear',
                                 align_corners=False)

            for j in range(out1.size(0)):
                data = data_list[i * self.batch_size + j]
                idx = data.pos.round().long().clamp(0, 255)
                x_1 = out1[j, :, idx[:, 1], idx[:, 0]].to('cpu')
                x_2 = out2[j, :, idx[:, 1], idx[:, 0]].to('cpu')
                data.img = None  # drop the raw image once features exist
                data.x = torch.cat([x_1.t(), x_2.t()], dim=-1)
            del out1
            del out2

        if self.pre_filter is not None:
            train_set = [data for data in train_set if self.pre_filter(data)]
            test_set = [data for data in test_set if self.pre_filter(data)]

        if self.pre_transform is not None:
            train_set = [self.pre_transform(data) for data in train_set]
            test_set = [self.pre_transform(data) for data in test_set]

        torch.save(self.collate(train_set), self.processed_paths[0])
        torch.save(self.collate(test_set), self.processed_paths[1])
    def get_sp_info(self, img, target):
        """Build a graph ``Data`` object from the super pixels of ``img``.

        Segments the image into super pixels, embeds every patch with the
        visual-embedding model, then packs per-node features, positions,
        areas and sizes together with the weighted adjacency into a graph
        labelled with ``target``.
        """
        # 3. Super Pixel
        deal_super_pixel = DealSuperPixel(image_data=img,
                                          ds_image_size=self.image_size,
                                          super_pixel_size=self.sp_size)
        segment, super_pixel_info, adjacency_info = deal_super_pixel.run()

        # Resize every super pixel patch to the visual-embedding input size.
        resized_patches = [
            cv2.resize(super_pixel_info[key]["data2"] / 255,
                       (self.sp_ve_size, self.sp_ve_size),
                       interpolation=cv2.INTER_NEAREST)
            for key in super_pixel_info
        ]
        net_data = np.transpose(resized_patches, axes=(0, 3, 1, 2))

        # 4. Visual Embedding: one shape and one texture vector per patch.
        shape_feature, texture_feature = self.ve_model.forward(
            torch.from_numpy(net_data).float().to(self.device))
        shape_feature = shape_feature.detach().numpy()
        texture_feature = texture_feature.detach().numpy()
        for sp_i in range(len(super_pixel_info)):
            super_pixel_info[sp_i]["feature_shape"] = shape_feature[sp_i]
            super_pixel_info[sp_i]["feature_texture"] = texture_feature[sp_i]

        # Per-node data: concatenated features, size, area and position.
        x, pos, area, size = [], [], [], []
        for sp_i in range(len(super_pixel_info)):
            now_sp = super_pixel_info[sp_i]
            _size = now_sp["size"]
            _area = now_sp["area"]
            x.append(np.concatenate(
                [now_sp["feature_shape"], now_sp["feature_texture"], [_size]],
                axis=0))
            size.append([_size])
            area.append(_area)
            pos.append([_area[1] - _area[0], _area[3] - _area[2]])

        # Per-edge data: endpoint pairs and scalar weights.
        edge_index = [[info[0], info[1]] for info in adjacency_info]
        # edge_w.append([adjacency_info[edge_i][2], adjacency_info[edge_i][2]])
        edge_w = [info[2] for info in adjacency_info]
        edge_index = np.transpose(edge_index, axes=(1, 0))

        # Pack everything into a single graph object.
        g_data = Data(x=torch.from_numpy(np.asarray(x)).float(),
                      edge_index=torch.from_numpy(edge_index),
                      y=torch.tensor([target]),
                      pos=torch.from_numpy(np.asarray(pos)),
                      area=torch.from_numpy(np.asarray(area)),
                      size=torch.from_numpy(np.asarray(size)),
                      edge_w=torch.from_numpy(np.asarray(edge_w)).float())
        return g_data
Exemple #7
0
def newData(nodeFeats, edgeSyms, graphLab):
    """Assemble a graph ``Data`` object from raw node features, symmetric
    edge pairs and a graph-level label."""
    node_tensor = torch.tensor(nodeFeats, dtype=torch.float)   # node features
    edge_tensor = torch.tensor(edgeSyms).t().contiguous()      # edges (COO)
    label_tensor = torch.tensor(graphLab)                      # graph label
    return Data(x=node_tensor, edge_index=edge_tensor, y=label_tensor)
Exemple #8
0
    def get_batch(self, X):
        """Wrap each per-node feature tensor in ``X`` into its own ``Data``
        object and merge them into one ``torch_geometric.data.Batch``."""
        return Batch.from_data_list([Data(x=x) for x in X])
Exemple #9
0
    def forward(self, X, edge_index, edge_weight):
        """
        :param X: Input data of shape (batch_size, num_nodes, in_channels)
        :param edge_index: Graph connectivity in COO format with shape(2, num_edges)
        :param edge_weight: Edge feature matrix with shape (num_edges, num_edge_features)
        :return: Output data of shape (batch_size, num_nodes, out_channels)
        """
        if torch.is_tensor(X):
            sz = X.shape
        if self.gcn_partition == 'cluster':
            # Partition the graph into clusters and run the GCN cluster by
            # cluster; `train_mask` carries each subgraph's original node ids
            # so its output can be scattered back into `out`.
            out = torch.zeros(sz[0], sz[1], self.out_channels, device=X.device)
            graph_data = Data(edge_index=edge_index,
                              edge_attr=edge_weight,
                              train_mask=torch.arange(0, sz[1]),
                              num_nodes=sz[1]).to('cpu')
            cluster_data = ClusterData(graph_data,
                                       num_parts=50,
                                       recursive=False,
                                       save_dir='./data/cluster')
            loader = ClusterLoader(cluster_data,
                                   batch_size=5,
                                   shuffle=True,
                                   num_workers=0)

            for subgraph in loader:
                out[:, subgraph.train_mask] = self.gcn(
                    X[:, subgraph.train_mask],
                    subgraph.edge_index.to(X.device),
                    subgraph.edge_attr.to(X.device))

        elif self.gcn_partition == 'sample':
            # Use NeighborSampler() to iterates over graph nodes in a mini-batch fashion
            # and constructs sampled subgraphs (use cpu for no CUDA version)
            out = torch.zeros(sz[0], sz[1], self.out_channels, device=X.device)
            graph_data = Data(edge_index=edge_index, num_nodes=sz[1]).to('cpu')
            loader = NeighborSampler(graph_data,
                                     size=[10, 5],
                                     num_hops=2,
                                     batch_size=120,
                                     shuffle=True,
                                     add_self_loops=False)

            # Two-hop sampling: gcn1 on the first block's edges, gcn2 on the
            # second's; only the sampled nodes' outputs are written back.
            for data_flow in loader():
                block1 = data_flow[0]
                t = self.gcn1(X, edge_index[:, block1.e_id],
                              edge_weight[block1.e_id])
                block2 = data_flow[1]
                part_out = self.gcn2(t, edge_index[:, block2.e_id],
                                     edge_weight[block2.e_id])
                out[:, data_flow.n_id] = part_out[:, data_flow.n_id]

        elif self.batch_training:
            # Whole-graph pass, with or without edge weights.
            if self.adj_available:
                out = self.gcn(X, edge_index, edge_weight)
            else:
                out = self.gcn(X, edge_index)

        else:
            # Currently, conv in [GATConv] cannot use argument node_dim for batch training
            # This is a temp solution but it's very very very slow!
            # Costing about 6 times more than batch_training
            batch = self.get_batch(X)
            if self.adj_available:
                out = self.gcn(batch.x, edge_index, edge_weight)
            else:
                out = self.gcn(batch.x, edge_index)
            out = out.view(sz[0], sz[1], -1)

        return out
# Make the local torch-points3d checkout importable before using it.
load_local_torchpoints3d()

from torch_points3d.models.segmentation.pointnet import PointNet
from torch_geometric.data import Data, Batch
from torch_points3d.datasets.batch import SimpleBatch

##################### PARTIAL_DENSE FORMAT #####################

num_points = 500
num_classes = 10
input_nc = 3

# Random point cloud: 3-D positions plus `input_nc` features per point.
pos = torch.randn((num_points, 3))
x = torch.randn((num_points, input_nc))

data = Data(pos=pos, x=x)
# Collate two copies into one batch (hence 1000 points in the print below).
data = Batch.from_data_list([data, data])

print(data)
#Batch(batch=[1000], pos=[1000, 3], x=[1000, 3])

pointnet = PointNet(OmegaConf.create({'conv_type': 'PARTIAL_DENSE'}))

pointnet.set_input(data, "cpu")
data_out = pointnet.forward()
print(data_out.shape)
# torch.Size([1000, 4])

##################### DENSE FORMAT #####################

num_points = 500
Exemple #11
0
 def __getitem__(self, idx):
     """Return the (exclude, include) subgraph pair for sample ``idx``,
     built by masking the stored graph with the corresponding masks."""
     build = self.subgraph_build_func
     ex_x, ex_edges = build(self.X, self.edge_index, self.exclude_mask[idx])
     in_x, in_edges = build(self.X, self.edge_index, self.include_mask[idx])
     return (Data(x=ex_x, edge_index=ex_edges),
             Data(x=in_x, edge_index=in_edges))
# We convert the individual graphs into a single big one, so that sampling
# neighbors does not need to care about different edge types.
# This will return the following:
# * `edge_index`: The new global edge connectivity.
# * `edge_type`: The edge type for each edge.
# * `node_type`: The node type for each node.
# * `local_node_idx`: The original index for each node.
# * `local2global`: A dictionary mapping original (local) node indices of
#    type `key` to global ones.
# `key2int`: A dictionary that maps original keys to their new canonical type.
# Flatten the heterogeneous graph into one homogeneous graph.
out = group_hetero_graph(data.edge_index_dict, data.num_nodes_dict)
edge_index, edge_type, node_type, local_node_idx, local2global, key2int = out

homo_data = Data(edge_index=edge_index,
                 edge_attr=edge_type,
                 node_type=node_type,
                 local_node_idx=local_node_idx,
                 num_nodes=node_type.size(0))

# Labels: -1 everywhere except paper nodes, which receive their real label.
homo_data.y = node_type.new_full((node_type.size(0), 1), -1)
homo_data.y[local2global['paper']] = data.y_dict['paper']

# Train/valid masks are defined on paper nodes only, via global indices.
homo_data.train_mask = torch.zeros((node_type.size(0)), dtype=torch.bool)
homo_data.train_mask[local2global['paper'][split_idx['train']['paper']]] = True
homo_data.valid_mask = torch.zeros((node_type.size(0)), dtype=torch.bool)
homo_data.valid_mask[local2global['paper'][split_idx['valid']['paper']]] = True

# Per-node accumulator, presumably for a reconstruction loss — TODO confirm.
rec_loss = torch.zeros(homo_data.num_nodes)
#print(homo_data)
train_loader = GraphSAINTRandomWalkSampler(homo_data,
    def __merge_edges__(self, x, data, edge_score, decimator):
        """Contract edges in priority order and rebuild the coarsened mesh.

        :param x: node feature matrix.
        :param data: graph carrying ``batch``, ``edge_index``, ``face``.
        :param edge_score: per-edge contraction score.
        :param decimator: mesh decimator exposing ``num_vertices``, ``E``,
            ``V`` and ``contractEdge``.
        :return: (pooled node features, new ``Data`` with unpool info).
        """
        # Torch tensors
        batch = data.batch
        edge_index = data.edge_index

        # Build a priority queue to store edge costs and store which nodes are still valid
        PQ = PriorityQueue([(edge_score[i].item(), i)
                            for i in range(len(edge_score))])

        # Loop over edges, contracting edges and updating node positions
        nodes_remaining = set(range(decimator.num_vertices))
        cluster = torch.empty_like(batch, device=torch.device('cpu'))
        new_edge_indices = []
        i = 0  # next cluster id
        while len(PQ) > 0:
            ei = PQ.popItem()

            # check if nodes have already been merged
            source, target = decimator.E[ei]
            if (source not in nodes_remaining) or (target
                                                   not in nodes_remaining):
                continue

            contracted = decimator.contractEdge(ei)
            if contracted:
                # this edge was successfully contracted
                nodes_remaining.remove(source)
                cluster[source] = i
                if source != target:
                    nodes_remaining.remove(target)
                    cluster[target] = i

                i += 1
                new_edge_indices.append(ei)

        # The remaining nodes are simply kept.
        for node_idx in nodes_remaining:
            cluster[node_idx] = i
            i += 1
        cluster = cluster.to(x.device)

        # We compute the new features as an addition of the old ones.
        new_x = scatter_add(x, cluster, dim=0, dim_size=i)
        if edge_score is not None:
            # Scale merged features by their contraction score; uncontracted
            # (kept) nodes get a neutral score of 1.
            new_edge_score = edge_score[new_edge_indices]
            if len(nodes_remaining) > 0:
                remaining_score = x.new_ones(
                    (new_x.size(0) - len(new_edge_indices), ))
                new_edge_score = torch.cat([new_edge_score, remaining_score])
            new_x = new_x * new_edge_score.view(-1, 1)
        else:
            new_edge_score = x.new_ones((new_x.size(0), ))

        # Remap edges onto cluster ids and merge duplicates.
        N = new_x.size(0)
        new_edge_index, _ = coalesce(cluster[edge_index], None, N, N)

        new_batch = x.new_empty(new_x.size(0), dtype=torch.long)
        new_batch = new_batch.scatter_(0, cluster, batch)

        unpool_info = self.unpool_description(edge_index=edge_index,
                                              cluster=cluster,
                                              batch=batch,
                                              new_edge_score=new_edge_score)

        # update mesh vertices
        #vi = torch.empty_like(new_batch, device=torch.device('cpu'))
        #vi[cluster] = torch.arange(cluster.size(0))
        #new_pos = data.pos[vi][:,0:3]
        vi = np.empty(i, dtype=np.int64)
        vi[cluster.cpu().numpy()] = np.arange(decimator.num_vertices)
        new_pos = torch.tensor(decimator.V[vi][:, 0:3])

        # update faces
        new_face = torch.empty_like(data.face)
        new_face[0, :] = cluster[data.face[
            0, :]]  # assign vertices to their new cluster id
        new_face[1, :] = cluster[data.face[
            1, :]]  # assign vertices to their new cluster id
        new_face[2, :] = cluster[data.face[
            2, :]]  # assign vertices to their new cluster id
        fi = (new_face[0, :]
              == new_face[1, :]) + (new_face[0, :] == new_face[2, :]) + (
                  new_face[1, :] == new_face[2, :]
              )  # faces with duplicate vertices
        new_face = new_face[:, ~fi]  # remove duplicates

        new_data = Data(edge_index=new_edge_index,
                        batch=new_batch,
                        pos=new_pos,
                        face=new_face)
        new_data.unpool_info = unpool_info

        return new_x, new_data
Exemple #14
0
def nx_to_graph_data_obj(
    g, center_id, allowable_features_downstream=None, allowable_features_pretrain=None, node_id_to_go_labels=None
):
    """Convert a networkx graph into a torch_geometric ``Data`` object.

    :param g: networkx graph whose edges carry weight attributes w1..w7.
    :param center_id: nx node id of the center node; its index is stored on
        the returned object as ``center_node_idx``.
    :param allowable_features_downstream: ordered list of downstream GO
        classes (required when ``node_id_to_go_labels`` is given).
    :param allowable_features_pretrain: ordered list of pretrain GO classes
        (required when ``node_id_to_go_labels`` is given).
    :param node_id_to_go_labels: optional mapping node id -> GO labels; when
        given, multi-hot GO target vectors for the center node are attached.
    :return: ``Data`` with x, edge_index, edge_attr, species_id and
        center_node_idx (plus GO targets in the supervised case).
    """
    n_nodes = g.number_of_nodes()

    # nodes
    nx_node_ids = [n_i for n_i in g.nodes()]  # contains list of nx node ids
    # in a particular ordering. Will be used as a mapping to convert
    # between nx node ids and data obj node indices

    x = torch.tensor(np.ones(n_nodes).reshape(-1, 1), dtype=torch.float)
    # we don't have any node labels, so set to dummy 1. dim n_nodes x 1

    center_node_idx = nx_node_ids.index(center_id)
    center_node_idx = torch.tensor([center_node_idx], dtype=torch.long)

    # edges
    edges_list = []
    edge_features_list = []
    for node_1, node_2, attr_dict in g.edges(data=True):
        edge_feature = [
            attr_dict["w1"],
            attr_dict["w2"],
            attr_dict["w3"],
            attr_dict["w4"],
            attr_dict["w5"],
            attr_dict["w6"],
            attr_dict["w7"],
            0,
            0,
        ]  # last 2 indicate self-loop
        # and masking
        edge_feature = np.array(edge_feature, dtype=int)
        # convert nx node ids to data obj node index
        i = nx_node_ids.index(node_1)
        j = nx_node_ids.index(node_2)
        # add each edge in both directions with the same features
        edges_list.append((i, j))
        edge_features_list.append(edge_feature)
        edges_list.append((j, i))
        edge_features_list.append(edge_feature)

    if edges_list:
        # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
        edge_index = torch.tensor(np.array(edges_list).T, dtype=torch.long)
        # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
        edge_attr = torch.tensor(np.array(edge_features_list), dtype=torch.float)
    else:
        # Fixed: for an edgeless graph np.array([]).T has shape (0,), not
        # (2, 0); build correctly shaped empty tensors explicitly
        # (9 = 7 weight attributes + 2 flag slots).
        edge_index = torch.empty((2, 0), dtype=torch.long)
        edge_attr = torch.empty((0, 9), dtype=torch.float)

    try:
        species_id = int(nx_node_ids[0].split(".")[0])  # nx node id is of the form:
        # species_id.protein_id
        species_id = torch.tensor([species_id], dtype=torch.long)
    except Exception:  # occurs when nx node id has no species id info. For the extract
        # substructure context pair transform, where we convert a data obj to
        # a nx graph obj (which does not have original node id info)
        species_id = torch.tensor([0], dtype=torch.long)  # dummy species
        # id is 0

    # construct data obj
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
    data.species_id = species_id
    data.center_node_idx = center_node_idx

    if node_id_to_go_labels:  # supervised case with go node labels
        # Construct a dim n_pretrain_go_classes tensor and a
        # n_downstream_go_classes tensor for the center node. 0 is no data
        # or negative, 1 is positive.
        downstream_go_node_feature = [0] * len(allowable_features_downstream)
        pretrain_go_node_feature = [0] * len(allowable_features_pretrain)
        if center_id in node_id_to_go_labels:
            go_labels = node_id_to_go_labels[center_id]
            # get indices of allowable_features_downstream that match with elements
            # in go_labels
            _, node_feature_indices, _ = np.intersect1d(allowable_features_downstream, go_labels, return_indices=True)
            for idx in node_feature_indices:
                downstream_go_node_feature[idx] = 1
            # get indices of allowable_features_pretrain that match with
            # elements in go_labels
            _, node_feature_indices, _ = np.intersect1d(allowable_features_pretrain, go_labels, return_indices=True)
            for idx in node_feature_indices:
                pretrain_go_node_feature[idx] = 1
        data.go_target_downstream = torch.tensor(np.array(downstream_go_node_feature), dtype=torch.long)
        data.go_target_pretrain = torch.tensor(np.array(pretrain_go_node_feature), dtype=torch.long)
    return data
Exemple #15
0
def test_data():
    """Exercise the core ``torch_geometric.data.Data`` API end to end:
    attribute/key access, concatenation dims, coalescing, cloning,
    derived counts, and repr."""
    torch_geometric.set_debug(True)

    # 3 nodes x 2 features, built transposed so that x is NON-contiguous
    # (needed for the contiguous() assertions below).
    x = torch.tensor([[1, 3, 5], [2, 4, 6]], dtype=torch.float).t()
    # Contains the duplicate edge (1, 1) so the graph is not coalesced yet.
    edge_index = torch.tensor([[0, 0, 1, 1, 2], [1, 1, 0, 2, 1]])
    data = Data(x=x, edge_index=edge_index).to(torch.device('cpu'))

    N = data.num_nodes

    # Attribute access and dict-style access must agree.
    assert data.x.tolist() == x.tolist()
    assert data['x'].tolist() == x.tolist()

    assert sorted(data.keys) == ['edge_index', 'x']
    assert len(data) == 2
    assert 'x' in data and 'edge_index' in data and 'pos' not in data

    # Node features concatenate along dim 0; edge indices along dim -1,
    # with per-graph offsets of num_nodes when batching.
    assert data.__cat_dim__('x', data.x) == 0
    assert data.__cat_dim__('edge_index', data.edge_index) == -1
    assert data.__inc__('x', data.x) == 0
    assert data.__inc__('edge_index', data.edge_index) == data.num_nodes

    assert not data.x.is_contiguous()
    data.contiguous()
    assert data.x.is_contiguous()

    # Coalescing removes the duplicate (1, 1) edge: 5 edges -> 4.
    assert not data.is_coalesced()
    data.edge_index, _ = coalesce(data.edge_index, None, N, N)
    data = data.coalesce()
    assert data.is_coalesced()

    # clone() is a deep copy: equal contents, distinct object.
    clone = data.clone()
    assert clone != data
    assert len(clone) == len(data)
    assert clone.x.tolist() == data.x.tolist()
    assert clone.edge_index.tolist() == data.edge_index.tolist()

    data['x'] = x + 1
    assert data.x.tolist() == (x + 1).tolist()

    assert data.__repr__() == 'Data(edge_index=[2, 4], x=[3, 2])'

    # Round-trip through a plain dict.
    dictionary = {'x': data.x, 'edge_index': data.edge_index}
    data = Data.from_dict(dictionary)
    assert sorted(data.keys) == ['edge_index', 'x']

    assert not data.contains_isolated_nodes()
    assert not data.contains_self_loops()
    assert data.is_undirected()
    assert not data.is_directed()

    assert data.num_nodes == 3
    assert data.num_edges == 4
    assert data.num_faces is None
    assert data.num_node_features == 2
    assert data.num_features == 2

    data.edge_attr = torch.randn(data.num_edges, 2)
    assert data.num_edge_features == 2
    data.edge_attr = None

    # num_nodes can still be inferred from edge_index when x is gone...
    data.x = None
    assert data.num_nodes == 3

    # ...but not once edge_index is gone too.
    data.edge_index = None
    assert data.num_nodes is None
    assert data.num_edges is None

    # An explicitly set num_nodes overrides inference.
    data.num_nodes = 4
    assert data.num_nodes == 4

    # Arbitrary extra attributes are stored alongside the known ones.
    data = Data(x=x, attribute=x)
    assert len(data) == 2
    assert data.x.tolist() == x.tolist()
    assert data.attribute.tolist() == x.tolist()

    face = torch.tensor([[0, 1], [1, 2], [2, 3]])
    data = Data(num_nodes=4, face=face)
    assert data.num_faces == 2
    assert data.num_nodes == 4

    # Non-tensor attributes are allowed and repr'd verbatim.
    data = Data(title="test")
    assert data.__repr__() == 'Data(title=test)'
    assert data.num_node_features == 0
    assert data.num_edge_features == 0

    torch_geometric.set_debug(False)
Exemple #16
0
    if use_cuda:
        device = torch.device('cuda:' + str(use_cuda))
    else:
        device = torch.device('cpu')

    # 2 read and processing data
    data = Dataset(data_path='/home/zhengyi/')
    # idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    adj, features, labels = data.adj, data.features, data.labels

    adj = sparse_mx_to_torch_sparse_long_tensor(adj)
    # features = sparse_mx_to_torch_sparse_tensor(features)
    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels - 1)
    # print(torch.min(labels))
    data = Data(x=features, edge_index=adj, y=labels).to(device)
    # gen idx_train, idx_val, idx_test
    _idx = np.arange(len(labels))
    val_size = 0.1
    test_size = 0.8
    train_size = 1 - val_size - test_size
    stratify = labels
    idx_train_and_val, idx_test = train_test_split(_idx,
                                                   random_state=None,
                                                   train_size=train_size +
                                                   val_size,
                                                   test_size=test_size,
                                                   stratify=stratify)

    stratify = stratify[idx_train_and_val]
    idx_train, idx_val = train_test_split(
    if tfs is not None:
        x_pos = torch.tensor(tfs['features']['dom_x'].inverse_transform(tmp_event[['dom_x']]),dtype=torch.float)
        y_pos = torch.tensor(tfs['features']['dom_y'].inverse_transform(tmp_event[['dom_y']]),dtype=torch.float)
        z_pos = torch.tensor(tfs['features']['dom_z'].inverse_transform(tmp_event[['dom_z']]),dtype=torch.float)
        x = torch.cat([torch.tensor(tmp_event[['charge_log10','time']].values,dtype=torch.float),x_pos,y_pos,z_pos],dim=1)
        pos = torch.cat([x_pos,y_pos,z_pos],dim=1)
    else:
        x = torch.tensor(tmp_event[['charge_log10','time','dom_x','dom_y','dom_z']].values,dtype=torch.float) #Features
        pos = torch.tensor(tmp_event[['dom_x','dom_y','dom_z']].values,dtype=torch.float) #Position

    query = "SELECT energy_log10, time, position_x, position_y, position_z, direction_x, direction_y, direction_z, azimuth, zenith FROM truth WHERE event_no = {}".format(event_no)
    y = pd.read_sql(query,con)

    y = torch.tensor(y.values,dtype=torch.float) #Target

    dat = Data(x=x,edge_index=None,edge_attr=None,y=y,pos=pos) 
    
#     T.KNNGraph(loop=True)(dat) #defining edges by k-NN with k=6 !!! Make sure .pos is not scaled!!! ie. x,y,z  -!-> ax,by,cz
    
    T.KNNGraph(k=6, loop=False, force_undirected = False)(dat)
    dat.adj_t = None
    T.ToUndirected()(dat)
    T.AddSelfLoops()(dat)
    (row, col) = dat.edge_index
    dat.edge_index = torch.stack([col,row],dim=0)
    
    data_list.append(dat)

    if (i+1) % subdivides == 0:
        data, slices = InMemoryDataset.collate(data_list)
        torch.save((data,slices), destination + '/{}k_{}{}.pt'.format(subdivides//1000,save_filename,subset))
Exemple #18
0
def main():
    """Entry point for a distributed (NCCL) GNN run.

    Initializes the process group from OpenMPI/SLURM environment variables,
    loads one of several graph datasets selected by the module-level
    ``graphname``, and hands everything to ``init_process``.

    Relies on module-level globals: ``device``, ``graphname``, ``download``,
    ``normalization``, ``acc_per_rank``, ``run`` — all assumed to be set
    before this is called (TODO confirm against the surrounding file).
    """
    global device
    global graphname

    print(socket.gethostname())
    seed = 0  # NOTE(review): defined but not used in this function

    if not download:
        mp.set_start_method('spawn', force=True)
        outputs = None
        # Map OpenMPI's rank variable onto the one torch.distributed reads.
        if "OMPI_COMM_WORLD_RANK" in os.environ.keys():
            os.environ["RANK"] = os.environ["OMPI_COMM_WORLD_RANK"]

        # Initialize distributed environment with SLURM
        if "SLURM_PROCID" in os.environ.keys():
            os.environ["RANK"] = os.environ["SLURM_PROCID"]

        if "SLURM_NTASKS" in os.environ.keys():
            os.environ["WORLD_SIZE"] = os.environ["SLURM_NTASKS"]

        if "MASTER_ADDR" not in os.environ.keys():
            os.environ["MASTER_ADDR"] = "127.0.0.1"

        os.environ["MASTER_PORT"] = "1234"
        dist.init_process_group(backend='nccl')
        rank = dist.get_rank()
        size = dist.get_world_size()
        print("Processes: " + str(size))

        # device = torch.device('cpu')
        # Pin this process to one GPU, derived from its rank.
        devid = rank_to_devid(rank, acc_per_rank)
        device = torch.device('cuda:{}'.format(devid))
        torch.cuda.set_device(device)
        curr_devid = torch.cuda.current_device()
        # print(f"curr_devid: {curr_devid}", flush=True)
        devcount = torch.cuda.device_count()

    # --- Dataset selection ------------------------------------------------
    if graphname == "Cora":
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        graphname)
        dataset = Planetoid(path, graphname, transform=T.NormalizeFeatures())
        data = dataset[0]
        data = data.to(device)
        data.x.requires_grad = True
        inputs = data.x.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
        edge_index = data.edge_index
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    elif graphname == "Reddit":
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        graphname)
        dataset = Reddit(path, T.NormalizeFeatures())
        data = dataset[0]
        data = data.to(device)
        data.x.requires_grad = True
        inputs = data.x.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
        edge_index = data.edge_index
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    elif graphname == 'Amazon':
        # Pre-processed COO edge list on disk; features/labels are random
        # placeholders (benchmarking only, not real training targets).
        print(f"Loading coo...", flush=True)
        edge_index = torch.load("../data/Amazon/processed/data.pt")
        print(f"Done loading coo", flush=True)
        # edge_index = edge_index.t_()
        # n = 9430088
        n = 14249639
        # n = 14249640
        num_features = 300
        num_classes = 24
        # mid_layer = 24
        inputs = torch.rand(n, num_features)
        data = Data()
        data.y = torch.rand(n).uniform_(0, num_classes - 1).long()
        data.train_mask = torch.ones(n).long()
        # edge_index = edge_index.to(device)
        print(f"edge_index.size: {edge_index.size()}", flush=True)
        print(f"edge_index: {edge_index}", flush=True)
        data = data.to(device)
        # inputs = inputs.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
    elif graphname == 'subgraph3':
        # Same placeholder scheme as 'Amazon', different sizes.
        print(f"Loading coo...", flush=True)
        edge_index = torch.load("../data/subgraph3/processed/data.pt")
        print(f"Done loading coo", flush=True)
        n = 8745542
        num_features = 128
        # mid_layer = 512
        # mid_layer = 64
        num_classes = 256
        inputs = torch.rand(n, num_features)
        data = Data()
        data.y = torch.rand(n).uniform_(0, num_classes - 1).long()
        data.train_mask = torch.ones(n).long()
        print(f"edge_index.size: {edge_index.size()}", flush=True)
        data = data.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)

    # In download-only mode we never reach training.
    if download:
        exit()

    if normalization:
        adj_matrix, _ = add_remaining_self_loops(edge_index,
                                                 num_nodes=inputs.size(0))
    else:
        adj_matrix = edge_index

    init_process(rank, size, inputs, adj_matrix, data, num_features,
                 num_classes, device, outputs, run)

    if outputs is not None:
        return outputs[0]
Exemple #19
0
    def __getitem__(self, idx):
        """Return one graph-matching instance as a dict.

        Samples ``self.num_graphs_in_matching_instance`` annotated graphs
        from the underlying dataset, builds a torch_geometric graph per
        sample, and packs keypoints, sizes, permutation matrices, graphs,
        and images/features into a dict.

        Raises:
            ValueError: if ``num_graphs_in_matching_instance`` was never set.
        """
        sampling_strategy = cfg.train_sampling if self.ds.sets == "train" else cfg.eval_sampling
        if self.num_graphs_in_matching_instance is None:
            raise ValueError("Num_graphs has to be set to an integer value.")

        # In true-epoch mode we index deterministically; otherwise the
        # dataset draws its own random sample.
        idx = idx if self.true_epochs else None
        anno_list, perm_mat_list = self.ds.get_k_samples(
            idx,
            k=self.num_graphs_in_matching_instance,
            cls=self.cls,
            mode=sampling_strategy)
        for perm_mat in perm_mat_list:
            # Degenerate matching (empty or <2x2 intersection): retry with
            # the next index (recursively) instead of returning bad data.
            if (not perm_mat.size or
                (perm_mat.size < 2 * 2 and sampling_strategy == "intersection")
                    and not self.true_epochs):
                # 'and not self.true_epochs' because we assume all data is valid when sampling a true epoch
                next_idx = None if idx is None else idx + 1
                return self.__getitem__(next_idx)

        # Ground-truth keypoint coordinates per graph, shape (n_i, 2).
        points_gt = [
            np.array([(kp["x"], kp["y"]) for kp in anno_dict["keypoints"]])
            for anno_dict in anno_list
        ]
        n_points_gt = [len(p_gt) for p_gt in points_gt]

        graph_list = []
        for p_gt, n_p_gt in zip(points_gt, n_points_gt):
            edge_indices, edge_features = build_graphs(p_gt, n_p_gt)

            # Add dummy node features so the __slices__ of them is saved when creating a batch
            # Coordinates normalized by 256 — presumably the image size; TODO confirm.
            pos = torch.tensor(p_gt).to(torch.float32) / 256.0
            assert (pos > -1e-5).all(), p_gt
            graph = Data(
                edge_attr=torch.tensor(edge_features).to(torch.float32),
                edge_index=torch.tensor(edge_indices, dtype=torch.long),
                x=pos,
                pos=pos,
            )
            graph.num_nodes = n_p_gt
            graph_list.append(graph)

        ret_dict = {
            "Ps": [torch.Tensor(x) for x in points_gt],
            "ns": [torch.tensor(x) for x in n_points_gt],
            "gt_perm_mat": perm_mat_list,
            "edges": graph_list,
        }

        # Prefer raw images when available; otherwise fall back to
        # precomputed per-keypoint features.
        imgs = [anno["image"] for anno in anno_list]
        if imgs[0] is not None:
            trans = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(cfg.NORM_MEANS, cfg.NORM_STD)
            ])
            imgs = [trans(img) for img in imgs]
            ret_dict["images"] = imgs
        elif "feat" in anno_list[0]["keypoints"][0]:
            feat_list = [
                np.stack([kp["feat"] for kp in anno_dict["keypoints"]],
                         axis=-1) for anno_dict in anno_list
            ]
            ret_dict["features"] = [torch.Tensor(x) for x in feat_list]

        return ret_dict
# Validation frames: two 24-step series offset by their key, mirroring the
# training dict built earlier from `data`.
val_data = {6: np.arange(24) + 6, 7: np.arange(24) + 7}
# dense_adjacency = nx.to_pandas_adjacency(graph)
# Shared static adjacency in COO format, reused by every Data object below.
sparse_adj = nx.to_scipy_sparse_matrix(graph).tocoo()
sparse_adj_in_coo_format = np.stack([sparse_adj.row, sparse_adj.col])
sparse_adj_in_coo_format_tensor = torch.tensor(sparse_adj_in_coo_format,
                                               dtype=torch.long).cuda()

frame_data = pd.DataFrame.from_dict(data)
valframe = pd.DataFrame.from_dict(val_data)
# Training graphs: input is frame i, target is the NEXT frame i+1.
data_graphs = []
for i in range(len(frame_data) - 1):
    x = torch.tensor([frame_data.iloc[i]], dtype=torch.double).cuda()
    x = x.permute(1, 0)  # nodes, features
    y = torch.tensor([frame_data.iloc[i + 1]], dtype=torch.double).cuda()
    y = y.permute(1, 0)  # nodes, features
    data_entry = Data(x=x, y=y, edge_index=sparse_adj_in_coo_format_tensor)
    data_graphs.append(data_entry)
loader = DataLoader(data_graphs, batch_size=1)
# Validation graphs, built the same way as the training graphs.
val_graphs = []
for i in range(len(valframe) - 1):
    x = torch.tensor([valframe.iloc[i]], dtype=torch.double).cuda()
    x = x.permute(1, 0)
    # BUG FIX: the target previously used frame i (identical to the input),
    # while the training loop above targets frame i + 1.  Use the next
    # frame here too so validation measures the same prediction task.
    y = torch.tensor([valframe.iloc[i + 1]], dtype=torch.double).cuda()
    y = y.permute(1, 0)
    val_data_entry = Data(x=x, y=y, edge_index=sparse_adj_in_coo_format_tensor)
    val_graphs.append(val_data_entry)
val_loader = DataLoader(val_graphs, batch_size=1)

#training_tensors = dicttotensor(data)
#val_tensors = dicttotensor(val_data)
# Model width = number of nodes (features live along dim 0 after permute).
model = Net(data_graphs[0]['x'].shape[0])
Exemple #21
0
def do_training(ogn,
                graph,
                lr=1e-3,
                total_epochs=100,
                batch_per_epoch=1500,
                weight_decay=1e-8,
                batch=32,
                l1=1e-2):
    """Train `ogn` on random node-neighborhood subgraphs of `graph`.

    Each step samples `batch` random nodes, gathers all their neighbors via
    a CSR view of the adjacency, builds a small Data subgraph with edges
    pointing neighbor -> sampled node, and optimizes with Adam + OneCycleLR.

    Args:
        ogn: the model (moved to CUDA each epoch).
        graph: a Data-like object with .x, .y, .edge_index.
        l1: regularization weight passed through to `new_loss`.

    Returns:
        List of per-epoch average losses.
    """

    idx = graph.edge_index.cuda()
    X = graph.x.cuda()
    y = graph.y.cuda()
    N = graph.x.shape[0]
    device = torch.device('cuda')
    # Build a sparse N x N adjacency, then convert to CSR so each node's
    # neighbor list is the slice col[row[i]:row[i+1]].
    v = torch.ones(idx.shape[1], device=device)
    mat = sparse.IntTensor(idx, v, torch.Size([N, N]))
    mat2 = ts.tensor.SparseTensor.from_torch_sparse_coo_tensor(mat,
                                                               has_value=False)
    row, col, _ = mat2.csr()

    # Set up optimizer:
    init_lr = lr
    opt = torch.optim.Adam(ogn.parameters(),
                           lr=init_lr,
                           weight_decay=weight_decay)

    sched = OneCycleLR(
        opt,
        max_lr=init_lr,
        steps_per_epoch=batch_per_epoch,  #len(trainloader),
        epochs=total_epochs,
        final_div_factor=1e5)

    all_losses = []
    epoch = 0

    for epoch in trange(epoch, total_epochs):
        ogn.cuda()
        total_loss = 0.0
        i = 0
        num_items = 0

        while i < batch_per_epoch:
            opt.zero_grad()

            # Sample `batch` node ids, then pull every neighbor of each
            # sampled node from the CSR structure.
            node_idx = torch.randint(0, N - 1, (batch, ), device=device)
            neighbor_idx = torch.cat([
                col[row[node_idx[i]]:row[node_idx[i] + 1]]
                for i in range(batch)
            ])

            # Relabel: sampled nodes become 0..batch-1; their neighbors are
            # appended after them and numbered batch..batch+len-1.
            new_node_idx = torch.cat([
                torch.ones(row[node_idx[i] + 1] - row[node_idx[i]],
                           dtype=int,
                           device=device) * i for i in range(batch)
            ])
            new_neighbor_idx = torch.arange(batch,
                                            batch + len(neighbor_idx),
                                            device=device,
                                            dtype=int)

            Xcur = torch.cat([X[node_idx], X[neighbor_idx]], dim=0)
            ycur = torch.cat([y[node_idx], y[neighbor_idx]], dim=0)

            # Edges run neighbor -> sampled node (message direction).
            edge_index = torch.cat([
                new_neighbor_idx[None], new_node_idx[None]
            ])  #new_node_idx[None], new_neighbor_idx[None]])

            g = Data(x=Xcur, y=ycur, edge_index=edge_index)

            loss, reg = new_loss(ogn, g, batch, regularization=l1)
            ((loss + reg) / int(batch + 1)).backward()

            opt.step()
            sched.step()

            total_loss += loss.item()
            i += 1
            num_items += batch

        cur_loss = total_loss / num_items
        all_losses.append(cur_loss)
        print(cur_loss, flush=True)

    return all_losses
Exemple #22
0
# Pick the first graph as the working example for the autoencoder below.
dataSel = data_list[0]

# print(data_list[0],filename_list[0])
# print(data)

# Single full-batch loader over every graph (batch_size == dataset size).
loader = DataLoader(data_list, batch_size=len(data_list), shuffle=False)
# for data in loader: #batch,
#     print(data)
#     print(data.x)
#     print(data.edge_index)
for batch in loader:
    # print(batch.num_features)
    # print(batch.num_graphs)
    pass

# Re-wrap the selected graph without its extra attributes.
data = Data(x=dataSel.x, edge_index=dataSel.edge_index)
# print(data.num_features)

# Choose between plain and variational graph autoencoders via CLI.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='GAE')
args = parser.parse_args()
assert args.model in ['GAE', 'VGAE']
kwargs = {'GAE': GAE, 'VGAE': VGAE}

class Encoder(torch.nn.Module):
    # def __init__(self, in_channels, out_channels):
    def __init__(self, in_channels, out_channels):
        super(Encoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True)
        if args.model in ['GAE']:
Exemple #23
0
def load_graph_data(realization=0, cutoff=30):
    """Load (or generate) a halo catalogue and build a radius graph for PyTorch.

    Reads ``halos_<realization>.h5``; if that fails, generates the file via
    ``generate_halo_data_nv.generate_data`` and retries.  Halos within
    ``cutoff`` of each other are connected (receiver is the second edge row),
    features are standardized, and boundary halos are flagged in the mask
    column of ``y``.

    Args:
        realization: index of the simulation realization to load.
        cutoff: connection radius, also used as the boundary margin.

    Returns:
        Dict with 'graph' (a Data object), a 'column_description' string,
        and the 'pos_scale' / 'vel_scale' / 'M14_scale' normalizers.
    """
    try:
        cur_data = pd.read_hdf('halos_%d.h5' % (realization, ))
    except Exception:  # narrowed from a bare `except:` so Ctrl-C still aborts
        from generate_halo_data_nv import generate_data
        generate_data(realization, get_cluster())
        cur_data = pd.read_hdf('halos_%d.h5' % (realization, ))

    # Connect nearby halos via a KD-tree radius query.
    xyz = np.array([cur_data.x, cur_data.y, cur_data.z]).T
    tree = KDTree(xyz)

    # Radius tuned (per the original notebook) for ~8 neighbors on average.
    region_of_influence = cutoff

    #plt.hist(tree.query_radius(xyz, region_of_influence, count_only=True)-1, bins=31);
    #plt.xlabel('Number with')
    #plt.ylabel('Number of neighbors')

    # Build the adjacency: for each halo, all neighbors within the radius,
    # sorted by distance (index 0 is the halo itself and is skipped below).
    neighbors = tree.query_radius(xyz,
                                  region_of_influence,
                                  sort_results=True,
                                  return_distance=True)[0]

    all_edges = []
    for j in range(len(neighbors)):
        if len(neighbors[j]) == 1:  # only itself in range -> no edges
            continue
        # Receiving node is second!
        cur = np.array([neighbors[j][1:],
                        np.ones(len(neighbors[j]) - 1) * j],
                       dtype=np.int64)

        all_edges.append(cur)

    all_edges = np.concatenate(all_edges, axis=1)

    # Features: position, velocity, mass; target: bias delta.
    X_raw = torch.from_numpy(
        np.array(cur_data['x y z vx vy vz M14'.split(' ')]))
    y_raw = torch.from_numpy(np.array(cur_data[['delta']]))
    pos_scale = X_raw[:, :3].std(0).mean(0)
    pos_mean = 500  # assumes a 1000-unit box centered at 500 — TODO confirm
    vel_scale = X_raw[:, 3:6].std(0).mean(0)
    M14_scale = X_raw[:, 6].std()
    X = X_raw.clone()
    X[:, :3] = (X[:, :3] - pos_mean) / pos_scale
    X[:, 3:6] = (X[:, 3:6]) / vel_scale
    X[:, 6] = (X[:, 6]) / M14_scale
    # all_edges is already int64, so .long() is a no-op; this replaces the
    # redundant double conversion torch.LongTensor(torch.from_numpy(...)).
    edge_index = torch.from_numpy(all_edges).long()

    # Which nodes are far enough from the box edge to be valid receivers?
    # (np.prod replaces np.product, which was removed in NumPy 2.0.)
    nodes_far_from_edge = np.prod(
        [((region_of_influence < cur_data[dim]) &
          (1000 - region_of_influence > cur_data[dim]))
         for dim in 'x y z'.split(' ')], 0).astype(np.float32)

    # Append the mask as a second y-column: a multiplier against the loss.
    y = torch.cat(
        [y_raw, torch.from_numpy(nodes_far_from_edge)[:, None]], dim=1)
    graph_data = Data(X, edge_index=edge_index, y=y)

    return {
        'graph': graph_data,
        'column_description':
        'x columns are [x, y, z, vx, vy, vz, M]; everything has been scaled to be std=1. y columns are [bias, mask], where mask=1 indicates that the node should be used as a receiver for training; mask=0 indicates that the node is too close to the edge. Multiply the node-wise loss by the mask during training.',
        'pos_scale': pos_scale,
        'vel_scale': vel_scale,
        'M14_scale': M14_scale
    }
Exemple #24
0
# edge_index = torch.tensor(edge_lists, dtype=torch.long)
# data2 = Data(edge_index=edge_index.t().contiguous())

# edge_lists = [x for x in edge_lists if not x[0]==x[1]]  # remove self loop
l = np.array(edge_lists)  # sort by first then second column
# https://stackoverflow.com/a/38194077/12859133
l = l[l[:, 1].argsort()]
l = l[l[:, 0].argsort(kind='mergesort')]
l = l.transpose()
r = np.array([l[1, :], l[0, :]])  # reversed edge pairs for the undirected graph

edge_index = torch.from_numpy(r).long()
# edge_index = torch.from_numpy(r).long()-1  # node indices should all start from 0

x = torch.arange(1, l[0].max() + 1).long()
data = Data(x, edge_index)  # assumes x increments from node id 0
d = data.edge_index.data.numpy()

# Build the undirected networkx graph
G = nx.Graph()
G.add_edges_from(edge_lists)
# Convert the undirected networkx graph into a Data object
data3 = from_networkx(G)
data3.x = torch.tensor(list(G.nodes)).unsqueeze(
    -1)  # may not line up, since node ids may have been re-encoded

# Building the graph is slow, so save it right away
# (networkx graph converted straight via Data + the Data object itself)
torch.save(data, dataset + 'global_graph_start0.pt')

# For the dynamic variant:
# build T independent graphs with Data(); do not feed those graphs to the
# dataloader => train a GAT on each separately to get T embeddings
#### Node features for MeSH nodes: one-hot identity per unique descriptor.
x_mesh = torch.eye(len(set(mesh_filtered['DescriptorName_UI'])))

#### Edges paper-mesh: remap raw PMIDs / descriptor UIs to dense indices.
mesh_filtered['PMID'] = [
    dict_reindex[e] for e in mesh_filtered['PMID'].tolist()
]
mesh_filtered['DescriptorName_UI'] = [
    dict_reindex_mesh[e] for e in mesh_filtered['DescriptorName_UI'].tolist()
]
edge_index_paper_mesh = torch.LongTensor(
    mesh_filtered[['PMID', 'DescriptorName_UI']].values.transpose())

#Create dataset
# edge_index_paper_paper = to_undirected(edge_index_paper_paper)
# edge_index_paper_mesh = to_undirected(edge_index_paper_mesh)

# Edge-type vector: 0 for paper-paper edges, 1 for paper-mesh edges,
# concatenated in the same order as edge_index below.
edge_type = torch.cat([
    torch.zeros(edge_index_paper_paper.size(1)),
    torch.ones(edge_index_paper_mesh.size(1))
], 0)
edge_index = torch.cat([edge_index_paper_paper, edge_index_paper_mesh], 1)

# Heterogeneous graph stored in a single Data object with per-node-type
# feature matrices and their dims recorded alongside.
dataset = Data(x_paper=x_paper,
               x_mesh=x_mesh,
               edge_index=edge_index,
               edge_type=edge_type,
               mesh_feature_dim=x_mesh.size(1),
               paper_feature_dim=x_paper.size(1))
torch.save(dataset, 'het_graph_paper_mesh.pk')
Exemple #26
0
    def process_single_file(self, raw_file_name):
        """Convert one pickled raw file into a list of Data objects.

        Each entry carries node features (``x``), a distance-thresholded
        edge set with distances as ``edge_attr``, gen/cand targets, and
        binary target edge attributes masked to the same edge set.

        Args:
            raw_file_name: file name inside ``self.raw_dir``.

        Returns:
            List of processed torch_geometric ``Data`` objects.
        """
        with open(osp.join(self.raw_dir, raw_file_name), "rb") as fi:
            all_data = pickle.load(fi, encoding='iso-8859-1')

        batch_data = []
        for idata, data in enumerate(all_data):
            mat = data["dm"].copy()
            #set all edges with distance greater than 0.5 to 0
            md = mat.todense()
            md[md > 0.5] = 0
            mat = scipy.sparse.coo_matrix(md)

            mat_reco_cand = data["dm_elem_cand"].copy()
            mat_reco_gen = data["dm_elem_gen"].copy()

            # Restrict the cand/gen matrices to edges kept in `mat`, then
            # binarize them (True where both matrices are nonzero).
            mul1 = mat.multiply(mat_reco_cand)
            mul2 = mat.multiply(mat_reco_gen)
            mul1 = mul1 > 0
            mul2 = mul2 > 0
            if len(mat.row) > 0:
                # Rebuild COO matrices on exactly mat's sparsity pattern so
                # edge_attr rows line up one-to-one with mat's edges.
                mat_reco_cand = scipy.sparse.coo_matrix(
                    (np.array(mul1[mat.row, mat.col]).squeeze(),
                     (mat.row, mat.col)),
                    shape=(mat.shape[0], mat.shape[1]))
                mat_reco_gen = scipy.sparse.coo_matrix(
                    (np.array(mul2[mat.row, mat.col]).squeeze(),
                     (mat.row, mat.col)),
                    shape=(mat.shape[0], mat.shape[1]))
            else:
                # No edges survived the 0.5 cutoff: keep empty matrices.
                mat_reco_cand = scipy.sparse.coo_matrix(
                    np.zeros((mat.shape[0], mat.shape[1])))
                mat_reco_gen = scipy.sparse.coo_matrix(
                    np.zeros((mat.shape[0], mat.shape[1])))

            X = data["Xelem"]
            ygen = data['ygen']
            ycand = data['ycand']
            #node_sel = X[:, 4] > 0.2
            #row_index, col_index, dm_data = mat.row, mat.col, mat.data

            #num_elements = X.shape[0]
            #num_edges = row_index.shape[0]

            #edge_index = np.zeros((2, 2*num_edges))
            #edge_index[0, :num_edges] = row_index
            #edge_index[1, :num_edges] = col_index
            #edge_index[0, num_edges:] = col_index
            #edge_index[1, num_edges:] = row_index
            #edge_index = torch.tensor(edge_index, dtype=torch.long)

            #edge_data = dm_data
            #edge_attr = np.zeros((2*num_edges, 1))
            #edge_attr[:num_edges,0] = edge_data
            #edge_attr[num_edges:,0] = edge_data
            #edge_attr = torch.tensor(edge_attr, dtype=torch.float)

            # from_scipy_sparse_matrix returns (edge_index, edge_weight).
            r = torch_geometric.utils.from_scipy_sparse_matrix(mat)
            rc = torch_geometric.utils.from_scipy_sparse_matrix(mat_reco_cand)
            rg = torch_geometric.utils.from_scipy_sparse_matrix(mat_reco_gen)

            #edge_index, edge_attr = torch_geometric.utils.subgraph(torch.tensor(node_sel, dtype=torch.bool),
            #    edge_index, edge_attr, relabel_nodes=True, num_nodes=len(X))

            x = torch.tensor(X, dtype=torch.float)
            ygen = torch.tensor(ygen, dtype=torch.float)
            ycand = torch.tensor(ycand, dtype=torch.float)

            data = Data(
                x=x,
                edge_index=r[0].to(dtype=torch.long),
                edge_attr=r[1].to(dtype=torch.float),
                ygen=ygen,
                ycand=ycand,
                target_edge_attr_cand=rc[1].to(dtype=torch.float),
                target_edge_attr_gen=rg[1].to(dtype=torch.float),
            )
            data_prep(data)  # in-place post-processing — see data_prep
            batch_data += [data]

        return batch_data
 def _process(self, data_list):
     """Collate `data_list` into a single Data object.

     An empty list yields an empty ``Data``; otherwise the list is merged
     with ``Batch.from_data_list`` and the ``batch`` assignment vector is
     stripped so the result behaves like a plain ``Data``.
     """
     if not data_list:
         return Data()
     merged = Batch.from_data_list(data_list)
     delattr(merged, "batch")
     return merged
Exemple #28
0
                                                       hidden_dim4,
                                                       edge_input_dim)
        self.resmpblock4 = ResidualMessagePassingBlock(hidden_dim4,
                                                       hidden_dim5,
                                                       edge_input_dim)
        self.set2set = Set2Set(hidden_dim5, processing_steps=3)
        self.ffnn_out = torch.nn.Linear(hidden_dim5 * 2, output_dim)

    def forward(self, data):
        """Run the residual message-passing stack and read out one value
        per graph.

        Args:
            data: batch with ``x`` (node features), ``edge_attr``,
                ``edge_index`` and ``batch`` assignment vector.

        Returns:
            1-D tensor with one scalar prediction per graph in the batch.
        """
        # NOTE: mutates data.x in place (input embedding + ReLU).
        data.x = F.relu(self.ffnn(data.x))
        data = self.resmpblock0(data)
        data = self.resmpblock1(data)
        data = self.resmpblock2(data)
        data = self.resmpblock3(data)
        data = self.resmpblock4(data)
        # Set2Set pools node states to a 2*hidden graph embedding.
        x = self.set2set(data.x, data.batch)
        x = self.ffnn_out(x)
        return x.view(-1)


if __name__ == '__main__':
    from torch_geometric.data import Data

    # Smoke test: one random graph with 100 nodes (18 features each) and
    # 200 edges (7 edge features), all assigned to a single batch element.
    data = Data(x=torch.rand([100, 18]),
                edge_attr=torch.rand([200, 7]),
                edge_index=torch.ones([2, 200]).long(),
                y=torch.ones([200]),
                batch=torch.zeros([200]).long())
    # NOTE(review): constructed with only (in_dim, out_dim); other call
    # sites pass an edge_input_dim as well — confirm the constructor's
    # default covers the 7-dim edge_attr above.
    resmpblock0 = ResidualMessagePassingBlock(18, 18)
    print(resmpblock0(data))
def physnet_to_datalist(self,
                        N,
                        R,
                        E,
                        D,
                        Q,
                        Z,
                        num_mol,
                        mols,
                        efgs_batch,
                        EFG_R,
                        EFG_Z,
                        num_efg,
                        sol_data=None):
    """
    Load data from PhysNet structure to InMemoryDataset structure (more compact).

    One ``Data`` object is built per molecule from row ``i`` of each padded
    tensor, sliced to the molecule's true atom count ``N[i]`` (and EFG count
    ``num_efg[i]``), then passed through ``self.pre_transform``.

    :param N: per-molecule atom counts; ``N[i]`` slices the padded rows.
    :param R: atom coordinates, reshaped to ``(-1, 3)`` per molecule.
    :param E: per-molecule energies (flattened with ``view(-1)``).
    :param D: dipole vectors, reshaped to ``(-1, 3)``.
    :param Q: total charges.
    :param Z: atomic numbers, padded with 0 (used for heavy-atom count).
    :param num_mol: number of molecules / rows to process.
    :param mols: RDKit molecules; only read when ``self.bond_atom_sep``
        or when matching against ``sol_data`` via InChI.
    :param efgs_batch: atom-to-EFG assignment, used when ``self.cal_efg``.
    :param EFG_R: EFG coordinates, used when ``self.cal_efg``.
    :param EFG_Z: EFG identifiers, used when ``self.cal_efg``.
    :param num_efg: per-molecule EFG counts.
    :param sol_data: optional solvation DataFrame keyed by "InChI";
        molecules without exactly one match are skipped.
    :return: list of transformed ``Data`` objects (skipped entries removed).
    """
    from rdkit.Chem.inchi import MolToInchi

    # Object array pre-filled with None; entries for skipped molecules
    # stay None and are filtered out at the end.
    data_array = np.empty(num_mol, dtype=Data)
    t0 = time.time()
    # Heavy-atom count from the first molecule's Z row (excludes padding
    # zeros and hydrogens); only used to tag the saved map file below.
    Z_0 = Z[0, :]
    n_heavy = len(Z_0) - (Z_0 == 0).sum() - (Z_0 == 1).sum()

    # 1/0 flag per molecule: was it matched in sol_data (kept) or not.
    jianing_to_dongdong_map = []

    for i in tqdm(range(num_mol)):
        if self.bond_atom_sep:
            mol = mols[i]
        else:
            mol = None
        # atomic infos
        _tmp_Data = Data()

        num_atoms = N[i]
        _tmp_Data.N = num_atoms.view(-1)
        _tmp_Data.R = R[i, :N[i]].view(-1, 3)
        _tmp_Data.E = E[i].view(-1)
        _tmp_Data.D = D[i].view(-1, 3)
        _tmp_Data.Q = Q[i].view(-1)
        _tmp_Data.Z = Z[i, :N[i]].view(-1)

        if self.cal_efg:
            # Functional-group level tensors, sliced to this molecule's
            # actual EFG count.
            _tmp_Data.atom_to_EFG_batch = efgs_batch[i, :N[i]].view(-1)
            _tmp_Data.EFG_R = EFG_R[i, :num_efg[i]].view(-1, 3)
            _tmp_Data.EFG_Z = EFG_Z[i, :num_efg[i]].view(-1)
            _tmp_Data.EFG_N = num_efg[i].view(-1)

        if sol_data is not None:
            # find molecule from solvation csv file based on InChI, if found, add it
            # NOTE(review): requires exactly one row to match; zero or
            # multiple matches drop the molecule entirely.
            this_sol_data = sol_data.loc[sol_data["InChI"] == MolToInchi(mol)]
            if this_sol_data.shape[0] == 1:
                # `sol_keys` is defined outside this view — presumably the
                # solvation-property column names; TODO confirm.
                for key in sol_keys:
                    _tmp_Data.__setattr__(
                        key,
                        torch.as_tensor(this_sol_data.iloc[0][key]).view(-1))
                jianing_to_dongdong_map.append(1)
            else:
                jianing_to_dongdong_map.append(0)
                continue

        # Edge construction / geometry featurization delegated to the
        # dataset's pre_transform hook with its configured options.
        _tmp_Data = self.pre_transform(
            data=_tmp_Data,
            edge_version=self.edge_version,
            do_sort_edge=self.sort_edge,
            cal_efg=self.cal_efg,
            cutoff=self.cutoff,
            extended_bond=self.extended_bond,
            boundary_factor=self.boundary_factor,
            type_3_body=self.type_3_body,
            use_center=self.use_center,
            mol=mol,
            cal_3body_term=self.cal_3body_term,
            bond_atom_sep=self.bond_atom_sep,
            record_long_range=self.record_long_range)

        data_array[i] = _tmp_Data

    if sol_data is not None:
        # Persist which molecules survived the solvation match, tagged by
        # heavy-atom count.
        torch.save(torch.as_tensor(jianing_to_dongdong_map),
                   "jianing_to_dongdong_map_{}.pt".format(n_heavy))

    # Drop the None slots left by skipped molecules.
    data_list = [
        data_array[i] for i in range(num_mol) if data_array[i] is not None
    ]

    return data_list
    def inference(self, X, y):
        """Evaluate the model over every scenegraph sequence in ``X``.

        Each element ``X[i]`` is a dict with a 'sequence' of graphs
        (node_features / edge_index / edge_attr), a 'category', and a
        'folder_name'; ``y[i]`` is the sequence label.  All graphs of a
        sequence are batched into one forward pass; outputs and labels
        are accumulated per driving category (including the 'all' key)
        via ``self.update_categorical_outputs``.

        Returns a 7-tuple:
            categories      -- dict of {'outputs', 'labels'} tensors per category
            folder_names    -- folder name per processed sequence
            mean test loss  -- accumulated loss / len(X)
            avg_risky_seq_len -- mean output length over label==1 sequences
                                 (0 if there are none)
            inference_time  -- currently always 0 (CUDA timing disabled)
            attns_weights   -- LSTM attention weights, when the model emits them
            node_attns      -- pooling attention info, when the model emits it
        """
        labels = torch.LongTensor().to(self.config.device)
        outputs = torch.FloatTensor().to(self.config.device)
        # Dictionary storing (output, label) pair for all driving categories.
        # The empty tensors above are shared as initial values; the update
        # helper is expected to replace them with concatenated results.
        categories = {
            key: {'outputs': outputs, 'labels': labels}
            for key in self.unique_clips
        }
        acc_loss_test = 0
        folder_names = []
        attns_weights = []
        node_attns = []
        inference_time = 0  # CUDA event timing disabled; stays 0.

        with torch.no_grad():
            for i in range(len(X)):  # iterate through scenegraphs
                data, label, category = X[i]['sequence'], y[i], X[i][
                    'category']
                data_list = [
                    Data(x=g['node_features'],
                         edge_index=g['edge_index'],
                         edge_attr=g['edge_attr']) for g in data
                ]
                # One batch holding the whole sequence.
                self.test_loader = DataLoader(data_list,
                                              batch_size=len(data_list))
                sequence = next(iter(self.test_loader)).to(self.config.device)
                self.model.eval()

                output, attns = self.model.forward(sequence.x,
                                                   sequence.edge_index,
                                                   sequence.edge_attr,
                                                   sequence.batch)
                loss_test = self.loss_func(
                    output.view(-1, 2),
                    torch.LongTensor([label]).to(self.config.device))
                acc_loss_test += loss_test.detach().cpu().item()
                label = torch.tensor(label,
                                     dtype=torch.long).to(self.config.device)
                # store output, label statistics
                self.update_categorical_outputs(categories, output, label,
                                                category)

                folder_names.append(X[i]['folder_name'])
                if 'lstm_attn_weights' in attns:
                    attns_weights.append(attns['lstm_attn_weights'].squeeze().
                                         detach().cpu().numpy().tolist())
                if 'pool_score' in attns:
                    node_attn = {}
                    node_attn["original_batch"] = sequence.batch.detach().cpu(
                    ).numpy().tolist()
                    node_attn["pool_perm"] = attns['pool_perm'].detach().cpu(
                    ).numpy().tolist()
                    node_attn["pool_batch"] = attns['batch'].detach().cpu(
                    ).numpy().tolist()
                    node_attn["pool_score"] = attns['pool_score'].detach().cpu(
                    ).numpy().tolist()
                    node_attns.append(node_attn)

        # Average output length over risky (label == 1) sequences only.
        sum_seq_len = 0
        num_risky_sequences = 0
        sequences = len(categories['all']['labels'])
        for indices in range(sequences):
            seq_output = categories['all']['outputs'][indices]
            label = categories['all']['labels'][indices]
            # risky clip
            if label == 1:
                num_risky_sequences += 1
                sum_seq_len += seq_output.shape[0]

        # Guard against ZeroDivisionError when no sequence is labeled risky.
        avg_risky_seq_len = (sum_seq_len / num_risky_sequences
                             if num_risky_sequences else 0)

        return  categories, \
                folder_names, \
                acc_loss_test/len(X), \
                avg_risky_seq_len, \
                inference_time, \
                attns_weights, \
                node_attns