Exemple #1
0
    def process(self):
        """Parse the raw RDF graph and task splits into one PyG ``Data`` object.

        Reads the gzipped N-Triples graph, builds a relation-typed edge list
        (each relation contributes a forward edge type ``2*rel`` and an
        inverse edge type ``2*rel + 1``), loads train/test node labels from
        the TSV split files, and saves the collated result to
        ``self.processed_paths[0]``.
        """
        import gzip
        import pandas as pd
        import rdflib as rdf

        graph_file, task_file, train_file, test_file = self.raw_paths

        g = rdf.Graph()
        with gzip.open(graph_file, 'rb') as f:
            g.parse(file=f, format='nt')

        # Order relations by descending frequency so the most frequent
        # relations receive the smallest type ids.
        freq_ = Counter(g.predicates())

        def freq(rel):
            return freq_[rel] if rel in freq_ else 0

        relations = sorted(set(g.predicates()), key=lambda rel: -freq(rel))
        subjects = set(g.subjects())
        objects = set(g.objects())
        nodes = list(subjects.union(objects))

        relations_dict = {rel: i for i, rel in enumerate(list(relations))}
        nodes_dict = {node: i for i, node in enumerate(nodes)}

        # Every triple yields a forward edge (type 2*rel) and an inverse
        # edge (type 2*rel + 1).
        edge_list = []
        for s, p, o in g.triples((None, None, None)):
            src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
            edge_list.append([src, dst, 2 * rel])
            edge_list.append([dst, src, 2 * rel + 1])

        edge_list = sorted(edge_list, key=lambda x: (x[0], x[1], x[2]))
        edge = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        edge_index, edge_type = edge[:2], edge[2]

        if self.name == 'am':
            # NOTE: 'label_cateogory' matches the column header as spelled in
            # the upstream task file -- presumably intentional; confirm the
            # TSV header before "fixing" the typo.
            label_header = 'label_cateogory'
            nodes_header = 'proxy'
        elif self.name == 'aifb':
            label_header = 'label_affiliation'
            nodes_header = 'person'
        elif self.name == 'mutag':
            label_header = 'label_mutagenic'
            nodes_header = 'bond'
        elif self.name == 'bgs':
            label_header = 'label_lithogenesis'
            nodes_header = 'rock'

        labels_df = pd.read_csv(task_file, sep='\t')
        labels_set = set(labels_df[label_header].values.tolist())
        labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
        # Bug fix: `np.unicode` was deprecated in NumPy 1.20 and removed in
        # NumPy 2.0; the builtin `str` is the exact equivalent.
        nodes_dict = {str(key): val for key, val in nodes_dict.items()}

        train_labels_df = pd.read_csv(train_file, sep='\t')
        train_indices, train_labels = [], []
        for nod, lab in zip(train_labels_df[nodes_header].values,
                            train_labels_df[label_header].values):
            train_indices.append(nodes_dict[nod])
            train_labels.append(labels_dict[lab])

        train_idx = torch.tensor(train_indices, dtype=torch.long)
        train_y = torch.tensor(train_labels, dtype=torch.long)

        test_labels_df = pd.read_csv(test_file, sep='\t')
        test_indices, test_labels = [], []
        for nod, lab in zip(test_labels_df[nodes_header].values,
                            test_labels_df[label_header].values):
            test_indices.append(nodes_dict[nod])
            test_labels.append(labels_dict[lab])

        test_idx = torch.tensor(test_indices, dtype=torch.long)
        test_y = torch.tensor(test_labels, dtype=torch.long)

        data = Data(edge_index=edge_index)
        data.edge_type = edge_type
        data.train_idx = train_idx
        data.train_y = train_y
        data.test_idx = test_idx
        data.test_y = test_y
        data.num_nodes = edge_index.max().item() + 1

        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])
    def visualize_subgraph(self,
                           node_idx,
                           edge_index,
                           edge_mask,
                           y=None,
                           threshold=None,
                           edge_y=None,
                           node_alpha=None,
                           seed=10,
                           **kwargs):
        r"""Visualizes the subgraph given an edge mask
        :attr:`edge_mask`.

        Args:
            node_idx (int): The node id to explain.
                Set to :obj:`-1` to explain graph.
            edge_index (LongTensor): The edge indices.
            edge_mask (Tensor): The edge mask.
            y (Tensor, optional): The ground-truth node-prediction labels used
                as node colorings. All nodes will have the same color
                if :attr:`node_idx` is :obj:`-1`.(default: :obj:`None`).
            threshold (float, optional): Sets a threshold for visualizing
                important edges. If set to :obj:`None`, will visualize all
                edges with transparancy indicating the importance of edges.
                (default: :obj:`None`)
            edge_y (Tensor, optional): The edge labels used as edge colorings.
            node_alpha (Tensor, optional): Tensor of floats (0 - 1) indicating
                transparency of each node.
            seed (int, optional): Random seed of the :obj:`networkx` node
                placement algorithm. (default: :obj:`10`)
            **kwargs (optional): Additional arguments passed to
                :func:`nx.draw`.

        :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
        """
        import matplotlib.pyplot as plt
        import networkx as nx

        # Exactly one mask value per edge is required.
        assert edge_mask.size(0) == edge_index.size(1)

        if node_idx == -1:
            # Graph-level explanation: keep every edge/node and ignore node
            # labels (all nodes share one color).
            # NOTE(review): legacy `torch.BoolTensor(...)` constructor called
            # with a `device` kwarg -- confirm this is accepted on the
            # PyTorch versions targeted by this file.
            hard_edge_mask = torch.BoolTensor([True] * edge_index.size(1),
                                              device=edge_mask.device)
            subset = torch.arange(edge_index.max().item() + 1,
                                  device=edge_index.device)
            y = None

        else:
            # Only operate on a k-hop subgraph around `node_idx`.
            subset, edge_index, _, hard_edge_mask = k_hop_subgraph(
                node_idx,
                self.num_hops,
                edge_index,
                relabel_nodes=True,
                num_nodes=None,
                flow=self.__flow__())

        # Restrict the mask to the edges that survived the subgraph cut.
        edge_mask = edge_mask[hard_edge_mask]

        if threshold is not None:
            # Binarize: edges at or above the threshold become fully opaque.
            edge_mask = (edge_mask >= threshold).to(torch.float)

        if y is None:
            y = torch.zeros(edge_index.max().item() + 1,
                            device=edge_index.device)
        else:
            # Normalize labels to [0, 1] so they can serve as colormap input.
            y = y[subset].to(torch.float) / y.max().item()

        if edge_y is None:
            edge_color = ['black'] * edge_index.size(1)
        else:
            # Cycle through matplotlib's default color cycle per edge label.
            colors = list(plt.rcParams['axes.prop_cycle'])
            edge_color = [
                colors[i % len(colors)]['color']
                for i in edge_y[hard_edge_mask]
            ]

        data = Data(edge_index=edge_index,
                    att=edge_mask,
                    edge_color=edge_color,
                    y=y,
                    num_nodes=y.size(0)).to('cpu')
        G = to_networkx(data,
                        node_attrs=['y'],
                        edge_attrs=['att', 'edge_color'])
        # Relabel subgraph nodes back to their original (global) indices.
        mapping = {k: i for k, i in enumerate(subset.tolist())}
        G = nx.relabel_nodes(G, mapping)

        # Forward only the kwargs each networkx drawing function understands.
        node_args = set(signature(nx.draw_networkx_nodes).parameters.keys())
        node_kwargs = {k: v for k, v in kwargs.items() if k in node_args}
        node_kwargs['node_size'] = kwargs.get('node_size') or 800
        node_kwargs['cmap'] = kwargs.get('cmap') or 'cool'

        label_args = set(signature(nx.draw_networkx_labels).parameters.keys())
        label_kwargs = {k: v for k, v in kwargs.items() if k in label_args}
        label_kwargs['font_size'] = kwargs.get('font_size') or 10

        pos = nx.spring_layout(G, seed=seed)
        ax = plt.gca()
        # Draw each edge as a curved arrow whose opacity tracks its mask
        # value, floored at 0.1 so every edge stays visible.
        for source, target, data in G.edges(data=True):
            ax.annotate('',
                        xy=pos[target],
                        xycoords='data',
                        xytext=pos[source],
                        textcoords='data',
                        arrowprops=dict(
                            arrowstyle="->",
                            alpha=max(data['att'], 0.1),
                            color=data['edge_color'],
                            shrinkA=sqrt(node_kwargs['node_size']) / 2.0,
                            shrinkB=sqrt(node_kwargs['node_size']) / 2.0,
                            connectionstyle="arc3,rad=0.1",
                        ))

        if node_alpha is None:
            nx.draw_networkx_nodes(G,
                                   pos,
                                   node_color=y.tolist(),
                                   **node_kwargs)
        else:
            # Per-node transparency values must be valid alphas in [0, 1].
            node_alpha_subset = node_alpha[subset]
            assert ((node_alpha_subset >= 0) & (node_alpha_subset <= 1)).all()
            nx.draw_networkx_nodes(G,
                                   pos,
                                   alpha=node_alpha_subset.tolist(),
                                   node_color=y.tolist(),
                                   **node_kwargs)

        nx.draw_networkx_labels(G, pos, **label_kwargs)

        return ax, G
 def _process(self, data_list):
     """Collate ``data_list`` into a single graph object.

     Returns an empty ``Data`` for an empty list; otherwise batches the
     graphs with ``Batch.from_data_list`` and strips the ``batch`` vector
     so the result looks like one plain ``Data`` object.
     """
     if not data_list:
         return Data()
     merged = Batch.from_data_list(data_list)
     delattr(merged, "batch")
     return merged
    def visualize_subgraph(self,
                           node_idx,
                           edge_index,
                           edge_mask,
                           y=None,
                           k=2,
                           threshold=None,
                           **kwargs):
        r"""Visualizes the subgraph around :attr:`node_idx` given an edge mask
        :attr:`edge_mask`.

        Args:
            node_idx (int): The node id to explain.
            edge_index (LongTensor): The edge indices.
            edge_mask (Tensor): The edge mask.
            y (Tensor, optional): The ground-truth node-prediction labels used
                as node colorings. (default: :obj:`None`)
            k (int, optional): Number of hops of the extracted subgraph
                around :attr:`node_idx`. (default: :obj:`2`)
            threshold (float, optional): Sets a threshold for visualizing
                important edges. If set to :obj:`None`, will visualize all
                edges with transparancy indicating the importance of edges.
                (default: :obj:`None`)
            **kwargs (optional): Additional arguments passed to
                :func:`nx.draw`.

        :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
        """

        # Only operate on a k-hop subgraph around `node_idx`.
        subset, edge_index, _, _ = k_hop_subgraph(node_idx,
                                                  k,
                                                  edge_index,
                                                  relabel_nodes=True)

        # NOTE(review): `edge_mask` is not restricted to the subgraph's edges
        # here (the hard edge mask from `k_hop_subgraph` is discarded) --
        # presumably the caller passes a mask that already matches the
        # subgraph; confirm.
        if threshold is not None:
            # Binarize: edges at or above the threshold become fully opaque.
            edge_mask = (edge_mask >= threshold).to(torch.float)

        if y is None:
            y = torch.zeros(edge_index.max().item() + 1,
                            device=edge_index.device)
        else:
            # Normalize labels to [0, 1] so they can serve as colormap input.
            y = y[subset].to(torch.float) / y.max().item()

        data = Data(edge_index=edge_index,
                    att=edge_mask,
                    y=y,
                    num_nodes=y.size(0)).to('cpu')
        G = to_networkx(data, node_attrs=['y'], edge_attrs=['att'])
        # Relabel subgraph nodes back to their original (global) indices.
        mapping = {k: i for k, i in enumerate(subset.tolist())}
        G = nx.relabel_nodes(G, mapping)

        node_kwargs = copy(kwargs)
        node_kwargs['node_size'] = kwargs.get('node_size') or 800
        node_kwargs['cmap'] = kwargs.get('cmap') or 'Accent'

        label_kwargs = copy(kwargs)
        label_kwargs['font_size'] = kwargs.get('font_size') or 10

        pos = nx.spring_layout(G)
        ax = plt.gca()
        ax.axis('off')
        # Draw each edge as a curved arrow whose opacity tracks its mask
        # value, floored at 0.05 so every edge stays visible.
        for source, target, data in G.edges(data=True):
            ax.annotate('',
                        xy=pos[target],
                        xycoords='data',
                        xytext=pos[source],
                        textcoords='data',
                        arrowprops=dict(
                            arrowstyle="->",
                            alpha=max(data['att'], 0.05),
                            shrinkA=sqrt(node_kwargs['node_size']) / 2.0,
                            shrinkB=sqrt(node_kwargs['node_size']) / 2.0,
                            connectionstyle="arc3,rad=0.1",
                        ))
        nx.draw_networkx_nodes(G, pos, node_color=y.tolist(), **node_kwargs)
        nx.draw_networkx_labels(G, pos, **label_kwargs)

        return ax, G
Exemple #5
0
            node2id[node1] = id1

        try:
            id2 = node2id[node2]
        except:
            id2 = len(node2id)
            node2id[node2] = id2
        
        edge_list.add((id1, id2))
        # edge_list.add((id2, id1))
    except:
        pass

# Build a [2, num_edges] index tensor from the accumulated (src, dst) pairs.
edge_index = torch.tensor(np.array(edge_list).T, dtype=torch.long)

data = Data(edge_index=edge_index)

# Node2Vec with short walks (length 4, context window 2); `sparse=True`
# produces sparse embedding gradients so SparseAdam can be used below.
model = Node2Vec(data.edge_index, embedding_dim=128, walk_length=4,
            context_size=2, walks_per_node=2, sparse=True).to(device)

loader = model.loader(batch_size=2000, shuffle=True, num_workers=12)
optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01)

for epoch in range(EPOCHS):
    model.train()

    # total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        # NOTE(review): there is no `optimizer.step()` after `backward()`,
        # so the parameters are never updated -- this snippet looks
        # truncated; confirm against the original script before relying
        # on it.
Exemple #6
0
def perturb_edges(data,
                  name,
                  remove_pct,
                  add_pct,
                  hidden_channels=16,
                  epochs=400):
    """Augment the edges of ``data`` by removing/adding a percentage of edges.

    A graph auto-encoder (GAE) is trained to predict edge probabilities; the
    predicted adjacency is then deterministically resampled via
    ``sample_graph_det``.  Both the raw predictions and the final augmented
    edge index are cached under ``{ROOT}/cache/edge/`` so repeated calls with
    the same arguments are cheap.  ``data`` is modified in place
    (``train_edge_index`` in the inductive setting, ``edge_index`` otherwise).

    Args:
        data: Graph data object exposing ``setting`` plus the usual feature
            and edge attributes.
        name (str): Dataset name used as the cache key.
        remove_pct (int): Percentage of edges to remove.
        add_pct (int): Percentage of edges to add.
        hidden_channels (int, optional): GAE hidden size. (default: 16)
        epochs (int, optional): Number of GAE training epochs. (default: 400)
    """
    if remove_pct == 0 and add_pct == 0:
        return
    try:
        # Fast path: a fully materialized augmentation for these exact
        # percentages is already cached.
        cached = pickle.load(
            open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt', 'rb'))
        print(f'Use cached edge augmentation for dataset {name}')

        if data.setting == 'inductive':
            data.train_edge_index = cached
        else:
            data.edge_index = cached
        return
    except FileNotFoundError:
        try:
            # Second-level cache: reuse the trained link predictions and only
            # redo the deterministic sampling step.
            A_pred, adj_orig = pickle.load(
                open(f'{ROOT}/cache/edge/{name}.pt', 'rb'))
            A = sample_graph_det(adj_orig, A_pred, remove_pct, add_pct)
            data.edge_index, _ = from_scipy_sparse_matrix(A)
            pickle.dump(
                data.edge_index,
                open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt',
                     'wb'))
            return
        except FileNotFoundError:
            print(
                f'cache/edge/{name}_{remove_pct}_{add_pct}.pt not found! Regenerating it now'
            )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if data.setting == 'inductive':
        train_data = Data(x=data.train_x,
                          ori_x=data.ori_x,
                          edge_index=data.train_edge_index,
                          y=data.train_y)
    else:
        train_data = deepcopy(data)

    # Keep the unsplit edge index around: the original adjacency is rebuilt
    # from it after training.
    edge_index = deepcopy(train_data.edge_index)
    train_data = train_test_split_edges(train_data,
                                        val_ratio=0.1,
                                        test_ratio=0)
    num_features = train_data.ori_x.shape[1]
    model = GAE(GCNEncoder(num_features, hidden_channels))
    model = model.to(device)
    x = train_data.ori_x.to(device)
    train_pos_edge_index = train_data.train_pos_edge_index.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    best_val_auc = 0
    best_z = None
    for epoch in range(1, epochs + 1):
        model.train()
        optimizer.zero_grad()
        z = model.encode(x, train_pos_edge_index)
        loss = model.recon_loss(z, train_pos_edge_index)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            z = model.encode(x, train_pos_edge_index)

        auc, ap = model.test(z, train_data.val_pos_edge_index,
                             train_data.val_neg_edge_index)
        print('Val | Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(
            epoch, auc, ap))
        if auc > best_val_auc:
            best_val_auc = auc
            best_z = deepcopy(z)

    # Bug fix: use the embedding with the best validation AUC.  The original
    # computed predictions from the last-epoch `z`, leaving `best_z` unused.
    z_best = best_z if best_z is not None else z
    A_pred = torch.sigmoid(torch.mm(z_best, z_best.T)).cpu().numpy()

    adj_orig = to_scipy_sparse_matrix(edge_index).asformat('csr')
    adj_pred = sample_graph_det(adj_orig, A_pred, remove_pct, add_pct)

    if data.setting == 'inductive':
        data.train_edge_index, _ = from_scipy_sparse_matrix(adj_pred)
    else:
        data.edge_index, _ = from_scipy_sparse_matrix(adj_pred)

    # Cache the raw link predictions and the final augmented edge index.
    pickle.dump((A_pred, adj_orig), open(f'{ROOT}/cache/edge/{name}.pt', 'wb'))

    if data.setting == 'inductive':
        pickle.dump(
            data.train_edge_index,
            open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt', 'wb'))
    else:
        pickle.dump(
            data.edge_index,
            open(f'{ROOT}/cache/edge/{name}_{remove_pct}_{add_pct}.pt', 'wb'))
Exemple #7
0
def start_view(args):
    """Visual QA of the hit-embedding stage: read one event, pick a few
    particles, embed their hits, and save scatter/edge plots to
    ``args.outdir``."""
    outdir = args.outdir
    event = master.Event(utils_dir.inputdir)
    event.read(args.evtid)

    # randomly select N particles with each having at least 6 hits
    pids = event.particles[(event.particles.nhits) > 5]
    np.random.seed(args.seed)
    rnd = np.random.randint(0, pids.shape[0], args.npids)
    sel_pids = pids.particle_id.values[rnd]

    # Keep only hits that belong to the selected particles.
    event._hits = event.hits[event.hits.particle_id.isin(sel_pids)]
    hits = event.cluster_info(utils_dir.detector_path)

    # track labeling -- determine true edges: order each particle's hits by
    # distance R from its production vertex, group by layer, and connect
    # every hit in one layer to every hit in the next.
    hits = hits.assign(R=np.sqrt((hits.x - hits.vx)**2 +
                                 (hits.y - hits.vy)**2 +
                                 (hits.z - hits.vz)**2))
    hits = hits.sort_values('R').reset_index(drop=True).reset_index(drop=False)
    hit_list = hits.groupby(
        ['particle_id', 'layer'],
        sort=False)['index'].agg(lambda x: list(x)).groupby(
            level=0).agg(lambda x: list(x))
    e = []
    for row in hit_list.values:
        for i, j in zip(row[0:-1], row[1:]):
            e.extend(list(itertools.product(i, j)))
    layerless_true_edges = np.array(e).T

    # input data for embedding; r/phi/z are rescaled by (1000, pi, 1000)
    data = Data(x=torch.from_numpy(hits[['r', 'phi', 'z']].to_numpy()/np.array([1000, np.pi, 1000])).float(),\
            pid=torch.from_numpy(hits.particle_id.to_numpy()),
            layers=torch.from_numpy(hits.layer.to_numpy()), hid=torch.from_numpy(hits.hit_id.to_numpy()))
    cell_features = [
        'cell_count', 'cell_val', 'leta', 'lphi', 'lx', 'ly', 'lz', 'geta',
        'gphi'
    ]
    data.layerless_true_edges = torch.from_numpy(layerless_true_edges)
    data.cell_data = torch.from_numpy(hits[cell_features].values).float()

    action = 'embedding'

    # Load the embedding model config shipped with the exatrkx package.
    config_file = pkg_resources.resource_filename(
        "exatrkx", os.path.join('configs', config_dict[action]))
    with open(config_file) as f:
        e_config = yaml.load(f, Loader=yaml.FullLoader)

    e_config['train_split'] = [1, 0, 0]
    e_config['r_val'] = 2.0
    e_model = LayerlessEmbedding(e_config)
    e_model = e_model.load_from_checkpoint(args.embed_ckpt_dir,
                                           hparams=e_config)
    e_model.eval()
    # Embed cell features concatenated with the scaled (r, phi, z) inputs.
    spatial = e_model(torch.cat([data.cell_data, data.x], axis=-1))
    spatial_np = spatial.detach().numpy()

    # plot hits in the embedding space, two embedding dimensions at a time
    embedding_dims = [(0, 1), (2, 3), (4, 5), (6, 7)]
    for id1, id2 in embedding_dims:
        fig = plt.figure(figsize=(6, 6))
        for pid in sel_pids:
            idx = hits.particle_id == pid
            plt.scatter(spatial_np[idx, id1], spatial_np[idx, id2])

        plt.savefig(
            os.path.join(outdir, "embedding_{}_{}.pdf".format(id1, id2)))
        del fig

    # build edges from the embedding space
    e_spatial = utils_torch.build_edges(spatial, e_model.hparams['r_val'],
                                        e_model.hparams['knn_val'])
    e_spatial_np = e_spatial.detach().numpy()

    # view hits with or without edge candidates...
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection='3d')
    for pid in sel_pids:
        ax.scatter(hits[hits.particle_id == pid].x.values,
                   hits[hits.particle_id == pid].y.values,
                   hits[hits.particle_id == pid].z.values)
    # add edges
    e_spatial_np_t = e_spatial_np.T
    for iedge in range(e_spatial_np.shape[1]):
        ax.plot(hits.iloc[e_spatial_np_t[iedge]].x.values,
                hits.iloc[e_spatial_np_t[iedge]].y.values,
                hits.iloc[e_spatial_np_t[iedge]].z.values,
                color='k',
                alpha=0.3,
                lw=1.)
    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    # NOTE(review): the output filename reads 'emedding' (sic); rename only
    # if no downstream tooling depends on the existing name.
    plt.savefig(os.path.join(outdir, "emedding_edges_3d.pdf"))
    del fig
    del ax

    e_spatial_np_t = e_spatial_np.T
    layerless_true_edges_t = layerless_true_edges.T  # same as e

    # Helper: scatter the selected particles' hits in (xname, yname) and
    # optionally overlay the given edges; saves PDF (and PNG when no_axis).
    def plot_edges(xname,
                   yname,
                   xlabel,
                   ylabel,
                   outname,
                   with_edges=True,
                   no_axis=False,
                   edges=e_spatial_np_t):
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111)
        for pid in sel_pids:
            ax.scatter(hits[hits.particle_id == pid][xname].values,
                       hits[hits.particle_id == pid][yname].values)
        # add edges
        if with_edges:
            for iedge in range(edges.shape[0]):
                ax.plot(hits.iloc[edges[iedge]][xname].values,\
                        hits.iloc[edges[iedge]][yname].values, color='k', alpha=0.3, lw=1.)
        ax.set_xlabel(xlabel, fontsize=16)
        ax.set_ylabel(ylabel, fontsize=16)
        if xname == 'z':
            ax.set_xlim(-3000, 3000)
        trans = False
        if no_axis:
            ax.set_axis_off()
            trans = True
            plt.savefig(os.path.join(outdir, "{}.png".format(outname)),
                        transparent=trans)
        plt.savefig(os.path.join(outdir, "{}.pdf".format(outname)),
                    transparent=trans)

    # Helper: plain scatter of all hits in (xname, yname), saved as PDF.
    def plot_hits(xname, yname, outname):
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111)
        ax.scatter(hits[xname].values, hits[yname].values)
        if xname == 'z':
            ax.set_xlim(-3000, 3000)
        ax.set_xlabel(xname, fontsize=16)
        ax.set_ylabel(yname, fontsize=16)
        plt.savefig(os.path.join(outdir, "{}.pdf".format(outname)))

    plot_edges("x", 'y', 'x', 'y', 'embedding_edges_x_y')
    plot_edges("z", 'r', 'z', 'r', 'embedding_edges_z_r')
    plot_edges("x",
               'y',
               'x',
               'y',
               'embedding_edges_truth_x_y',
               edges=layerless_true_edges_t)
    plot_edges("z",
               'r',
               'z',
               'r',
               'embedding_edges_truth_z_r',
               edges=layerless_true_edges_t)
    plot_edges("x",
               'y',
               'x',
               'y',
               'embedding_hits_truth_x_y',
               with_edges=False)
    plot_edges("z",
               'r',
               'z',
               'r',
               'embedding_hits_truth_z_r',
               with_edges=False)
    plot_hits("x", 'y', 'embedding_hits_x_y')
    plot_hits("z", 'r', 'embedding_hits_z_r')
    plot_edges("x", 'y', 'x', 'y', 'embedding_front', no_axis=True)
Exemple #8
0
    def torch_geometric_graph_from_pdb_code(self,
                                            pdb_code,
                                            chain_selection='all',
                                            edge_construction=['contacts'],
                                            contact_file=None,
                                            encoding=False,
                                            k_nn=None,
                                            custom_edges=None):
        """
        Produces a PyTorch Geometric ``Data`` object from a protein structure.

        The protein is first built as a DGL graph via
        ``self.dgl_graph_from_pdb_code``; its node and edge features are then
        concatenated column-wise into the ``x`` and ``edge_attr`` tensors of
        the returned ``Data`` object.

        :param pdb_code: 4-character PDB accession code
        :type pdb_code: str
        :param chain_selection: Polypeptide chains to include, e.g. one of
            {'A', 'B', 'AB', 'BC'}, defaults to 'all'
        :type chain_selection: str
        :param edge_construction: Edge construction strategies to use, from
            ['contacts', 'distance', 'delaunay'], defaults to ['contacts']
        :type edge_construction: list
        :param contact_file: Path to a local contact file, if any
        :type contact_file: str
        :param encoding: Must be True; non-numeric features are encoded
        :type encoding: bool
        :param k_nn: K nearest neighbours for KNN edge construction,
            defaults to None
        :type k_nn: int, optional
        :param custom_edges: User-supplied edges, defaults to None
        :type custom_edges: Pandas DataFrame, optional
        :return: PyTorch Geometric graph of the protein structure
        :rtype: PyTorch Geometric Data object
        """
        assert encoding, 'Non-numeric feature encoding must be True'

        dgl_g, _residue_name_enc, _residue_id_enc = self.dgl_graph_from_pdb_code(
            pdb_code=pdb_code,
            chain_selection=chain_selection,
            contact_file=contact_file,
            edge_construction=edge_construction,
            custom_edges=custom_edges,
            encoding=encoding,
            k_nn=k_nn)

        # Collect node features column-wise, promoting 1-D features to
        # column vectors before concatenation.
        node_columns = []
        for feat_name in dgl_g.node_attr_schemes().keys():
            feat = dgl_g.ndata[feat_name].float()
            if len(feat.shape) == 1:
                feat = feat.unsqueeze(dim=1)
            node_columns.append(feat)
        node_features = torch.cat(node_columns, dim=1)

        # Same treatment for edge features.
        edge_columns = []
        for edge_name in dgl_g.edge_attr_schemes().keys():
            feat = dgl_g.edata[edge_name].float()
            if len(feat.shape) == 1:
                feat = feat.unsqueeze(dim=1)
            edge_columns.append(feat)
        edge_feats = torch.cat(edge_columns, dim=1)

        # Assemble the Torch Geometric graph.
        # NOTE(review): stacking along dim=1 yields a [num_edges, 2] tensor,
        # whereas PyG's convention for edge_index is [2, num_edges] --
        # preserved as-is; confirm against downstream consumers.
        geom_graph = Data(x=node_features,
                          edge_index=torch.stack(dgl_g.edges(), dim=1),
                          edge_attr=edge_feats)
        print(geom_graph)
        return geom_graph
Exemple #9
0
    def download(self):
        """Assemble the raw ABIDE dataset from local files and pickle it.

        Reads the label spreadsheet and per-subject connectivity-matrix text
        files, writes a human-readable summary to ``abide_raw_info.txt`` and
        the assembled dataset (one ``Data`` object per subject, keyed by
        subject id) to ``abide_raw.pkl`` under ``self.raw_dir``.
        """
        assert len(self.path) == 2
        path_data = self.path[0]
        path_label = self.path[1]

        labels = read_xlsx(path_label)
        labels = labels.astype({'subject': 'str'})
        # Integer-encode sites; remap DX_GROUP and SEX to start at 0
        # (per the info file below: DX_GROUP 0/1 = control/autism,
        # SEX 0/1 = M/F).
        labels['SITE_ID'], uniques = pd.factorize(labels['SITE_ID'])
        labels['DX_GROUP'] = 2 - labels['DX_GROUP']
        labels['SEX'] = labels['SEX'] - 1

        # Tuple of per-column {subject: value} dicts, in this fixed order.
        labels = itemgetter('SITE_ID', 'DX_GROUP', 'DSM_IV_TR', 'AGE_AT_SCAN',
                            'SEX')(labels.set_index('subject').to_dict())

        subjlist = os.listdir(path_data)
        filelist = [
            fname for fname in os.listdir(os.path.join(path_data, subjlist[0]))
            if 'matrix' in fname
        ]

        with open(os.path.join(self.raw_dir, 'abide_raw_info.txt'), 'w') as f:
            print('Label info:', file=f)
            # Bug fix: this line previously went to stdout (missing `file=f`),
            # unlike every sibling print in this block.
            print('All labels:', 'SITE_ID', 'DX_GROUP', 'DSM_IV_TR',
                  'AGE_AT_SCAN', 'SEX', file=f)
            print('Site labels (0-n):', uniques.values, file=f)
            print('DX_GROUP (0/1):', 'control, autism', file=f)
            print('DSM_IV_TR (0-n):',
                  'control, autism, aspergers, PDD-NOS, aspergers or PDD-NOS',
                  file=f)
            print('SEX (0/1):', 'M, F', file=f)
            print('\n', file=f)
            print('Features:', file=f)
            # Bug fix: `sep` only takes effect between multiple arguments, so
            # unpack the lists to get one entry per line instead of a single
            # list repr.
            print(*filelist, sep='\n', file=f)
            print('\n', file=f)
            print('Saved subjects:', file=f)
            print(*subjlist, sep='\n', file=f)
            print('\n', file=f)

        dataset = {}
        for subj in subjlist:
            print('downloading', subj, '...')
            features = []
            for file in filelist:
                filepath = os.path.join(path_data, subj, file)
                # Original values lie in [-1, 1]; shift them to [0, 2].
                matrix = torch.tensor(np.loadtxt(filepath),
                                      dtype=torch.float32) + 1
                features.append(matrix)

            y = {
                'SITE_ID': labels[0][subj],
                'DX_GROUP': labels[1][subj],
                'DSM_IV_TR': labels[2][subj],
                'AGE_AT_SCAN': labels[3][subj],
                'SEX': labels[4][subj]
            }
            # Dummy all-ones node features, one node per matrix row.
            x = torch.ones([matrix.shape[0], 1], dtype=torch.float32)
            data = Data(x=x, y=y)
            data.features = features
            dataset[subj] = data

        with open(os.path.join(self.raw_dir, 'abide_raw.pkl'), 'wb') as f:
            pickle.dump(dataset, f)
            print('ABIDE dataset saved to path:', self.raw_dir)
Exemple #10
0
def generate_torchgeom_dataset(data):
    """Returns dataset that can be used to train our model.

    Each simulation's mesh is Delaunay-triangulated, and opposite boundary
    nodes are merged so that the resulting graph is periodic (right/top
    boundary nodes are removed and their neighborhoods folded into the
    left/bottom counterparts).

    Args:
        data (dict): Data dictionary with keys t, x, u.
    Returns:
        dataset (list): Array of torchgeometric Data objects.
    """

    n_sims = data['u'].shape[0]
    dataset = []

    for sim_ind in range(n_sims):
        print("{} / {}".format(sim_ind + 1, n_sims))

        x = data['x'][sim_ind]
        tri = Delaunay(x)
        neighbors = neighbors_from_delaunay(tri)

        # Find periodic couples and merge their neighborhoods
        origin_node = 0
        corner_nodes = []
        hor_couples = []
        vert_couples = []
        eps = 1.0e-6

        b = x.ravel().max()  # domain size

        for i in range(x.shape[0]):
            if is_near(x[i], [[b, 0], [0, b], [b, b]]):
                corner_nodes.append(i)
            elif is_near(x[i], [[0, 0]]):
                origin_node = i
            elif abs(x[i, 0]) < eps:  # left boundary
                for j in range(x.shape[0]):
                    if abs(x[j, 0] - b) < eps and abs(x[j, 1] - x[i, 1]) < eps:
                        hor_couples.append([i, j])
            elif abs(x[i, 1]) < eps:  # bottom boundary
                for j in range(x.shape[0]):
                    if abs(x[j, 1] - b) < eps and abs(x[j, 0] - x[i, 0]) < eps:
                        vert_couples.append([i, j])

        remove_nodes = []

        # Merge corners
        for i in corner_nodes:
            neighbors[origin_node].extend(neighbors[i])
            remove_nodes.append(i)

        # Merge horizontal couples
        for i, j in hor_couples:
            neighbors[i].extend(neighbors[j])
            remove_nodes.append(j)

        # Merge vertical couples
        for i, j in vert_couples:
            neighbors[i].extend(neighbors[j])
            remove_nodes.append(j)

        use_nodes = list(set(range(len(x))) - set(remove_nodes))

        # Remove right and top boundaries.  Bug fix: `np.object` was
        # deprecated in NumPy 1.20 and removed in 2.0; the builtin `object`
        # is the equivalent dtype for this ragged array.
        neighbors = np.array(neighbors, dtype=object)[use_nodes]

        # Rewrite indices of the removed nodes (the three non-origin corners
        # all map onto the origin node).
        map_domain = corner_nodes + [x[1] for x in hor_couples] + [x[1] for x in vert_couples]
        map_codomain = [origin_node]*3 + [x[0] for x in hor_couples] + [x[0] for x in vert_couples]
        map_inds = dict(zip(map_domain, map_codomain))

        for i in range(len(neighbors)):
            for j in range(len(neighbors[i])):
                if neighbors[i][j] in remove_nodes:
                    neighbors[i][j] = map_inds[neighbors[i][j]]
            neighbors[i] = list(set(neighbors[i]))  # remove duplicates

        # Reset indices
        map_inds = dict(zip(use_nodes, range(len(use_nodes))))

        for i in range(len(neighbors)):
            for j in range(len(neighbors[i])):
                neighbors[i][j] = map_inds[neighbors[i][j]]

        # Build a directed edge list from the neighbor lists; self-loops are
        # skipped.
        edge_index = []
        for i, _ in enumerate(neighbors):
            for _, neighbor in enumerate(neighbors[i]):
                if i == neighbor:
                    continue
                edge = [i, neighbor]
                edge_index.append(edge)
        edge_index = np.array(edge_index).T

        n = None  # slice end for the time axis; None keeps every step
        print(f"generate_torchgeom_dataset() -> using {n} steps.")
        tg_data = Data(
            x=torch.Tensor(data['u'][sim_ind, 0, use_nodes, :]),
            # torch.tensor with an explicit dtype avoids the float round-trip
            # of torch.Tensor(...).long().
            edge_index=torch.tensor(edge_index, dtype=torch.long),
            y=torch.Tensor(data['u'][sim_ind][0:n, use_nodes]).transpose(0, 1),
            pos=torch.Tensor(data['x'][sim_ind, use_nodes]),
            t=torch.Tensor(data['t'][sim_ind][0:n]),
        )

        dataset.append(tg_data)

    return dataset
Exemple #11
0
                                      walk_length=walk_length,
                                      p=p,
                                      q=q)
            paraller.parser = self._sample
            for item in paraller.run():
                if out is None:
                    out = item.get()
                else:
                    out = torch.cat((out, item.get()), 0)
        else:
            out = self._sample(0,
                               start.size(0),
                               start=start,
                               walk_length=walk_length,
                               p=p,
                               q=q)
        return out


if __name__ == "__main__":
    # Smoke test: run a weighted random walk on a tiny 3-node toy graph.
    node_features = torch.tensor([[-1], [0], [1]], dtype=torch.float)
    edge_index = torch.tensor([[0, 1, 1], [1, 0, 2]], dtype=torch.long)
    weights = torch.tensor([1, 0.2, 0.8])
    start_nodes = torch.tensor([0, 1, 2])

    toy_graph = Data(x=node_features, edge_index=edge_index)
    walker = RandomWalk(toy_graph,
                        edge_weight=weights,
                        is_parallel=False,
                        reverse=True)
    print(walker.walk(start_nodes, walk_length=5, p=0.2, q=0.5))
Exemple #12
0
def test_graph_saint():
    """Smoke-test the three GraphSAINT samplers on a fixed 6-node graph."""
    adj = torch.tensor([
        [+1, +2, +3, +0, +4, +0],
        [+5, +6, +0, +7, +0, +8],
        [+9, +0, 10, +0, 11, +0],
        [+0, 12, +0, 13, +0, 14],
        [15, +0, 16, +0, 17, +0],
        [+0, 18, +0, 19, +0, 20],
    ])

    edge_index = adj.nonzero(as_tuple=False).t()
    edge_type = adj[edge_index[0], edge_index[1]]
    x = torch.Tensor([[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
    data = Data(edge_index=edge_index, x=x, edge_type=edge_type, num_nodes=6)

    def check_first_batch(loader, exp_x, exp_edge_index, exp_edge_type):
        # With a fixed seed the first sampled subgraph is deterministic.
        batch = next(iter(loader))
        assert batch.x.tolist() == exp_x
        assert batch.edge_index.tolist() == exp_edge_index
        assert batch.edge_type.tolist() == exp_edge_type

    def check_all_batches(loader, max_nodes):
        # Every batch respects the sampler's size bounds and carries
        # one normalization coefficient per node/edge.
        assert len(loader) == 4
        for batch in loader:
            assert len(batch) == 5
            assert batch.num_nodes <= max_nodes
            assert batch.num_edges <= max_nodes * 4
            assert batch.node_norm.numel() == batch.num_nodes
            assert batch.edge_norm.numel() == batch.num_edges

    torch.manual_seed(12345)
    loader = GraphSAINTNodeSampler(data, batch_size=3, num_steps=4,
                                   sample_coverage=10, log=False)
    check_first_batch(loader,
                      [[2, 2], [4, 4], [5, 5]],
                      [[0, 0, 1, 1, 2], [0, 1, 0, 1, 2]],
                      [10, 11, 16, 17, 20])
    check_all_batches(loader, 3)

    torch.manual_seed(12345)
    loader = GraphSAINTEdgeSampler(data, batch_size=2, num_steps=4,
                                   sample_coverage=10, log=False)
    check_first_batch(loader,
                      [[0, 0], [2, 2], [3, 3]],
                      [[0, 0, 1, 1, 2], [0, 1, 0, 1, 2]],
                      [1, 3, 9, 10, 13])
    check_all_batches(loader, 4)

    torch.manual_seed(12345)
    loader = GraphSAINTRandomWalkSampler(data, batch_size=2, walk_length=1,
                                         num_steps=4, sample_coverage=10,
                                         log=False)
    check_first_batch(loader,
                      [[1, 1], [2, 2], [4, 4]],
                      [[0, 1, 1, 2, 2], [0, 1, 2, 1, 2]],
                      [6, 10, 11, 16, 17])
    check_all_batches(loader, 4)
    def process(self):
        """Parse the raw RDF dump into graph tensors and save the dataset.

        Reads the gzipped N-Triples graph plus the task/train/test TSV
        files from ``self.raw_paths``, builds a typed edge list (every
        relation gets a forward and an inverse edge type), attaches the
        train/test node indices and labels, and saves the collated
        ``Data``/``HeteroData`` object to ``self.processed_paths[0]``.
        """
        import gzip
        import pandas as pd
        import rdflib as rdf

        graph_file, task_file, train_file, test_file = self.raw_paths

        # rdflib is chatty while parsing; silence its stdout.
        with hide_stdout():
            g = rdf.Graph()
            with gzip.open(graph_file, 'rb') as f:
                g.parse(file=f, format='nt')

        freq = Counter(g.predicates())

        # Most frequent relations receive the smallest relation ids.
        relations = sorted(set(g.predicates()), key=lambda p: -freq.get(p, 0))
        subjects = set(g.subjects())
        objects = set(g.objects())
        nodes = list(subjects.union(objects))

        N = len(nodes)
        R = 2 * len(relations)  # one forward + one inverse type per relation

        relations_dict = {rel: i for i, rel in enumerate(relations)}
        nodes_dict = {node: i for i, node in enumerate(nodes)}

        edges = []
        for s, p, o in g.triples((None, None, None)):
            src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
            edges.append([src, dst, 2 * rel])
            edges.append([dst, src, 2 * rel + 1])

        edges = torch.tensor(edges, dtype=torch.long).t().contiguous()
        # Lexicographic sort by (src, dst, type) via a single scalar key.
        perm = (N * R * edges[0] + R * edges[1] + edges[2]).argsort()
        edges = edges[:, perm]

        edge_index, edge_type = edges[:2], edges[2]

        if self.name == 'am':
            label_header = 'label_cateogory'
            nodes_header = 'proxy'
        elif self.name == 'aifb':
            label_header = 'label_affiliation'
            nodes_header = 'person'
        elif self.name == 'mutag':
            label_header = 'label_mutagenic'
            nodes_header = 'bond'
        elif self.name == 'bgs':
            label_header = 'label_lithogenesis'
            nodes_header = 'rock'
        else:
            # Previously an unknown name fell through and raised a confusing
            # NameError further below; fail fast with a clear message.
            raise ValueError(f"Unknown dataset name: '{self.name}'")

        labels_df = pd.read_csv(task_file, sep='\t')
        labels_set = set(labels_df[label_header].values.tolist())
        labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
        # BUG FIX: `np.unicode` was removed in NumPy 1.24; it was merely an
        # alias of the builtin `str`, which is used directly here.
        nodes_dict = {str(key): val for key, val in nodes_dict.items()}

        train_labels_df = pd.read_csv(train_file, sep='\t')
        train_indices, train_labels = [], []
        for nod, lab in zip(train_labels_df[nodes_header].values,
                            train_labels_df[label_header].values):
            train_indices.append(nodes_dict[nod])
            train_labels.append(labels_dict[lab])

        train_idx = torch.tensor(train_indices, dtype=torch.long)
        train_y = torch.tensor(train_labels, dtype=torch.long)

        test_labels_df = pd.read_csv(test_file, sep='\t')
        test_indices, test_labels = [], []
        for nod, lab in zip(test_labels_df[nodes_header].values,
                            test_labels_df[label_header].values):
            test_indices.append(nodes_dict[nod])
            test_labels.append(labels_dict[lab])

        test_idx = torch.tensor(test_indices, dtype=torch.long)
        test_y = torch.tensor(test_labels, dtype=torch.long)

        if not self.hetero:
            data = Data(edge_index=edge_index,
                        edge_type=edge_type,
                        train_idx=train_idx,
                        train_y=train_y,
                        test_idx=test_idx,
                        test_y=test_y,
                        num_nodes=N)
        else:
            data = HeteroData(
                v={
                    'train_idx': train_idx,
                    'train_y': train_y,
                    'test_idx': test_idx,
                    'test_y': test_y,
                    'num_nodes': N,
                })
            # One edge store per relation type between 'v' nodes.
            for i in range(R):
                mask = edge_type == i
                data['v', f'{i}', 'v'].edge_index = edge_index[:, mask]

        torch.save(self.collate([data]), self.processed_paths[0])
Exemple #14
0
    def to_graph(self, threshold=None, format='edge_list', split=True,
                 frac=None, seed=42, order='descending'):
        """Convert the interaction data into a graph representation.

        Parameters
        ----------
        threshold : float, optional
            Cutoff used to binarize continuous affinity labels into positive
            edges. Required when the raw labels are not already binary.

        format : str
            Output format: 'edge_list', 'dgl', 'pyg' or 'df'.

        split : bool
            If True, also return a train/val/test split of the dataframe.

        frac : list, optional (default=[0.7, 0.1, 0.2])
            Train/val/test split fractions.

        seed : int
            Random seed for the split.

        order : str
            Sort order ('descending'/'ascending') forwarded to the label
            binarization transform.

        Returns
        -------
        dict
            Contents depend on `format`: 'edge_list'/'neg_edges',
            'dgl_graph' + 'index_to_entities', 'pyg_graph' +
            'index_to_entities', or 'df'; plus 'split' when `split` is True.
        """
        # Avoid a mutable default argument (the shared list could be mutated
        # across calls); None stands in for the documented default.
        if frac is None:
            frac = [0.7, 0.1, 0.2]

        df = self.get_data(format='df')

        if len(np.unique(self.raw_y)) > 2:
            print("The dataset label consists of affinity scores. "
                  "Binarization using threshold " +
                  str(threshold) +
                  " is conducted to construct the positive edges in the network. "
                  "Adjust the threshold by to_graph(threshold = X)",
                  flush=True, file=sys.stderr)
            if threshold is None:
                raise AttributeError(
                    "Please specify the threshold to binarize the data by "
                    "'to_graph(threshold = N)'!")
            df['label_binary'] = label_transform(self.raw_y, True, threshold,
                                                 False, verbose=False,
                                                 order=order)
        else:
            # already binary
            df['label_binary'] = df['Y']

        df[self.entity1_name + '_ID'] = df[self.entity1_name + '_ID'].astype(str)
        df[self.entity2_name + '_ID'] = df[self.entity2_name + '_ID'].astype(str)
        df_pos = df[df.label_binary == 1]
        df_neg = df[df.label_binary == 0]

        return_dict = {}

        pos_edges = df_pos[
            [self.entity1_name + '_ID', self.entity2_name + '_ID']].values
        neg_edges = df_neg[
            [self.entity1_name + '_ID', self.entity2_name + '_ID']].values
        edges = df[
            [self.entity1_name + '_ID', self.entity2_name + '_ID']].values

        if format == 'edge_list':
            return_dict['edge_list'] = pos_edges
            return_dict['neg_edges'] = neg_edges
        elif format == 'dgl':
            try:
                import dgl
            except ImportError:  # was a bare `except:`; only catch a missing module
                install("dgl")
                import dgl
            unique_entities = np.unique(pos_edges.T.flatten()).tolist()
            index = list(range(len(unique_entities)))
            dict_ = dict(zip(unique_entities, index))
            edge_list1 = np.array([dict_[i] for i in pos_edges.T[0]])
            edge_list2 = np.array([dict_[i] for i in pos_edges.T[1]])
            return_dict['dgl_graph'] = dgl.DGLGraph((edge_list1, edge_list2))
            return_dict['index_to_entities'] = dict_

        elif format == 'pyg':
            try:
                import torch
                from torch_geometric.data import Data
            except ImportError:  # was a bare `except:`; only catch a missing module
                raise ImportError(
                    "Please see https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html to install pytorch geometric!")

            unique_entities = np.unique(pos_edges.T.flatten()).tolist()
            index = list(range(len(unique_entities)))
            dict_ = dict(zip(unique_entities, index))
            edge_list1 = np.array([dict_[i] for i in pos_edges.T[0]])
            edge_list2 = np.array([dict_[i] for i in pos_edges.T[1]])

            edge_index = torch.tensor([edge_list1, edge_list2],
                                      dtype=torch.long)
            x = torch.tensor(np.array(index), dtype=torch.float)
            data = Data(x=x, edge_index=edge_index)
            return_dict['pyg_graph'] = data
            return_dict['index_to_entities'] = dict_

        elif format == 'df':
            return_dict['df'] = df

        if split:
            return_dict['split'] = create_fold(df, seed, frac)

        return return_dict
Exemple #15
0
    def sampler_generater(self, batch, le):
        """Build the sub-pathway data object for the rows selected by `batch`.

        Parameters
        ----------
        batch : iterable of int
            Row indices into ``self.data.pthway_NameList``.
        le : LabelEncoder
            Fitted encoder mapping protein genome names to activity rows.

        Returns
        -------
        Data
            Sub-pathway with activity values, filtered edge list, integer
            edge endpoints, and encoded edge/node class labels.
        """
        deep_pthway = Data()
        newpthway_Namelist = self.data.pthway_NameList.iloc[batch,:].reset_index(drop=True)
        deep_pthway.genome_Namelist = newpthway_Namelist[newpthway_Namelist['GenomeType'] == 'protein']['GenomeName'].values
        activ_id = le.transform(deep_pthway.genome_Namelist)
        deep_pthway.activ_free = self.data.activ_free[activ_id]
        deep_pthway.activ_cancer = self.data.activ_cancer[activ_id]

        deep_pthway.pth_Namelist = newpthway_Namelist
        Edgelist = self.data.Edgelist
        # PERF FIX: membership tests previously scanned a Python list per edge
        # (O(edges * nodes)); a set gives O(1) lookups with identical results.
        name_set = set(newpthway_Namelist['GenomeName'].values)
        Edgelist_l = list(Edgelist.iloc[:,0].values)
        Edgelist_ll = list(Edgelist.iloc[:,1].values)
        # Drop edges whose endpoints are not both in the selected sub-pathway.
        exclude_list = [
            idx for idx, (elem, elem2) in enumerate(zip(Edgelist_l, Edgelist_ll))
            if elem not in name_set or elem2 not in name_set
        ]

        newpthway_Edgelist = Edgelist.drop(exclude_list).reset_index(drop=True)
        deep_pthway.Edgelist = newpthway_Edgelist

        # Encode edge endpoints as integer node ids (shape: [num_edges, 2]).
        le2 = LabelEncoder()
        le2.fit(deep_pthway.pth_Namelist['GenomeName'].values)
        deep_pthway.edge_index = le2.transform(deep_pthway.Edgelist.iloc[:,:2].values.reshape(-1)).reshape(-1,2)
        deep_pthway.all_elem_className = list(le2.classes_)

        # Label edge_class
        le2 = LabelEncoder()
        le2.fit(deep_pthway.Edgelist['edgeType'])
        deep_pthway.edge_class = le2.transform(deep_pthway.Edgelist['edgeType'])
        deep_pthway.edge_className = list(le2.classes_)

        # Label node class
        le2 = LabelEncoder()
        le2.fit(deep_pthway.pth_Namelist['GenomeType'])
        deep_pthway.node_class = le2.transform(deep_pthway.pth_Namelist['GenomeType'])
        deep_pthway.node_className = list(le2.classes_)

        return deep_pthway
Exemple #16
0
def run_model(dataset, conf):
    """Build the table graph, train a two-view variational graph
    auto-encoder over row/column edges, and evaluate table vectors after
    every epoch.

    Parameters
    ----------
    dataset : table corpus consumed by ``create_corpus`` / ``evaluate_model``.
    conf : dict
        Expected keys: ``add_attr``, ``shuffle_vocab``, ``row_edges_sample``,
        ``column_edges_sample``, ``vector_size``, ``epoch_num``.

    Returns
    -------
    tuple
        (cell_vectors, vec_list, losses, results) from the final epoch.
    """
    # ## 1) Build Table graph
    # ### Tables tokenization
    tokenized_tables, vocabulary, cell_dict, reversed_dictionary = corpus_tuple = create_corpus(
        dataset, include_attr=conf["add_attr"])
    if conf["shuffle_vocab"] == True:
        shuffled_vocab = shuffle_vocabulary(vocabulary)
    else:
        shuffled_vocab = None

    nodes = build_node_features(vocabulary)
    row_edges_index, row_edges_weights = build_graph_edges(
        tokenized_tables,
        s_vocab=shuffled_vocab,
        sample_frac=conf["row_edges_sample"],
        columns=False)
    col_edges_index, col_edges_weights = build_graph_edges(
        tokenized_tables,
        s_vocab=shuffled_vocab,
        sample_frac=conf["column_edges_sample"],
        columns=True)

    # Unsampled edge sets are only used as the candidate pool for
    # negative sampling in the reconstruction loss.
    all_row_edges_index, all_row_edges_weights = build_graph_edges(
        tokenized_tables,
        s_vocab=shuffled_vocab,
        sample_frac=1.0,
        columns=False)
    all_col_edges_index, all_col_edges_weights = build_graph_edges(
        tokenized_tables,
        s_vocab=shuffled_vocab,
        sample_frac=1.0,
        columns=True)
    all_possible_edges = torch.cat((all_row_edges_index, all_col_edges_index),
                                   dim=1)

    edges = torch.cat((row_edges_index, col_edges_index), dim=1)
    weights = torch.cat((row_edges_weights, col_edges_weights), dim=0)
    graph_data = Data(x=nodes, edge_index=edges, edge_attr=weights)

    # ## 2 ) Run Table Auto-Encoder Model:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    loader = DataLoader(torch.arange(graph_data.num_nodes),
                        batch_size=128,
                        shuffle=True)
    graph_data = graph_data.to(device)

    x, train_pos_edge_index = nodes, edges

    EPS = 1e-15
    MAX_LOGVAR = 10

    class TVGAE(GAE):
        r"""Two-view variant of the Variational Graph Auto-Encoder from the
        `"Variational Graph Auto-Encoders" <https://arxiv.org/abs/1611.07308>`_
        paper: one latent space for the row view and one for the column view.

        Args:
            encoder (Module): The encoder module to compute :math:`\mu` and
                :math:`\log\sigma^2` for both views.
            decoder (Module, optional): The decoder module. If set to :obj:`None`,
                will default to the
                :class:`torch_geometric.nn.models.InnerProductDecoder`.
                (default: :obj:`None`)
        """
        def __init__(self, encoder, decoder=None):
            super(TVGAE, self).__init__(encoder, decoder)

        def reparametrize(self, mu, logvar):
            # Sample z = mu + sigma * eps during training, use the mean at eval.
            if self.training:
                return mu + torch.randn_like(logvar) * torch.exp(logvar)
            else:
                return mu

        def encode(self, *args, **kwargs):
            """Encode both views; row and column latents are concatenated
            along dim 0 (callers split the result in half)."""
            self.__rmu__, self.__rlogvar__, self.__cmu__, self.__clogvar__ = self.encoder(
                *args, **kwargs)
            # Clamp log-variances for numerical stability of exp() below.
            self.__rlogvar__ = self.__rlogvar__.clamp(max=MAX_LOGVAR)
            self.__clogvar__ = self.__clogvar__.clamp(max=MAX_LOGVAR)
            zr = self.reparametrize(self.__rmu__, self.__rlogvar__)
            zc = self.reparametrize(self.__cmu__, self.__clogvar__)
            z = torch.cat((zr, zc), 0)
            return z

        def kl_loss(self):
            """Return the (row, column) KL divergences of the approximate
            posteriors from a standard normal prior."""
            rmu = self.__rmu__
            rlogvar = self.__rlogvar__

            cmu = self.__cmu__
            clogvar = self.__clogvar__

            rkl = -0.5 * torch.mean(
                torch.sum(1 + rlogvar - rmu**2 - rlogvar.exp(), dim=1))
            # BUG FIX: the column-view KL previously used the *row* mean
            # (rmu**2); it must use the column mean (cmu**2).
            ckl = -0.5 * torch.mean(
                torch.sum(1 + clogvar - cmu**2 - clogvar.exp(), dim=1))
            return (rkl, ckl)

        def recon_loss(self, z, pos_edge_index, all_possible_edges):
            """Binary cross-entropy over positive edges and negatives sampled
            from the full (unsampled) edge pool."""
            # Use self.decoder (was the closure variable `model`, which only
            # worked because the single instance happened to be named `model`).
            pos_loss = -torch.log(
                self.decoder(z, pos_edge_index, sigmoid=True) + EPS).mean()

            # Do not include self-loops in negative samples
            pos_edge_index, _ = remove_self_loops(pos_edge_index)
            pos_edge_index, _ = add_self_loops(pos_edge_index)

            neg_edge_index = negative_sampling(all_possible_edges, z.size(0))
            neg_loss = -torch.log(1 - self.decoder(
                z, neg_edge_index, sigmoid=True) + EPS).mean()

            return pos_loss + neg_loss

    class Encoder(torch.nn.Module):
        """Two-branch GCN encoder: a shared input is convolved separately
        over row edges and column edges, producing (mu, logvar) per view."""
        def __init__(self, in_channels, out_channels):
            super(Encoder, self).__init__()
            self.conv_rows = GCNConv(in_channels,
                                     2 * out_channels,
                                     cached=True)
            self.conv_cols = GCNConv(in_channels,
                                     2 * out_channels,
                                     cached=True)

            self.conv_rmu = GCNConv(2 * out_channels,
                                    out_channels,
                                    cached=True)
            self.conv_rlogvar = GCNConv(2 * out_channels,
                                        out_channels,
                                        cached=True)

            self.conv_cmu = GCNConv(2 * out_channels,
                                    out_channels,
                                    cached=True)
            self.conv_clogvar = GCNConv(2 * out_channels,
                                        out_channels,
                                        cached=True)

        def forward(self, x, row_edge_index, col_edge_index):
            xr = F.relu(self.conv_rows(x, row_edge_index))
            xc = F.relu(self.conv_cols(x, col_edge_index))
            return self.conv_rmu(xr, row_edge_index),\
                self.conv_rlogvar(xr, row_edge_index),\
                self.conv_cmu(xc, col_edge_index),\
                self.conv_clogvar(xc, col_edge_index)

    channels = conf["vector_size"]

    enc = Encoder(graph_data.num_features, channels)
    model = TVGAE(enc)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    def train(model, optimizer, x, row_edges, col_edges):
        """One optimization step; returns (total, row-recon, col-recon,
        row-KL, col-KL) losses."""
        model.train()
        optimizer.zero_grad()
        z = model.encode(x, row_edges, col_edges)
        # First half of z is the row-view latent, second half the column view.
        mid = int(len(z) / 2)
        zr = z[:mid]
        zc = z[mid:]

        rrl = model.recon_loss(zr, row_edges, all_possible_edges)
        crl = model.recon_loss(zc, col_edges, all_possible_edges)

        rkl, ckl = model.kl_loss()

        loss = rrl + crl + rkl + ckl

        loss.backward()
        optimizer.step()
        return loss, rrl, crl, rkl, ckl

    def get_cell_vectors(model, x, row_edges_index, col_edges_index):
        """Encode all cells in eval mode and return (latent tensor, ndarray)."""
        model.eval()
        with torch.no_grad():
            z = model.encode(x, row_edges_index, col_edges_index)
            cell_vectors = z.numpy()
        return z, cell_vectors

    losses = []
    results = []
    for epoch in range(conf["epoch_num"]):
        # `loss` is the full 5-tuple returned by train().
        loss = train(model, optimizer, x, row_edges_index, col_edges_index)
        losses.append(loss)
        print(epoch, loss)
        z, cell_vectors = get_cell_vectors(model, x, row_edges_index,
                                           col_edges_index)
        vec_list = generate_table_vectors(cell_vectors,
                                          tokenized_tables,
                                          s_vocab=shuffled_vocab)
        result_score = evaluate_model(dataset, vec_list, k=5)
        print(result_score)
        results.append(result_score)

    # ### 3) Extract the latent cell vectors, generate table vectors and
    # return the per-epoch losses/evaluation scores alongside the final
    # epoch's vectors. NOTE: requires conf["epoch_num"] >= 1, otherwise
    # cell_vectors/vec_list are never assigned.
    return cell_vectors, vec_list, losses, results
Exemple #17
0
 def _generate_data(self):
     """Build one synthetic point-cloud ``Data`` sample for testing.

     Produces random positions, optional per-point features, and random
     labels; box and panoptic annotation fields are attached when the
     corresponding flags are set on the dataset.
     """
     data = Data(
         pos=torch.randn((self.num_points, 3)),
         # Features only when a feature size is configured; otherwise None.
         x=torch.randn((self.num_points,
                        self.feature_size)) if self.feature_size else None,
         y=torch.randint(0, 10, (self.num_points, )),
         category=self._category,
     )
     if self.include_box:
         # Object-detection style box annotations.
         num_boxes = 10
         data.center_label = torch.randn(num_boxes, 3)
         data.heading_class_label = torch.zeros((num_boxes, ))
         data.heading_residual_label = torch.randn((num_boxes, ))
         data.size_class_label = torch.randint(0, len(self.mean_size_arr),
                                               (num_boxes, ))
         data.size_residual_label = torch.randn(num_boxes, 3)
         data.sem_cls_label = torch.randint(0, 10, (num_boxes, ))
         # NOTE(review): randint(0, 1, ...) always yields 0, so this mask is
         # all-False — confirm that is intended for the fixture.
         data.box_label_mask = torch.randint(0, 1, (num_boxes, )).bool()
         data.vote_label = torch.randn(self.num_points, 9)
         # NOTE(review): same always-False pattern as box_label_mask above.
         data.vote_label_mask = torch.randint(0, 1,
                                              (self.num_points, )).bool()
     if self.panoptic:
         # Panoptic fields; note y/center_label/vote_label set above are
         # overwritten when include_box and panoptic are both enabled.
         data.num_instances = torch.tensor([10])
         data.center_label = torch.randn((self.num_points, 3))
         data.y = torch.randint(0, 10, (self.num_points, ))
         data.instance_labels = torch.randint(0, 20, (self.num_points, ))
         data.instance_mask = torch.rand(self.num_points).bool()
         data.vote_label = torch.randn((self.num_points, 3))
     return data
Exemple #18
0
        # print(img.size())

        fake_RGB_image = fake_RGB_image[:,128:256,128:256,:]
        real_RGB_image = real_RGB_image[:,128:256,128:256,:]
        
        # print(fake_B.size())
        fake_RGB_image = fake_RGB_image.transpose(1,3).transpose(2,3)
        real_RGB_image = real_RGB_image.transpose(1,3).transpose(2,3)
        # print(fake_B.size())


        # image based D
        pred_fake = discriminator(fake_RGB_image, real_A)
        loss_GAN = criterion_GAN(pred_fake, valid) * lambda_IMAGE
        # point based D
        point_cloud_fake = Data(pos=point, x=fake_RGB)
        point_cloud_fake = Batch.from_data_list([point_cloud_fake])
        print(point_cloud_fake)        
        pred_fake_point = pouintD(point_cloud_fake)
        loss_GAN_point = criterion_GAN(pred_fake_point, valid_P) * lambda_POINT
        # Pixel-wise loss
        loss_pixel = criterion_pixelwise(fake_RGB_image, real_RGB_image) * lambda_IMAGE
        # Point-wise loss
        loss_point = criterion_pixelwise(fake_RGB, real_RGB) * lambda_POINT

        # Total loss
        loss_G = lambda_GAN * loss_GAN + loss_GAN_point + lambda_pixel * loss_pixel + lambda_pixel * loss_point

        loss_G.backward()
        if (i+1)%1 == 0:
            optimizer_G.step()
Exemple #19
0
    def __getitem__(self, index):
        """Return the graph sample at `index` as a Data object."""
        features = torch.FloatTensor(self.x[index])
        topology = torch.tensor(self.tuopu[index])
        user_labels = torch.tensor(self.user_y[index])
        group_label = torch.tensor([self.group_y[index]])
        return Data(x=features,
                    edge_index=topology,
                    user_y=user_labels,
                    y=group_label)
Exemple #20
0
    def __init__(self, root, npoints=20000, transform=None):
        """Load .las point clouds under `root`, normalize them, and cache
        fixed-size Data samples in `self.datalist`.

        Parameters
        ----------
        root : str
            Dataset root; .las files are read from root + "/pointcloud_path/".
        npoints : int
            Number of points kept per cloud; smaller clouds are skipped.
        transform : callable, optional
            Stored for later use by the dataset.
        """
        self.npoints = npoints
        self.root = root
        self.pointlist = []
        self.rgblist = []
        self.datalist = []
        
        self.transform = transform
        self.pointpath = root + "/pointcloud_path/"
        print(self.pointpath )
        self.point_list  = glob.glob(self.pointpath + "/*.las")[:]
        print(self.point_list )
        count = 0
        for file in self.point_list:
            print(file)
            # Load the point cloud
            file_h = laspy.file.File(file, mode='r')
            print(file_h.header.min[0])
            print(file_h.header.min[2])
            print(file_h.header.min[1])
            src = np.vstack([file_h.x, file_h.y, file_h.z]).transpose()
            # Skip clouds with too few points to fill a sample.
            if(len(src)<npoints):continue
            # NOTE(review): las colors are typically 16-bit; dividing by 255
            # does not map them into [0, 1] — confirm the intended scale.
            rgb = np.vstack([file_h.red, file_h.green, file_h.blue]).transpose()
            rgb = rgb/255.0
            print(np.amin(rgb, axis=0))
            print(np.amax(rgb, axis=0))
            points = file_h.points['point']
            attr_names = [a for a in points.dtype.names] + ImgtoPointDataset.ATTR_EXTRA_LIST
            features = np.array([getattr(file_h, name) for name in attr_names
                                if name not in ImgtoPointDataset.ATTR_EXLUSION_LIST]).transpose()
            
            print(features[:,1])
            features = features/1.0
            names = [name for name in attr_names if name not in ImgtoPointDataset.ATTR_EXLUSION_LIST]
            print(names)

            file_h.close()
            # Remove statistical outliers with Open3D before normalization.
            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(src)
            pcd.colors = o3d.utility.Vector3dVector(rgb)
            # cl, ind = pcd.remove_radius_outlier(nb_points=16, radius=0.05)
            cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20,
                                                        std_ratio=2.0)
            pcd = pcd.select_down_sample(ind)
            src = np.asarray(pcd.points)
            rgb = np.asarray(pcd.colors)
            normlized_xyz = np.zeros((npoints, 3))
            normlized_rgb = np.zeros((npoints, 3))
            normlized_feature = np.zeros((npoints, 3))
            self.coord_min, self.coord_max = np.amin(src, axis=0)[:3], np.amax(src, axis=0)[:3]
            
            # Skip degenerate clouds whose max coordinate is exactly zero.
            if(self.coord_max[0]==0):continue
            if(self.coord_max[1]==0):continue
            if(self.coord_max[2]==0):continue
            print(np.amin(src, axis=0)[:3] )
            print(np.amax(src, axis=0)[:3] )
            # Normalize x/y into roughly [-0.5, 0.5] and z into [0, ...]
            # assuming a ~30 m tile extent — TODO confirm the 30.0 constant.
            src[:, 0] = ((src[:, 0] - self.coord_min[0])/30.0) - 0.5
            src[:, 1] = ((src[:, 1] - self.coord_min[1])/30.0) - 0.5
            src[:, 2] = ((src[:, 2] - self.coord_min[2])/30.0) 
            features[:,0] = features[:,0]/ 4000.0 #'intensity', 'raw_classification', 'num_returns']
            features[:,1] = features[:,1]/ 17.0
            features[:,2] = features[:,2]/ 8.0


            print(np.amin(src, axis=0)[:3] )
            print(np.amax(src, axis=0)[:3] )
            # Truncate to exactly npoints (the else branch pads xyz only;
            # outlier removal may have shrunk the cloud below npoints).
            if(len(src) >=npoints):
                normlized_xyz[:,:]=src[:npoints,:]
                normlized_rgb[:,:]=rgb[:npoints,:]
                normlized_feature[:,:] = features[:npoints,:]
            else:
                normlized_xyz[:len(src),:]=src[:,:]

            self.pointlist.append(normlized_xyz)
            self.rgblist.append(normlized_rgb)
            normlized_xyz = torch.from_numpy(normlized_xyz).float()
            # First 3 feature channels are the normalized las attributes,
            # remaining 3 stay random.
            random_features = torch.randn(npoints,6)
            random_features[:, :3] = torch.from_numpy(normlized_feature).float()

            self.datalist.append(Data(pos=normlized_xyz[:, :], x=random_features[ :, :3]))

                

        self.data_num = len(self.pointlist)
import torch
from torch_geometric.data import Data

# NOTE(review): edge_index references node id 3, but x below has only 3 rows
# (ids 0-2) — confirm whether a 4th node feature row is missing.
edge_index = torch.tensor([[3, 1, 1, 2], [1, 3, 2, 1]], dtype=torch.long)
# Note: x is 2-D, not 1-D — each row is one node's feature vector
# (feature dimension is 1 here).
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)
print(data)
'''
# 通过节点对的方式给出
edge_index = torch.tensor([
    [0, 1], [1, 0], [1, 2], [2, 1]
], dtype=torch.long)
data = Data(x=x, edge_index=edge_index.t().contiguous())
print(data)
'''
# Print data's attribute keys; only attributes that were explicitly
# passed as arguments appear.
print(data.keys)
# ['x', 'edge_index']

# Index by attribute key (note: the key is a string).
print(data['x'])
# tensor([[-1.],
#         [ 0.],
#         [ 1.]])
print(data['edge_index'])
# tensor([[3, 1, 1, 2],
#         [1, 3, 2, 1]])

print('edge_attr: ', data['edge_attr'])
Exemple #22
0
def dataset_to_graphs(glycan_list,
                      labels,
                      libr=None,
                      label_type=torch.long,
                      separate=False,
                      context=False,
                      error_catch=False,
                      wo_labels=False):
    """wrapper function to convert a whole list of glycans into a graph dataset\n
  | Arguments:
  | :-
  | glycan_list (list): list of IUPAC-condensed glycan sequences as strings
  | labels (list): list of labels
  | label_type (torch object): which tensor type for label, default is torch.long for binary labels, change to torch.float for continuous
  | separate (bool): True returns node list / edge list / label list as separate files; False returns list of data tuples; default is False
  | libr (list): sorted list of unique glycoletters observed in the glycans of our dataset
  | context (bool): legacy-ish; used for generating graph context dataset for pre-training; keep at False
  | error_catch (bool): troubleshooting option, True will print glycans that cannot be converted into graphs; default is False
  | wo_labels (bool): change to True if you do not want to pass and receive labels; default is False\n
  | Returns:
  | :-
  | Returns list of node list / edge list / label list data tuples
  """
    if libr is None:
        libr = lib
    if error_catch:
        glycan_graphs = []
        for k in glycan_list:
            try:
                glycan_graphs.append(glycan_to_graph(k, libr))
            # Catch only genuine errors (a bare `except:` would also swallow
            # KeyboardInterrupt/SystemExit) and report the offending glycan.
            # NOTE(review): skipped glycans shift the alignment between
            # `glycan_graphs` and `labels` — confirm callers expect this.
            except Exception:
                print(k)
    else:
        glycan_graphs = [glycan_to_graph(k, libr) for k in glycan_list]
    if separate:
        glycan_nodes, glycan_edges = zip(*glycan_graphs)
        return list(glycan_nodes), list(glycan_edges), labels
    else:
        if context:
            # Fixed: pass the caller-supplied library `libr` (the original
            # hard-coded the global `lib`, silently ignoring `libr`).
            contexts = [ggraph_to_context(k, lib=libr) for k in glycan_graphs]
            labels = [k[1] for k in contexts]
            labels = [item for sublist in labels for item in sublist]
            contexts = [k[0] for k in contexts]
            contexts = [item for sublist in contexts for item in sublist]
            data = [
                Data(x=torch.tensor(contexts[k][0], dtype=torch.long),
                     y=torch.tensor(labels[k], dtype=label_type),
                     edge_index=torch.tensor(
                         [contexts[k][1][0], contexts[k][1][1]],
                         dtype=torch.long)) for k in range(len(contexts))
            ]
            return data
        else:
            if wo_labels:
                glycan_nodes, glycan_edges = zip(*glycan_graphs)
                glycan_graphs = list(zip(glycan_nodes, glycan_edges))
                data = [
                    Data(x=torch.tensor(k[0], dtype=torch.long),
                         edge_index=torch.tensor([k[1][0], k[1][1]],
                                                 dtype=torch.long))
                    for k in glycan_graphs
                ]
                return data
            else:
                glycan_nodes, glycan_edges = zip(*glycan_graphs)
                glycan_graphs = list(zip(glycan_nodes, glycan_edges, labels))
                data = [
                    Data(x=torch.tensor(k[0], dtype=torch.long),
                         y=torch.tensor([k[2]], dtype=label_type),
                         edge_index=torch.tensor([k[1][0], k[1][1]],
                                                 dtype=torch.long))
                    for k in glycan_graphs
                ]
                return data
Exemple #23
0
    def read_one_scan(
        scannet_dir,
        scan_name,
        label_map_file,
        donotcare_class_ids,
        max_num_point,
        obj_class_ids,
        use_instance_labels=True,
        use_instance_bboxes=True,
    ):
        """Load a single ScanNet scan and pack it into a ``Data`` object.

        Reads the cleaned mesh plus the aggregation / segmentation / meta
        files for ``scan_name``, drops the unwanted classes, optionally
        subsamples the points, remaps the semantic labels to a contiguous
        range and returns positions, colors and labels.

        Args:
            scannet_dir: root directory containing one folder per scan.
            scan_name: name of the scan folder (also the file prefix).
            label_map_file: label mapping file, passed through to ``export``.
            donotcare_class_ids: semantic class ids dropped entirely.
            max_num_point: if truthy, randomly subsample to at most this
                many points (without replacement).
            obj_class_ids: class ids whose instance bounding boxes are kept.
            use_instance_labels: include per-point instance ids as ``iy``.
            use_instance_bboxes: include instance bounding boxes as ``bbox``.

        Returns:
            A ``Data`` object with ``pos``, ``rgb`` (scaled to [0, 1]),
            ``y`` (remapped semantic labels), ``x=None`` and, optionally,
            ``iy`` / ``bbox``.
        """
        mesh_file = osp.join(scannet_dir, scan_name, scan_name + "_vh_clean_2.ply")
        agg_file = osp.join(scannet_dir, scan_name, scan_name + ".aggregation.json")
        seg_file = osp.join(scannet_dir, scan_name, scan_name + "_vh_clean_2.0.010000.segs.json")
        meta_file = osp.join(
            scannet_dir, scan_name, scan_name + ".txt"
        )  # includes axisAlignment info for the train set scans.
        mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = export(
            mesh_file, agg_file, seg_file, meta_file, label_map_file, None
        )

        # Discard unwanted classes
        mask = np.logical_not(np.in1d(semantic_labels, donotcare_class_ids))
        mesh_vertices = mesh_vertices[mask, :]
        semantic_labels = semantic_labels[mask]
        instance_labels = instance_labels[mask]

        # Keep only bounding boxes whose class id (last column) is of interest.
        bbox_mask = np.in1d(instance_bboxes[:, -1], obj_class_ids)
        instance_bboxes = instance_bboxes[bbox_mask, :]

        # Subsample
        N = mesh_vertices.shape[0]
        if max_num_point:
            if N > max_num_point:
                choices = np.random.choice(N, max_num_point, replace=False)
                mesh_vertices = mesh_vertices[choices, :]
                semantic_labels = semantic_labels[choices]
                instance_labels = instance_labels[choices]

        # Remap labels to [0-(len(valid_labels))]
        # The in-place remap is safe for valid ids because the value written
        # at step ``i`` is always <= i, so later iterations (matching ids > i)
        # never touch already-rewritten entries. NOTE(review): this assumes
        # IGNORE_LABEL lies outside the scanned id range (e.g. negative) —
        # confirm against the Scannet class definition.
        count = 0
        for i in range(max(Scannet.SCANNET_COLOR_MAP.keys()) + 1):
            if i in Scannet.VALID_CLASS_IDS:
                label = count
                count += 1
            else:
                label = Scannet.IGNORE_LABEL
            mask = semantic_labels == i
            semantic_labels[mask] = label

        # Build data container
        data = {}
        data["pos"] = torch.from_numpy(mesh_vertices[:, :3])
        data["rgb"] = torch.from_numpy(mesh_vertices[:, 3:]) / 255.0
        data["y"] = torch.from_numpy(semantic_labels)
        data["x"] = None

        if use_instance_labels:
            data["iy"] = torch.from_numpy(instance_labels)

        if use_instance_bboxes:
            data["bbox"] = torch.from_numpy(instance_bboxes)

        return Data(**data)
Exemple #24
0
    def __getitem__(self, item):
        """Return the molecular graph for dataset entry ``item``.

        Looks up ``(pdbid, pose, affinity)`` in ``self.data_list``, reads the
        matching entry from the HDF5 file and builds a dense-distance graph:
        node features are the van der Waals radius concatenated with the
        pybel atom features, edge attributes are pairwise Euclidean
        distances, and ``y`` is the binding affinity.

        Returns ``None`` when ``self.dataset_name`` is missing for this
        pdbid, ``(pdbid, pose, Data)`` when ``self.output_info`` is set, and
        the bare ``Data`` otherwise. Results are memoized in
        ``self.data_dict`` when ``self.cache_data`` is enabled.
        """
        # Serve cached results directly.
        if self.cache_data and item in self.data_dict:
            return self.data_dict[item]

        pdbid, pose, affinity = self.data_list[item]

        node_feats, coords = None, None
        with h5py.File(self.data_file, "r") as f:
            # Single source of truth for the HDF5 group path (the original
            # rebuilt this string four times).
            group_path = "{}/{}/{}".format(
                pdbid, self.feature_type, self.preprocessing_type
            )

            if self.dataset_name not in f[group_path].keys():
                print(pdbid)
                return None

            entry = f["{}/{}".format(group_path, self.dataset_name)]
            if self.use_docking:
                # TODO: the next line will cause a runtime error because
                # poses are not being selected properly.
                entry = entry[pose]

            data = entry["data"]
            vdw_radii = entry.attrs["van_der_waals"].reshape(-1, 1)

            if self.feature_type == "pybel":
                # Columns 0-2 are xyz coordinates, 3-21 the pybel features.
                coords = data[:, 0:3]
                node_feats = np.concatenate([vdw_radii, data[:, 3:22]], axis=1)
            else:
                raise NotImplementedError

        # account for the vdw radii in distance calculations (consider each
        # atom as a sphere, distance between spheres)
        dists = pairwise_distances(coords, metric="euclidean")

        edge_index, edge_attr = dense_to_sparse(torch.from_numpy(dists).float())

        x = torch.from_numpy(node_feats).float()

        y = torch.FloatTensor(affinity).view(-1, 1)
        data = Data(
            x=x, edge_index=edge_index, edge_attr=edge_attr.view(-1, 1), y=y
        )

        if self.cache_data:
            # Cache the same shape the caller would receive below.
            self.data_dict[item] = (pdbid, pose, data) if self.output_info else data
            return self.data_dict[item]

        if self.output_info:
            return (pdbid, pose, data)
        return data
Exemple #25
0
from torch_geometric.utils import k_hop_subgraph, from_networkx
import pickle
import networkx as nx
from math import floor

# Location of the pre-built synthetic graph (syn4) and its node labels.
prefix = '/gpfs_home/spate116/singhlab/GCN_Integration/scripts/BI/examples/syn/'
G = nx.read_gpickle( prefix + 'data/syn4_G.pickle')
with open(prefix + 'data/syn4_lab.pickle', 'rb') as f:
    labels = pickle.load(f)

# Node features come from each networkx node's 'feat' attribute.
x = torch.tensor([x[1]['feat'] for x in G.nodes(data=True)])
# Edges as an (E, 2) tensor; append the flipped pairs so the graph is
# effectively undirected, then transpose to PyG's (2, 2E) layout below.
edge_index = torch.tensor([x for x in G.edges])
edge_index_flipped = edge_index[:, [1, 0]]
edge_index = torch.cat((edge_index, edge_index_flipped))
y = torch.tensor(labels, dtype=torch.long)
data = Data(x=x, edge_index=edge_index.T, y=y)

class Net(torch.nn.Module):
    """Three-layer GCN node classifier.

    Hidden width is `x`; the output width is derived from the module-level
    label tensor `y` (number of distinct classes).
    """

    def __init__(self, k=1, x=64):
        super(Net, self).__init__()
        num_classes = max(y).tolist() + 1  # `y` is the module-level label tensor
        self.conv1 = GCNConv(10, x)
        self.conv2 = GCNConv(x, x)
        self.conv3 = GCNConv(x, num_classes)

    def forward(self, x, edge_index):
        """Return per-node class logits for the given graph."""
        hidden = F.leaky_relu(self.conv1(x, edge_index))
        hidden = F.leaky_relu(self.conv2(hidden, edge_index))
        return self.conv3(hidden, edge_index)
    
# Load everything onto the gpu if available
    def process_set(self, dataset):
        """Build and collate the `Data` list for one split (`dataset`).

        Walks every category folder of the raw dataset, reads all matching
        `<category>/<dataset>-*.h5` shards and converts each sample into a
        `Data` object (positions and labels; for `ins_seg_h5` also normals,
        opacity and RGB features), applying `pre_filter` / `pre_transform`
        along the way.

        Args:
            dataset: split name used to match the `<dataset>-*.h5` shards.

        Returns:
            The result of `self.collate(data_list)`.
        """
        if self.dataset == 'ins_seg_h5':
            raw_path = osp.join(self.raw_dir, 'ins_seg_h5_for_sgpn',
                                self.dataset)
            categories = glob(osp.join(raw_path, '*'))
            categories = sorted([x.split(os.sep)[-1] for x in categories])

            data_list = []
            for target, category in enumerate(tqdm(categories)):
                folder = osp.join(raw_path, category)
                paths = glob('{}/{}-*.h5'.format(folder, dataset))
                labels, nors, opacitys, pts, rgbs = [], [], [], [], []
                for path in paths:
                    # Open read-only and close deterministically (the
                    # original leaked open h5py file handles).
                    with h5py.File(path, 'r') as f:
                        pts += torch.from_numpy(f['pts'][:]).unbind(0)
                        labels += torch.from_numpy(f['label'][:]).to(
                            torch.long).unbind(0)
                        nors += torch.from_numpy(f['nor'][:]).unbind(0)
                        opacitys += torch.from_numpy(f['opacity'][:]).unbind(0)
                        rgbs += torch.from_numpy(f['rgb'][:]).to(
                            torch.float32).unbind(0)

                for i, (pt, label, nor, opacity, rgb) in enumerate(
                        zip(pts, labels, nors, opacitys, rgbs)):
                    # Node features: opacity plus RGB scaled to [0, 1].
                    data = Data(pos=pt[:, :3],
                                y=label,
                                norm=nor[:, :3],
                                x=torch.cat(
                                    (opacity.unsqueeze(-1), rgb / 255.), 1))

                    if self.pre_filter is not None and not self.pre_filter(
                            data):
                        continue
                    if self.pre_transform is not None:
                        data = self.pre_transform(data)
                    data_list.append(data)
        else:
            raw_path = osp.join(self.raw_dir, self.dataset)
            categories = glob(osp.join(raw_path, self.object))
            categories = sorted([x.split(os.sep)[-1] for x in categories])
            data_list = []
            # class_name = []
            for target, category in enumerate(tqdm(categories)):
                folder = osp.join(raw_path, category)
                paths = glob('{}/{}-*.h5'.format(folder, dataset))
                labels, pts = [], []
                # clss = category.split('-')[0]

                for path in paths:
                    with h5py.File(path, 'r') as f:
                        pts += torch.from_numpy(f['data'][:].astype(
                            np.float32)).unbind(0)
                        labels += torch.from_numpy(f['label_seg'][:].astype(
                            np.float32)).to(torch.long).unbind(0)
                for i, (pt, label) in enumerate(zip(pts, labels)):
                    data = Data(pos=pt[:, :3], y=label)
                    # data = PartData(pos=pt[:, :3], y=label, clss=clss)
                    if self.pre_filter is not None and not self.pre_filter(
                            data):
                        continue
                    if self.pre_transform is not None:
                        data = self.pre_transform(data)
                    data_list.append(data)
        return self.collate(data_list)
Exemple #27
0
    def process(self):
        """Build the PascalVOC-keypoint graph dataset for `self.category`.

        For every object listed in the train/test splits: skip truncated,
        occluded or difficult instances, crop the object's (padded) bounding
        box, resize it to 256x256, run it through a pretrained VGG16 and
        sample the relu4_2 / relu5_1 feature maps at each annotated
        keypoint. Keypoints become graph nodes with features ``x``,
        positions ``pos`` and label ids ``y``; the collated train and test
        lists are saved to ``self.processed_paths[0]`` / ``[1]``.
        """
        if models is None or T is None or Image is None:
            raise ImportError('Package `torchvision` could not be found.')

        splits = np.load(osp.join(self.raw_dir, 'splits.npz'),
                         allow_pickle=True)
        category_idx = self.categories.index(self.category)
        train_split = list(splits['train'])[category_idx]
        test_split = list(splits['test'])[category_idx]

        image_path = osp.join(self.raw_dir, 'images', 'JPEGImages')
        info_path = osp.join(self.raw_dir, 'images', 'Annotations')
        annotation_path = osp.join(self.raw_dir, 'annotations')

        # Maps keypoint name -> integer class id, grown on first sight.
        labels = {}

        # The hook captures intermediate VGG16 activations on each forward
        # pass; the list is cleared and refilled once per image.
        vgg16_outputs = []

        def hook(module, x, y):
            vgg16_outputs.append(y.to('cpu'))

        vgg16 = models.vgg16(pretrained=True).to(self.device)
        vgg16.eval()
        vgg16.features[20].register_forward_hook(hook)  # relu4_2
        vgg16.features[25].register_forward_hook(hook)  # relu5_1

        # Standard ImageNet normalization.
        transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        train_data_list, test_data_list = [], []
        # `i` counts positions in the concatenated splits, so the
        # `i < len(train_split)` check at the bottom still routes each
        # sample correctly even though some iterations are skipped.
        for i, name in enumerate(chain(train_split, test_split)):
            filename = '_'.join(name.split('/')[1].split('_')[:-1])
            idx = int(name.split('_')[-1].split('.')[0]) - 1

            # Read this object's flags from the VOC XML annotation.
            path = osp.join(info_path, '{}.xml'.format(filename))
            obj = minidom.parse(path).getElementsByTagName('object')[idx]

            trunc = obj.getElementsByTagName('truncated')[0].firstChild.data
            occ = obj.getElementsByTagName('occluded')
            occ = '0' if len(occ) == 0 else occ[0].firstChild.data
            diff = obj.getElementsByTagName('difficult')[0].firstChild.data

            # Skip truncated/occluded/difficult objects.
            if bool(int(trunc)) or bool(int(occ)) or bool(int(diff)):
                continue

            # NOTE(review): dataset-specific filter — 'person' images after
            # 2008 are dropped; confirm against the dataset description.
            if self.category == 'person' and int(filename[:4]) > 2008:
                continue

            xmin = float(obj.getElementsByTagName('xmin')[0].firstChild.data)
            xmax = float(obj.getElementsByTagName('xmax')[0].firstChild.data)
            ymin = float(obj.getElementsByTagName('ymin')[0].firstChild.data)
            ymax = float(obj.getElementsByTagName('ymax')[0].firstChild.data)
            box = (xmin, ymin, xmax, ymax)

            # Keypoint annotations live in a separate per-object XML file.
            dom = minidom.parse(osp.join(annotation_path, name))
            keypoints = dom.getElementsByTagName('keypoint')
            poss, ys = [], []
            for keypoint in keypoints:
                label = keypoint.attributes['name'].value
                if label not in labels:
                    labels[label] = len(labels)
                ys.append(labels[label])
                x = float(keypoint.attributes['x'].value)
                y = float(keypoint.attributes['y'].value)
                poss += [x, y]
            y = torch.tensor(ys, dtype=torch.long)
            pos = torch.tensor(poss, dtype=torch.float).view(-1, 2)

            if pos.numel() > 0:
                # Add a small offset to the bounding because some keypoints lay
                # outside the bounding box intervals.
                box = (min(pos[:, 0].min().floor().item(), box[0]) - 16,
                       min(pos[:, 1].min().floor().item(), box[1]) - 16,
                       max(pos[:, 0].max().ceil().item(), box[2]) + 16,
                       max(pos[:, 1].max().ceil().item(), box[3]) + 16)

                # Rescale keypoints.
                pos[:, 0] = (pos[:, 0] - box[0]) * 256.0 / (box[2] - box[0])
                pos[:, 1] = (pos[:, 1] - box[1]) * 256.0 / (box[3] - box[1])

                path = osp.join(image_path, '{}.jpg'.format(filename))
                with open(path, 'rb') as f:
                    img = Image.open(f).convert('RGB').crop(box)
                    img = img.resize((256, 256), resample=Image.BICUBIC)

                img = transform(img)
                vgg16_outputs.clear()
                with torch.no_grad():
                    vgg16(img.unsqueeze(0).to(self.device))

                # Upsample each captured feature map to 256x256 and read it
                # out at the (rounded, clamped) keypoint pixels.
                xs = []
                for out in vgg16_outputs:
                    out = F.interpolate(out, (256, 256),
                                        mode='bilinear',
                                        align_corners=False)
                    out = out.squeeze(0).permute(1, 2, 0)  # [H, W, C]
                    pos_index = pos.round().long().clamp(0, 255)
                    out = out[pos_index[:, 1], pos_index[:, 0]]
                    xs.append(out)

                x = torch.cat(xs, dim=-1)
            else:
                # No keypoints: empty feature matrix (512 + 512 channels).
                x = torch.tensor([], dtype=torch.float).view(0, 1024)

            data = Data(x=x, pos=pos, y=y, name=filename)

            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)

            if i < len(train_split):
                train_data_list.append(data)
            else:
                test_data_list.append(data)

        torch.save(self.collate(train_data_list), self.processed_paths[0])
        torch.save(self.collate(test_data_list), self.processed_paths[1])
    def process(self):
        """Build the WILLOW-style keypoint dataset for the chosen category.

        For every image of the category: load the 10 annotated keypoints
        from the matching ``.mat`` file, rescale them into 256x256 image
        coordinates, run the resized image through a pretrained VGG16 and
        sample the relu4_2 / relu5_1 feature maps at each keypoint. Each
        image becomes a ``Data`` graph with node features ``x`` and
        positions ``pos``; the collated list is saved to
        ``self.processed_paths[0]``.
        """
        if models is None or T is None or Image is None:
            raise ImportError('Package `torchvision` could not be found.')

        category = self.category.capitalize()
        names = glob.glob(osp.join(self.raw_dir, category, '*.png'))
        names = sorted([name[:-4] for name in names])

        # The hook captures intermediate VGG16 activations on each forward
        # pass; the list is cleared and refilled once per image.
        vgg16_outputs = []

        def hook(module, x, y):
            vgg16_outputs.append(y.to('cpu'))

        vgg16 = models.vgg16(pretrained=True).to(self.device)
        vgg16.eval()
        vgg16.features[20].register_forward_hook(hook)  # relu4_2
        vgg16.features[25].register_forward_hook(hook)  # relu5_1

        # Standard ImageNet normalization.
        transform = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        data_list = []
        for name in names:
            # 'pts_coord' is (2, N): row 0 holds x, row 1 holds y.
            pos = loadmat('{}.mat'.format(name))['pts_coord']
            x, y = torch.from_numpy(pos).to(torch.float)
            pos = torch.stack([x, y], dim=1)

            # The "face" category contains a single image with less than 10
            # keypoints, so we need to skip it.
            if pos.size(0) != 10:
                continue

            with open('{}.png'.format(name), 'rb') as f:
                img = Image.open(f).convert('RGB')

            # Rescale keypoints.
            pos[:, 0] = pos[:, 0] * 256.0 / (img.size[0])
            pos[:, 1] = pos[:, 1] * 256.0 / (img.size[1])

            img = img.resize((256, 256), resample=Image.BICUBIC)

            img = transform(img)
            size = img.size()[-2:]
            vgg16_outputs.clear()
            with torch.no_grad():
                vgg16(img.unsqueeze(0).to(self.device))

            # Upsample each captured feature map back to image resolution and
            # read it out at the (rounded, clamped) keypoint pixels.
            xs = []
            for out in vgg16_outputs:
                out = F.interpolate(out, size, mode='bilinear',
                                    align_corners=False)
                out = out.squeeze(0).permute(1, 2, 0)
                pos_index = pos.round().long().clamp(0, 255)
                out = out[pos_index[:, 1], pos_index[:, 0]]
                xs.append(out)
            # Concatenate relu4_2 and relu5_1 features per keypoint.
            x = torch.cat(xs, dim=-1)

            data = Data(x=x, pos=pos)

            if self.pre_filter is not None and not self.pre_filter(data):
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[0])
Exemple #29
0
    def fit(
        self,
        features,
        adj,
        labels,
        idx_train,
        idx_val=None,
        idx_test=None,
        train_iters=81,
        att_0=None,
        attention=False,
        model_name=None,
        initialize=True,
        verbose=False,
        normalize=False,
        patience=510,
    ):
        '''
            train the gcn model, when idx_val is not None, pick the best model
            according to the validation loss

            features/adj/labels are converted to tensors on self.device when
            needed, a self-loop is added to adj, and training is delegated to
            self._train_with_val. att_0, model_name, normalize and patience
            are accepted for interface compatibility but unused in the
            current code path.
        '''
        """SAINT Sampler"""
        """form data"""

        # Bundle everything into a torch_geometric Data container (dense
        # features) — kept around for the commented-out GraphSAINT loader.
        data = Data(adj=adj,
                    features=features.to_dense(),
                    labels=labels,
                    idx_train=idx_train,
                    idx_val=idx_val,
                    idx_test=idx_test,
                    num_node_features=int(features.shape[-1]),
                    num_classes=int(labels.max() + 1))
        # NOTE(review): hard-coded node count — presumably specific to one
        # dataset; confirm, or derive it from features.shape[0].
        data.num_nodes = 2110
        data.num_classes = int(labels.max() + 1)
        data.num_node_features = int(features.shape[-1])

        # loader = GraphSAINTRandomWalkSampler(data, batch_size=6000, walk_length=2,
        #                                      num_steps=5, sample_coverage=1000,
        #                                      save_dir='saint_data/',
        #                                      num_workers=1)

        self.sim = None
        self.idx_test = idx_test
        self.attention = attention

        if initialize:
            self.initialize()

        # Convert plain (non-tensor) inputs to tensors on the target device;
        # tensors are simply moved.
        if type(adj) is not torch.Tensor:
            features, adj, labels = utils.to_tensor(features,
                                                    adj,
                                                    labels,
                                                    device=self.device)
        else:
            features = features.to(self.device)
            adj = adj.to(self.device)
            labels = labels.to(self.device)

        # normalize = False # we don't need normalize here, the norm is conducted in the GCN (self.gcn1) model
        # if normalize:
        #     if utils.is_sparse_tensor(adj):
        #         adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
        #     else:
        #         adj_norm = utils.normalize_adj_tensor(adj)
        # else:
        #     adj_norm = adj
        # add self loop
        adj = self.add_loop_sparse(adj)
        """The normalization gonna be done in the GCNConv"""
        self.adj_norm = adj
        self.features = features
        self.labels = labels

        # if idx_val is None:
        #     self._train_without_val(labels, idx_train, train_iters, verbose)
        # else:
        #     if patience < train_iters:
        #         self._train_with_early_stopping(labels, idx_train, idx_val, train_iters, patience, verbose)
        #     else:
        self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)
Exemple #30
0
def build_graph(img,
                tracks,
                current_detections,
                distance_limit,
                test=True,
                mean_prediction=False):
    """Build a track/detection association graph for a single frame.

    Nodes are the image crops of the existing tracks followed by the current
    detections. Edges (stored as two directed arcs, so effectively
    undirected) connect every track/detection pair whose boxes are closer
    than ``distance_limit``; ``ground_truth`` holds a 1.0/0.0 match flag for
    every track-detection pair (matrix X_ref) and, when ``test`` is True,
    the full pair index matrix is stored as ``edges_complete`` (matrix A).
    With ``mean_prediction=True`` the Kalman-predicted track position is
    used instead of the last observed box. Returns a ``Data`` object when
    both tracks and detections are present.
    """
    if len(tracks) and len(current_detections):
        node_attr = []
        edge_attr = []
        coords_original = []
        coords_normalized = []
        edges_first_row = []
        edges_second_row = []
        edges_complete_first_row = []
        edges_complete_second_row = []
        ground_truth = []

        for track in tracks:
            if mean_prediction == True:
                # Position prediction:
                temp = Detection(
                    track.mean[:4], format='xyah'
                )  # take the predicted position from the Kalman filter, in xyah format
                bbox = temp.to_tlbr()
                bbox_norm = bbox_normalization(img, temp.to_xywh())
            else:
                # Without position prediction:
                bbox = track.bbox
                bbox_norm = track.bbox_normalized

            coords_original.append(
                bbox
            )  # original coordinates in tlbr format, for computing IoU
            coords_normalized.append(
                bbox_norm
            )  # normalized coordinates in xywh format, for comparing positional differences
            node_attr.append(track.crop)

        for detection in current_detections:
            coords_original.append(detection.bbox)
            coords_normalized.append(detection.bbox_normalized)
            node_attr.append(detection.crop)

        # All-pairs pass: indices < len(tracks) are tracks, the rest are
        # current detections.
        for i in range(len(tracks) + len(current_detections)):
            for j in range(len(tracks) + len(current_detections)):
                # Euclidean distance between the top-left corners of box i and box j.
                distance = (
                    (coords_original[i][0] - coords_original[j][0])**2 +
                    (coords_original[i][1] - coords_original[j][1])**2)**0.5
                if i < len(tracks) and j >= len(tracks):
                    if distance < distance_limit:
                        edges_first_row.append(i)
                        edges_second_row.append(j)
                        edge_attr.append([0.0])
                    # building matrix A
                    if test == True:
                        edges_complete_first_row.append(i)
                        edges_complete_second_row.append(j)
                    # building matrix X_ref
                    if int(tracks[i].track_id) == int(
                            current_detections[j - len(tracks)].track_id):
                        ground_truth.append(1.0)
                    else:
                        ground_truth.append(0.0)
                # undirected connections (reverse direction)
                elif i >= len(tracks) and j < len(tracks):
                    if distance < distance_limit:
                        edges_first_row.append(i)
                        edges_second_row.append(j)
                        edge_attr.append([0.0])

        frame_node_attr = torch.stack(node_attr)
        frame_edge_attr = torch.tensor(edge_attr, dtype=torch.float)
        frame_edges_index = torch.tensor([edges_first_row, edges_second_row],
                                         dtype=torch.long)
        frame_coords_normalized = torch.tensor(coords_normalized,
                                               dtype=torch.float)
        frame_ground_truth = torch.tensor(ground_truth, dtype=torch.float)
        tracklets_frame = torch.tensor(len(tracks),
                                       dtype=torch.float).reshape(1)
        detections_frame = torch.tensor(len(current_detections),
                                        dtype=torch.float).reshape(1)
        coords_original = torch.tensor(coords_original, dtype=torch.float)
        edges_complete = torch.tensor(
            [edges_complete_first_row, edges_complete_second_row],
            dtype=torch.long)

        data = Data(x=frame_node_attr,
                    edge_index=frame_edges_index,
                    edge_attr=frame_edge_attr,
                    coords_normalized=frame_coords_normalized,
                    coords_original=coords_original,
                    ground_truth=frame_ground_truth,
                    det_num=detections_frame,
                    track_num=tracklets_frame,
                    edges_complete=edges_complete)
        return data