def process(self):
    """Convert the raw RDF dump and label tables into one saved ``Data`` object.

    Reads the four paths in ``self.raw_paths`` (gzipped N-Triples graph,
    task table, train split, test split). Every triple contributes a
    forward edge of type ``2 * rel`` and an inverse edge of type
    ``2 * rel + 1``; relation ids are assigned by descending predicate
    frequency. Labelled entities from the TSV files are mapped to node ids
    and the collated result is written to ``self.processed_paths[0]``.

    Raises:
        ValueError: If ``self.name`` is not one of the supported datasets.
    """
    import gzip

    import pandas as pd
    import rdflib as rdf

    graph_file, task_file, train_file, test_file = self.raw_paths

    g = rdf.Graph()
    with gzip.open(graph_file, 'rb') as f:
        g.parse(file=f, format='nt')

    # ``Counter`` already yields 0 for unseen keys, so no guard is needed.
    freq = Counter(g.predicates())
    relations = sorted(set(g.predicates()), key=lambda rel: -freq[rel])

    subjects = set(g.subjects())
    objects = set(g.objects())
    nodes = list(subjects.union(objects))

    relations_dict = {rel: i for i, rel in enumerate(relations)}
    nodes_dict = {node: i for i, node in enumerate(nodes)}

    edge_list = []
    for s, p, o in g.triples((None, None, None)):
        src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
        edge_list.append([src, dst, 2 * rel])
        edge_list.append([dst, src, 2 * rel + 1])

    # Lexicographic order on (src, dst, type).
    edge_list.sort()
    edge = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
    edge_index, edge_type = edge[:2], edge[2]

    # Column headers of the label TSV files, per dataset. The spelling
    # 'label_cateogory' is intentional: it must match the file header.
    headers = {
        'am': ('label_cateogory', 'proxy'),
        'aifb': ('label_affiliation', 'person'),
        'mutag': ('label_mutagenic', 'bond'),
        'bgs': ('label_lithogenesis', 'rock'),
    }
    if self.name not in headers:
        raise ValueError(
            f"Unknown dataset name {self.name!r}; expected one of "
            f"{sorted(headers)}")
    label_header, nodes_header = headers[self.name]

    labels_df = pd.read_csv(task_file, sep='\t')
    labels_set = set(labels_df[label_header].values.tolist())
    labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}

    # The TSV files hold plain strings, so re-key the rdflib terms by their
    # string form. (`np.unicode` was only an alias of `str` and has been
    # removed from NumPy.)
    nodes_dict = {str(key): val for key, val in nodes_dict.items()}

    def read_split(path):
        """Return (node index tensor, label tensor) for one split file."""
        split_df = pd.read_csv(path, sep='\t')
        indices, labels = [], []
        for nod, lab in zip(split_df[nodes_header].values,
                            split_df[label_header].values):
            indices.append(nodes_dict[nod])
            labels.append(labels_dict[lab])
        return (torch.tensor(indices, dtype=torch.long),
                torch.tensor(labels, dtype=torch.long))

    train_idx, train_y = read_split(train_file)
    test_idx, test_y = read_split(test_file)

    data = Data(edge_index=edge_index)
    data.edge_type = edge_type
    data.train_idx = train_idx
    data.train_y = train_y
    data.test_idx = test_idx
    data.test_y = test_y
    data.num_nodes = edge_index.max().item() + 1

    data, slices = self.collate([data])
    torch.save((data, slices), self.processed_paths[0])
def visualize_subgraph(self, node_idx, edge_index, edge_mask, y=None,
                       threshold=None, edge_y=None, node_alpha=None, seed=10,
                       **kwargs):
    r"""Visualizes the subgraph given an edge mask :attr:`edge_mask`.

    Args:
        node_idx (int): The node id to explain.
            Set to :obj:`-1` to explain graph.
        edge_index (LongTensor): The edge indices.
        edge_mask (Tensor): The edge mask.
        y (Tensor, optional): The ground-truth node-prediction labels used
            as node colorings. All nodes will have the same color
            if :attr:`node_idx` is :obj:`-1`. (default: :obj:`None`)
        threshold (float, optional): Sets a threshold for visualizing
            important edges. If set to :obj:`None`, will visualize all
            edges with transparency indicating the importance of edges.
            (default: :obj:`None`)
        edge_y (Tensor, optional): The edge labels used as edge colorings.
        node_alpha (Tensor, optional): Tensor of floats (0 - 1) indicating
            transparency of each node.
        seed (int, optional): Random seed of the :obj:`networkx` node
            placement algorithm. (default: :obj:`10`)
        **kwargs (optional): Additional arguments; those accepted by
            :func:`nx.draw_networkx_nodes` / :func:`nx.draw_networkx_labels`
            are forwarded to the respective call.

    :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
    """
    import matplotlib.pyplot as plt
    import networkx as nx

    assert edge_mask.size(0) == edge_index.size(1)

    if node_idx == -1:
        # Whole-graph explanation: keep every edge. `torch.ones` is used
        # instead of the legacy `torch.BoolTensor(..., device=...)`
        # constructor, which rejects non-CPU devices.
        hard_edge_mask = torch.ones(edge_index.size(1), dtype=torch.bool,
                                    device=edge_mask.device)
        subset = torch.arange(edge_index.max().item() + 1,
                              device=edge_index.device)
        y = None
    else:
        # Only operate on a k-hop subgraph around `node_idx`.
        subset, edge_index, _, hard_edge_mask = k_hop_subgraph(
            node_idx, self.num_hops, edge_index, relabel_nodes=True,
            num_nodes=None, flow=self.__flow__())

    edge_mask = edge_mask[hard_edge_mask]

    if threshold is not None:
        edge_mask = (edge_mask >= threshold).to(torch.float)

    if y is None:
        y = torch.zeros(edge_index.max().item() + 1,
                        device=edge_index.device)
    else:
        # Scale labels into [0, 1] for the colormap.
        y = y[subset].to(torch.float) / y.max().item()

    if edge_y is None:
        edge_color = ['black'] * edge_index.size(1)
    else:
        colors = list(plt.rcParams['axes.prop_cycle'])
        edge_color = [
            colors[i % len(colors)]['color']
            for i in edge_y[hard_edge_mask]
        ]

    data = Data(edge_index=edge_index, att=edge_mask,
                edge_color=edge_color, y=y, num_nodes=y.size(0)).to('cpu')
    G = to_networkx(data, node_attrs=['y'],
                    edge_attrs=['att', 'edge_color'])
    # Map the relabelled (0..n-1) node ids back to the original ids.
    mapping = dict(enumerate(subset.tolist()))
    G = nx.relabel_nodes(G, mapping)

    node_args = set(signature(nx.draw_networkx_nodes).parameters.keys())
    node_kwargs = {k: v for k, v in kwargs.items() if k in node_args}
    node_kwargs['node_size'] = kwargs.get('node_size') or 800
    node_kwargs['cmap'] = kwargs.get('cmap') or 'cool'

    label_args = set(signature(nx.draw_networkx_labels).parameters.keys())
    label_kwargs = {k: v for k, v in kwargs.items() if k in label_args}
    label_kwargs['font_size'] = kwargs.get('font_size') or 10

    pos = nx.spring_layout(G, seed=seed)
    ax = plt.gca()
    # Arrows stop at the node border: shrink by the node radius.
    shrink = sqrt(node_kwargs['node_size']) / 2.0
    for source, target, attrs in G.edges(data=True):
        ax.annotate(
            '', xy=pos[target], xycoords='data', xytext=pos[source],
            textcoords='data', arrowprops=dict(
                arrowstyle="->",
                alpha=max(attrs['att'], 0.1),
                color=attrs['edge_color'],
                shrinkA=shrink,
                shrinkB=shrink,
                connectionstyle="arc3,rad=0.1",
            ))

    if node_alpha is None:
        nx.draw_networkx_nodes(G, pos, node_color=y.tolist(),
                               **node_kwargs)
    else:
        node_alpha_subset = node_alpha[subset]
        assert ((node_alpha_subset >= 0) & (node_alpha_subset <= 1)).all()
        nx.draw_networkx_nodes(G, pos, alpha=node_alpha_subset.tolist(),
                               node_color=y.tolist(), **node_kwargs)

    nx.draw_networkx_labels(G, pos, **label_kwargs)

    return ax, G
def _process(self, data_list):
    """Merge ``data_list`` into one object via ``Batch.from_data_list``.

    An empty input yields a fresh, empty ``Data()``. Otherwise the ``batch``
    attribute is removed from the merged object before it is returned.
    """
    if len(data_list) > 0:
        merged = Batch.from_data_list(data_list)
        del merged.batch
        return merged
    return Data()
def visualize_subgraph(self, node_idx, edge_index, edge_mask, y=None, k=2,
                       threshold=None, **kwargs):
    r"""Visualizes the subgraph around :attr:`node_idx` given an edge mask
    :attr:`edge_mask`.

    Args:
        node_idx (int): The node id to explain.
        edge_index (LongTensor): The edge indices.
        edge_mask (Tensor): The edge mask. If it has one entry per edge of
            :attr:`edge_index`, it is restricted to the edges of the
            extracted subgraph; otherwise it is used as given.
        y (Tensor, optional): The ground-truth node-prediction labels used
            as node colorings. (default: :obj:`None`)
        k (int, optional): Number of hops of the subgraph extracted around
            :attr:`node_idx`. (default: :obj:`2`)
        threshold (float, optional): Sets a threshold for visualizing
            important edges. If set to :obj:`None`, will visualize all
            edges with transparency indicating the importance of edges.
            (default: :obj:`None`)
        **kwargs (optional): Additional arguments; those accepted by
            :func:`nx.draw_networkx_nodes` / :func:`nx.draw_networkx_labels`
            are forwarded to the respective call.

    :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
    """
    from inspect import signature

    num_edges = edge_index.size(1)

    # Only operate on a k-hop subgraph around `node_idx`.
    subset, edge_index, _, hard_edge_mask = k_hop_subgraph(
        node_idx, k, edge_index, relabel_nodes=True)

    # A mask covering the full edge set must be cut down to the subgraph's
    # edges, otherwise its entries no longer line up with `edge_index`.
    if edge_mask.size(0) == num_edges:
        edge_mask = edge_mask[hard_edge_mask]

    if threshold is not None:
        edge_mask = (edge_mask >= threshold).to(torch.float)

    if y is None:
        y = torch.zeros(edge_index.max().item() + 1,
                        device=edge_index.device)
    else:
        # Scale labels into [0, 1] for the colormap.
        y = y[subset].to(torch.float) / y.max().item()

    data = Data(edge_index=edge_index, att=edge_mask, y=y,
                num_nodes=y.size(0)).to('cpu')
    G = to_networkx(data, node_attrs=['y'], edge_attrs=['att'])
    # Map the relabelled (0..n-1) node ids back to the original ids.
    mapping = dict(enumerate(subset.tolist()))
    G = nx.relabel_nodes(G, mapping)

    # Forward only the keyword arguments each draw function accepts, so
    # that e.g. `font_size` does not reach `draw_networkx_nodes`.
    node_args = set(signature(nx.draw_networkx_nodes).parameters.keys())
    node_kwargs = {key: v for key, v in kwargs.items() if key in node_args}
    node_kwargs['node_size'] = kwargs.get('node_size') or 800
    node_kwargs['cmap'] = kwargs.get('cmap') or 'Accent'

    label_args = set(signature(nx.draw_networkx_labels).parameters.keys())
    label_kwargs = {key: v for key, v in kwargs.items() if key in label_args}
    label_kwargs['font_size'] = kwargs.get('font_size') or 10

    pos = nx.spring_layout(G)
    ax = plt.gca()
    ax.axis('off')
    # Arrows stop at the node border: shrink by the node radius.
    shrink = sqrt(node_kwargs['node_size']) / 2.0
    for source, target, attrs in G.edges(data=True):
        ax.annotate(
            '', xy=pos[target], xycoords='data', xytext=pos[source],
            textcoords='data', arrowprops=dict(
                arrowstyle="->",
                alpha=max(attrs['att'], 0.05),
                shrinkA=shrink,
                shrinkB=shrink,
                connectionstyle="arc3,rad=0.1",
            ))
    nx.draw_networkx_nodes(G, pos, node_color=y.tolist(), **node_kwargs)
    nx.draw_networkx_labels(G, pos, **label_kwargs)

    return ax, G
node2id[node1] = id1 try: id2 = node2id[node2] except: id2 = len(node2id) node2id[node2] = id2 edge_list.add((id1, id2)) # edge_list.add((id2, id1)) except: pass edge_index = torch.tensor(np.array(edge_list).T, dtype=torch.long) data = Data(edge_index=edge_index) model = Node2Vec(data.edge_index, embedding_dim=128, walk_length=4, context_size=2, walks_per_node=2, sparse=True).to(device) loader = model.loader(batch_size=2000, shuffle=True, num_workers=12) optimizer = torch.optim.SparseAdam(model.parameters(), lr=0.01) for epoch in range(EPOCHS): model.train() # total_loss = 0 for pos_rw, neg_rw in loader: optimizer.zero_grad() loss = model.loss(pos_rw.to(device), neg_rw.to(device)) loss.backward()
def perturb_edges(data, name, remove_pct, add_pct, hidden_channels=16,
                  epochs=400):
    """Replace the edges of ``data`` in place with a perturbed edge set.

    Results are cached under ``{ROOT}/cache/edge``: ``{name}.pt`` stores the
    predicted edge probabilities together with the original adjacency, and
    ``{name}_{remove_pct}_{add_pct}.pt`` stores the final edge index. When
    neither exists, a ``GAE`` with a ``GCNEncoder`` is trained on
    ``ori_x`` to predict edge probabilities, which are then passed to
    ``sample_graph_det`` together with ``remove_pct`` and ``add_pct``.

    The perturbed edges are written to ``data.train_edge_index`` when
    ``data.setting == 'inductive'`` and to ``data.edge_index`` otherwise.

    Args:
        data: Graph object; must expose ``setting`` plus the attributes read
            below (``ori_x``, ``edge_index`` or the ``train_*`` variants).
        name (str): Dataset name used in the cache file names.
        remove_pct: Forwarded to ``sample_graph_det``.
        add_pct: Forwarded to ``sample_graph_det``.
        hidden_channels (int): Hidden size passed to ``GCNEncoder``.
        epochs (int): Number of training epochs for the auto-encoder.

    Returns:
        None. ``data`` is modified in place; nothing happens when both
        percentages are zero.
    """
    if remove_pct == 0 and add_pct == 0:
        return

    import os

    cache_dir = f'{ROOT}/cache/edge'
    perturbed_path = f'{cache_dir}/{name}_{remove_pct}_{add_pct}.pt'
    scores_path = f'{cache_dir}/{name}.pt'

    def set_edges(new_edge_index):
        """Store the perturbed edges on the attribute the setting uses."""
        if data.setting == 'inductive':
            data.train_edge_index = new_edge_index
        else:
            data.edge_index = new_edge_index

    # NOTE: the cache files are pickles; only load caches you created.
    # 1) Final perturbed edge index already cached.
    try:
        with open(perturbed_path, 'rb') as f:
            cached = pickle.load(f)
    except FileNotFoundError:
        pass
    else:
        print(f'Use cached edge augmentation for dataset {name}')
        set_edges(cached)
        return

    # 2) Edge probabilities cached: only the sampling step is needed.
    try:
        with open(scores_path, 'rb') as f:
            A_pred, adj_orig = pickle.load(f)
    except FileNotFoundError:
        print(
            f'cache/edge/{name}_{remove_pct}_{add_pct}.pt not found! Regenerating it now'
        )
    else:
        A = sample_graph_det(adj_orig, A_pred, remove_pct, add_pct)
        new_edge_index, _ = from_scipy_sparse_matrix(A)
        # Respect the inductive/transductive setting here as well, so this
        # branch agrees with the other two.
        set_edges(new_edge_index)
        with open(perturbed_path, 'wb') as f:
            pickle.dump(new_edge_index, f)
        return

    # 3) Nothing cached: train a graph auto-encoder to score edges.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    if data.setting == 'inductive':
        train_data = Data(x=data.train_x, ori_x=data.ori_x,
                          edge_index=data.train_edge_index, y=data.train_y)
    else:
        train_data = deepcopy(data)

    # Keep the unsplit edges: `train_test_split_edges` rewrites the object.
    edge_index = deepcopy(train_data.edge_index)
    train_data = train_test_split_edges(train_data, val_ratio=0.1,
                                        test_ratio=0)
    num_features = train_data.ori_x.shape[1]
    model = GAE(GCNEncoder(num_features, hidden_channels))
    model = model.to(device)
    x = train_data.ori_x.to(device)
    train_pos_edge_index = train_data.train_pos_edge_index.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    best_val_auc = 0
    best_z = None
    for epoch in range(1, epochs + 1):
        model.train()
        optimizer.zero_grad()
        z = model.encode(x, train_pos_edge_index)
        loss = model.recon_loss(z, train_pos_edge_index)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            z = model.encode(x, train_pos_edge_index)

        auc, ap = model.test(z, train_data.val_pos_edge_index,
                             train_data.val_neg_edge_index)
        print('Val | Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(
            epoch, auc, ap))
        if auc > best_val_auc:
            best_val_auc = auc
            best_z = deepcopy(z)

    # Score edges with the embedding that achieved the best validation AUC;
    # fall back to the last embedding if no epoch ever improved on 0.
    if best_z is None:
        best_z = z
    A_pred = torch.sigmoid(torch.mm(best_z, best_z.T)).cpu().numpy()

    adj_orig = to_scipy_sparse_matrix(edge_index).asformat('csr')
    adj_pred = sample_graph_det(adj_orig, A_pred, remove_pct, add_pct)
    new_edge_index, _ = from_scipy_sparse_matrix(adj_pred)
    set_edges(new_edge_index)

    # The cache directory may not exist on a first run.
    os.makedirs(cache_dir, exist_ok=True)
    with open(scores_path, 'wb') as f:
        pickle.dump((A_pred, adj_orig), f)
    with open(perturbed_path, 'wb') as f:
        pickle.dump(new_edge_index, f)
def start_view(args):
    """Plot a few sampled particles in embedding space and detector space.

    Reads event ``args.evtid``, samples ``args.npids`` particles (seeded by
    ``args.seed``) among those with more than 5 hits, runs the embedding
    model loaded from ``args.embed_ckpt_dir`` and writes the figures into
    ``args.outdir``. Every figure is closed after saving so repeated calls
    do not accumulate open pyplot figures.
    """
    outdir = args.outdir
    event = master.Event(utils_dir.inputdir)
    event.read(args.evtid)

    # randomly select N particles with each having at least 6 hits
    pids = event.particles[(event.particles.nhits) > 5]
    np.random.seed(args.seed)
    # NOTE: `randint` samples with replacement, so ids may repeat.
    rnd = np.random.randint(0, pids.shape[0], args.npids)
    sel_pids = pids.particle_id.values[rnd]

    event._hits = event.hits[event.hits.particle_id.isin(sel_pids)]
    hits = event.cluster_info(utils_dir.detector_path)

    # track labeling -- determine true edges...
    # R is the distance of each hit from its particle's vertex.
    hits = hits.assign(R=np.sqrt((hits.x - hits.vx)**2 +
                                 (hits.y - hits.vy)**2 +
                                 (hits.z - hits.vz)**2))
    hits = hits.sort_values('R').reset_index(drop=True).reset_index(
        drop=False)
    # Per particle: a list (one entry per layer) of lists of hit indices.
    hit_list = hits.groupby(
        ['particle_id', 'layer'],
        sort=False)['index'].agg(lambda x: list(x)).groupby(
            level=0).agg(lambda x: list(x))

    # Connect every hit of one layer group to every hit of the next one.
    e = []
    for row in hit_list.values:
        for i, j in zip(row[0:-1], row[1:]):
            e.extend(itertools.product(i, j))
    layerless_true_edges = np.array(e).T

    # input data for embedding
    data = Data(
        x=torch.from_numpy(hits[['r', 'phi', 'z']].to_numpy() /
                           np.array([1000, np.pi, 1000])).float(),
        pid=torch.from_numpy(hits.particle_id.to_numpy()),
        layers=torch.from_numpy(hits.layer.to_numpy()),
        hid=torch.from_numpy(hits.hit_id.to_numpy()))
    cell_features = [
        'cell_count', 'cell_val', 'leta', 'lphi', 'lx', 'ly', 'lz', 'geta',
        'gphi'
    ]
    data.layerless_true_edges = torch.from_numpy(layerless_true_edges)
    data.cell_data = torch.from_numpy(hits[cell_features].values).float()

    action = 'embedding'
    config_file = pkg_resources.resource_filename(
        "exatrkx", os.path.join('configs', config_dict[action]))
    with open(config_file) as f:
        e_config = yaml.load(f, Loader=yaml.FullLoader)

    e_config['train_split'] = [1, 0, 0]
    e_config['r_val'] = 2.0
    e_model = LayerlessEmbedding(e_config)
    e_model = e_model.load_from_checkpoint(args.embed_ckpt_dir,
                                           hparams=e_config)
    e_model.eval()

    spatial = e_model(torch.cat([data.cell_data, data.x], axis=-1))
    spatial_np = spatial.detach().numpy()

    # plot hits in the embedding space
    embedding_dims = [(0, 1), (2, 3), (4, 5), (6, 7)]
    for id1, id2 in embedding_dims:
        fig = plt.figure(figsize=(6, 6))
        for pid in sel_pids:
            idx = hits.particle_id == pid
            plt.scatter(spatial_np[idx, id1], spatial_np[idx, id2])
        plt.savefig(
            os.path.join(outdir, "embedding_{}_{}.pdf".format(id1, id2)))
        # `del fig` would leave the figure registered with pyplot.
        plt.close(fig)

    # build edges from the embedding space
    e_spatial = utils_torch.build_edges(spatial, e_model.hparams['r_val'],
                                        e_model.hparams['knn_val'])
    e_spatial_np = e_spatial.detach().numpy()
    e_spatial_np_t = e_spatial_np.T
    layerless_true_edges_t = layerless_true_edges.T  # same as e

    # view hits with or without edge candidates...
    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, projection='3d')
    for pid in sel_pids:
        track = hits[hits.particle_id == pid]
        ax.scatter(track.x.values, track.y.values, track.z.values)

    # add edges
    for iedge in range(e_spatial_np.shape[1]):
        pair = hits.iloc[e_spatial_np_t[iedge]]
        ax.plot(pair.x.values, pair.y.values, pair.z.values,
                color='k', alpha=0.3, lw=1.)
    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    plt.savefig(os.path.join(outdir, "emedding_edges_3d.pdf"))
    plt.close(fig)
    del ax

    def plot_edges(xname, yname, xlabel, ylabel, outname, with_edges=True,
                   no_axis=False, edges=e_spatial_np_t):
        """Scatter the selected tracks in (xname, yname), optionally with
        one line per row of ``edges``, and save as PNG and PDF."""
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111)
        for pid in sel_pids:
            track = hits[hits.particle_id == pid]
            ax.scatter(track[xname].values, track[yname].values)

        # add edges
        if with_edges:
            for iedge in range(edges.shape[0]):
                pair = hits.iloc[edges[iedge]]
                ax.plot(pair[xname].values, pair[yname].values,
                        color='k', alpha=0.3, lw=1.)
        ax.set_xlabel(xlabel, fontsize=16)
        ax.set_ylabel(ylabel, fontsize=16)
        if xname == 'z':
            ax.set_xlim(-3000, 3000)
        trans = False
        if no_axis:
            ax.set_axis_off()
            trans = True
        plt.savefig(os.path.join(outdir, "{}.png".format(outname)),
                    transparent=trans)
        plt.savefig(os.path.join(outdir, "{}.pdf".format(outname)),
                    transparent=trans)
        plt.close(fig)

    def plot_hits(xname, yname, outname):
        """Scatter all remaining hits in (xname, yname) and save as PDF."""
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111)
        ax.scatter(hits[xname].values, hits[yname].values)
        if xname == 'z':
            ax.set_xlim(-3000, 3000)
        ax.set_xlabel(xname, fontsize=16)
        ax.set_ylabel(yname, fontsize=16)
        plt.savefig(os.path.join(outdir, "{}.pdf".format(outname)))
        plt.close(fig)

    plot_edges("x", 'y', 'x', 'y', 'embedding_edges_x_y')
    plot_edges("z", 'r', 'z', 'r', 'embedding_edges_z_r')
    plot_edges("x", 'y', 'x', 'y', 'embedding_edges_truth_x_y',
               edges=layerless_true_edges_t)
    plot_edges("z", 'r', 'z', 'r', 'embedding_edges_truth_z_r',
               edges=layerless_true_edges_t)
    plot_edges("x", 'y', 'x', 'y', 'embedding_hits_truth_x_y',
               with_edges=False)
    plot_edges("z", 'r', 'z', 'r', 'embedding_hits_truth_z_r',
               with_edges=False)
    plot_hits("x", 'y', 'embedding_hits_x_y')
    plot_hits("z", 'r', 'embedding_hits_z_r')
    plot_edges("x", 'y', 'x', 'y', 'embedding_front', no_axis=True)
def torch_geometric_graph_from_pdb_code(self,
                                        pdb_code,
                                        chain_selection='all',
                                        edge_construction=['contacts'],
                                        contact_file=None,
                                        encoding=False,
                                        k_nn=None,
                                        custom_edges=None):
    """
    Produces a PyTorch Geometric Data object from a protein structure.

    The graph is first built with :meth:`dgl_graph_from_pdb_code`; all of
    its node features and all of its edge features are then cast to float
    and concatenated column-wise (1-D features become a single column).

    :param pdb_code: 4-character PDB accession code
    :type pdb_code: str
    :param chain_selection: Specifies polypeptide chains to include. e.g. one of {'A', 'B' ,'AB', 'BC'}, defaults to 'all'
    :type chain_selection: str
    :param edge_construction: List containing edge construction to be used. ['contacts', 'distance', 'delaunay'], defaults to ['contacts']
    :type edge_construction: list
    :param contact_file: Path to contact file if using local file.
    :type contact_file: str
    :param encoding: Must be True; non-numeric features cannot be concatenated into tensors.
    :type encoding: bool
    :param k_nn: Specifies K nearest neighbours to use in KNN edge construction, defaults to None
    :type k_nn: int, optional
    :param custom_edges: User-supplied edges to use, defaults to None
    :type custom_edges: Pandas DataFrame, optional
    :return: Pytorch Geometric Graph of protein structure, with ``edge_index`` of shape ``[2, num_edges]``.
    :rtype: PyTorch Geometric Data object
    :raises AssertionError: If ``encoding`` is falsy.
    """
    assert encoding, 'Non-numeric feature encoding must be True'
    # NOTE: the `edge_construction` default is a shared list object; it is
    # only forwarded here and must not be mutated.
    g, _, _ = self.dgl_graph_from_pdb_code(
        pdb_code=pdb_code,
        chain_selection=chain_selection,
        contact_file=contact_file,
        edge_construction=edge_construction,
        custom_edges=custom_edges,
        encoding=encoding,
        k_nn=k_nn)

    def as_feature_matrix(columns):
        """Concatenate features column-wise, promoting 1-D ones to (n, 1)."""
        columns = [
            col.unsqueeze(dim=1) if len(col.shape) == 1 else col
            for col in columns
        ]
        return torch.cat(columns, dim=1)

    # Get node features from DGL graph and concatenate them
    node_feature_names = g.node_attr_schemes().keys()
    node_features = as_feature_matrix(
        [g.ndata[feat].float() for feat in node_feature_names])

    # Get edge features from DGL graph and concatenate them
    edge_types = g.edge_attr_schemes().keys()
    edge_feats = as_feature_matrix(
        [g.edata[e].float() for e in edge_types])

    # Create the Torch Geometric graph. `g.edges()` yields the (src, dst)
    # id tensors; stacking them along dim 0 gives the [2, num_edges]
    # layout PyG expects (dim=1 would give [num_edges, 2]).
    geom_graph = Data(x=node_features,
                      edge_index=torch.stack(g.edges(), dim=0),
                      edge_attr=edge_feats)
    print(geom_graph)
    return geom_graph
def download(self):
    """Collect the raw ABIDE matrices and labels into ``abide_raw.pkl``.

    ``self.path`` must hold two entries: the directory with one folder per
    subject, and the label spreadsheet. For every subject, each file whose
    name contains ``'matrix'`` is loaded and shifted by +1, and the
    subject's labels are attached as a dict. A human-readable summary is
    written to ``abide_raw_info.txt`` and the ``{subject: Data}`` mapping is
    pickled, both inside ``self.raw_dir``.
    """
    assert len(self.path) == 2
    path_data, path_label = self.path

    labels = read_xlsx(path_label)
    labels = labels.astype({'subject': 'str'})
    labels['SITE_ID'], uniques = pd.factorize(labels['SITE_ID'])
    # Recode: DX_GROUP 2 -> 0 and 1 -> 1; SEX 1 -> 0 and 2 -> 1.
    labels['DX_GROUP'] = 2 - labels['DX_GROUP']
    labels['SEX'] = labels['SEX'] - 1
    # Tuple of per-column {subject: value} dicts, in the order listed.
    labels = itemgetter('SITE_ID', 'DX_GROUP', 'DSM_IV_TR', 'AGE_AT_SCAN',
                        'SEX')(labels.set_index('subject').to_dict())

    subjlist = os.listdir(path_data)
    # The first subject's folder defines which feature files are read for
    # every subject.
    filelist = [
        fname for fname in os.listdir(os.path.join(path_data, subjlist[0]))
        if 'matrix' in fname
    ]

    with open(os.path.join(self.raw_dir, 'abide_raw_info.txt'), 'w') as f:
        print('Label info:', file=f)
        # This line previously went to stdout because `file=f` was missing.
        print('All labels:', 'SITE_ID', 'DX_GROUP', 'DSM_IV_TR',
              'AGE_AT_SCAN', 'SEX', file=f)
        print('Site labels (0-n):', uniques.values, file=f)
        print('DX_GROUP (0/1):', 'control, autism', file=f)
        print('DSM_IV_TR (0-n):',
              'control, autism, aspergers, PDD-NOS, aspergers or PDD-NOS',
              file=f)
        print('SEX (0/1):', 'M, F', file=f)
        print('\n', file=f)
        print('Features:', file=f)
        # Unpack so that `sep` actually puts one entry per line.
        print(*filelist, sep='\n', file=f)
        print('\n', file=f)
        print('Saved subjects:', file=f)
        print(*subjlist, sep='\n', file=f)
        print('\n', file=f)

    dataset = {}
    for subj in subjlist:
        print('downloading', subj, '...')
        features = []
        for file in filelist:
            filepath = os.path.join(path_data, subj, file)
            # original value (-1 ~ 1), adjust value of the matrix to 0 ~ 2
            matrix = torch.tensor(np.loadtxt(filepath),
                                  dtype=torch.float32) + 1
            features.append(matrix)
        y = {
            'SITE_ID': labels[0][subj],
            'DX_GROUP': labels[1][subj],
            'DSM_IV_TR': labels[2][subj],
            'AGE_AT_SCAN': labels[3][subj],
            'SEX': labels[4][subj]
        }
        # One constant feature per matrix row; sized from the last matrix
        # loaded above, so `filelist` must not be empty.
        x = torch.ones([matrix.shape[0], 1], dtype=torch.float32)
        data = Data(x=x, y=y)
        data.features = features
        dataset[subj] = data

    with open(os.path.join(self.raw_dir, 'abide_raw.pkl'), 'wb') as f:
        pickle.dump(dataset, f)

    print('ABIDE dataset saved to path:', self.raw_dir)
def generate_torchgeom_dataset(data):
    """Returns dataset that can be used to train our model.

    For every simulation the node coordinates are triangulated and the
    Delaunay neighborhoods are made periodic: nodes matched by ``is_near``
    to the three non-origin corners are merged into the origin node, each
    right-boundary node into the left-boundary node with the same ``y``,
    and each top-boundary node into the bottom-boundary node with the same
    ``x``. Merged nodes are dropped and the rest are renumbered ``0..m-1``.

    Args:
        data (dict): Data dictionary with keys t, x, u.

    Returns:
        dataset (list): Array of torchgeometric Data objects.
    """
    n_sims = data['u'].shape[0]
    dataset = []

    for sim_ind in range(n_sims):
        print("{} / {}".format(sim_ind + 1, n_sims))

        x = data['x'][sim_ind]
        tri = Delaunay(x)
        neighbors = neighbors_from_delaunay(tri)

        # Find periodic couples and merge their neighborhoods
        origin_node = 0
        corner_nodes = []
        hor_couples = []   # [left node, matching right node]
        vert_couples = []  # [bottom node, matching top node]
        eps = 1.0e-6
        b = x.ravel().max()  # domain size

        for i in range(x.shape[0]):
            if is_near(x[i], [[b, 0], [0, b], [b, b]]):
                corner_nodes.append(i)
            elif is_near(x[i], [[0, 0]]):
                origin_node = i
            elif abs(x[i, 0]) < eps:  # left boundary
                for j in range(x.shape[0]):
                    if abs(x[j, 0] - b) < eps and abs(x[j, 1] - x[i, 1]) < eps:
                        hor_couples.append([i, j])
            elif abs(x[i, 1]) < eps:  # bottom boundary
                for j in range(x.shape[0]):
                    if abs(x[j, 1] - b) < eps and abs(x[j, 0] - x[i, 0]) < eps:
                        vert_couples.append([i, j])

        remove_nodes = []

        # Merge corners
        for i in corner_nodes:
            neighbors[origin_node].extend(neighbors[i])
            remove_nodes.append(i)

        # Merge horizontal couples
        for i, j in hor_couples:
            neighbors[i].extend(neighbors[j])
            remove_nodes.append(j)

        # Merge vertical couples
        for i, j in vert_couples:
            neighbors[i].extend(neighbors[j])
            remove_nodes.append(j)

        # Sorted so the renumbering below is explicitly order-preserving.
        use_nodes = sorted(set(range(len(x))) - set(remove_nodes))

        # Remove right and top boundaries. A plain list is used instead of
        # an object ndarray: `np.object` no longer exists in NumPy, and an
        # ndarray turns 2-D when all neighbor lists have equal length.
        neighbors = [neighbors[i] for i in use_nodes]

        # Rewrite indices of the removed nodes: removed node -> node it was
        # merged into. Sized by the corners actually found rather than a
        # hard-coded 3.
        merged_into = {corner: origin_node for corner in corner_nodes}
        merged_into.update({j: i for i, j in hor_couples})
        merged_into.update({j: i for i, j in vert_couples})

        # Reset indices
        new_index = {old: new for new, old in enumerate(use_nodes)}

        for i, nbrs in enumerate(neighbors):
            # remove duplicates
            unique = {merged_into.get(v, v) for v in nbrs}
            neighbors[i] = [new_index[v] for v in unique]

        # Directed edge list without self loops.
        edge_index = [
            [i, neighbor]
            for i, nbrs in enumerate(neighbors)
            for neighbor in nbrs
            if i != neighbor
        ]
        edge_index = np.array(edge_index).T

        n = None  # `0:n` with None keeps every time step
        print(f"generate_torchgeom_dataset() -> using {n} steps.")
        tg_data = Data(
            x=torch.Tensor(data['u'][sim_ind, 0, use_nodes, :]),
            # Build the index tensor as int64 directly; going through
            # float32 would corrupt indices above 2**24.
            edge_index=torch.tensor(edge_index, dtype=torch.long),
            y=torch.Tensor(data['u'][sim_ind][0:n, use_nodes]).transpose(0, 1),
            pos=torch.Tensor(data['x'][sim_ind, use_nodes]),
            t=torch.Tensor(data['t'][sim_ind][0:n]),
        )
        dataset.append(tg_data)

    return dataset
walk_length=walk_length, p=p, q=q) paraller.parser = self._sample for item in paraller.run(): if out is None: out = item.get() else: out = torch.cat((out, item.get()), 0) else: out = self._sample(0, start.size(0), start=start, walk_length=walk_length, p=p, q=q) return out if __name__ == "__main__": edge_index = torch.tensor([[0, 1, 1], [1, 0, 2]], dtype=torch.long) x = torch.tensor([[-1], [0], [1]], dtype=torch.float) edge_weight = torch.tensor([1, 0.2, 0.8]) start = torch.tensor([0, 1, 2]) data = Data(x=x, edge_index=edge_index) rw = RandomWalk(data, edge_weight=edge_weight, is_parallel=False, reverse=True) print(rw.walk(start, walk_length=5, p=0.2, q=0.5))
def test_graph_saint():
    """Check the node, edge and random-walk GraphSAINT samplers on a fixed
    6-node graph whose edge types are the non-zero adjacency entries."""
    adj = torch.tensor([
        [1, 2, 3, 0, 4, 0],
        [5, 6, 0, 7, 0, 8],
        [9, 0, 10, 0, 11, 0],
        [0, 12, 0, 13, 0, 14],
        [15, 0, 16, 0, 17, 0],
        [0, 18, 0, 19, 0, 20],
    ])

    edge_index = adj.nonzero(as_tuple=False).t()
    row, col = edge_index[0], edge_index[1]
    edge_type = adj[row, col]
    x = torch.Tensor([[i, i] for i in range(6)])
    data = Data(edge_index=edge_index, x=x, edge_type=edge_type,
                num_nodes=6)

    def check_first(loader, nodes, edges, types):
        """Compare the first sampled subgraph against fixed expectations."""
        first = next(iter(loader))
        assert first.x.tolist() == nodes
        assert first.edge_index.tolist() == edges
        assert first.edge_type.tolist() == types

    def check_all(loader, max_nodes):
        """Check loader length and the size bounds of every batch."""
        assert len(loader) == 4
        for batch in loader:
            assert len(batch) == 5
            assert batch.num_nodes <= max_nodes
            assert batch.num_edges <= max_nodes * 4
            assert batch.node_norm.numel() == batch.num_nodes
            assert batch.edge_norm.numel() == batch.num_edges

    # Node sampler.
    torch.manual_seed(12345)
    node_loader = GraphSAINTNodeSampler(data, batch_size=3, num_steps=4,
                                        sample_coverage=10, log=False)
    check_first(node_loader,
                nodes=[[2, 2], [4, 4], [5, 5]],
                edges=[[0, 0, 1, 1, 2], [0, 1, 0, 1, 2]],
                types=[10, 11, 16, 17, 20])
    check_all(node_loader, max_nodes=3)

    # Edge sampler.
    torch.manual_seed(12345)
    edge_loader = GraphSAINTEdgeSampler(data, batch_size=2, num_steps=4,
                                        sample_coverage=10, log=False)
    check_first(edge_loader,
                nodes=[[0, 0], [2, 2], [3, 3]],
                edges=[[0, 0, 1, 1, 2], [0, 1, 0, 1, 2]],
                types=[1, 3, 9, 10, 13])
    check_all(edge_loader, max_nodes=4)

    # Random-walk sampler.
    torch.manual_seed(12345)
    walk_loader = GraphSAINTRandomWalkSampler(data, batch_size=2,
                                              walk_length=1, num_steps=4,
                                              sample_coverage=10, log=False)
    check_first(walk_loader,
                nodes=[[1, 1], [2, 2], [4, 4]],
                edges=[[0, 1, 1, 2, 2], [0, 1, 2, 1, 2]],
                types=[6, 10, 11, 16, 17])
    check_all(walk_loader, max_nodes=4)
def process(self):
    """Convert the raw RDF dump and label tables into one saved data object.

    Reads the four paths in ``self.raw_paths`` (gzipped N-Triples graph,
    task table, train split, test split). Every triple contributes a
    forward edge of type ``2 * rel`` and an inverse edge of type
    ``2 * rel + 1``; relation ids are assigned by descending predicate
    frequency and edges are ordered by (source, target, type). Depending
    on ``self.hetero`` the result is a ``Data`` object or a ``HeteroData``
    object with one ``('v', str(type), 'v')`` edge store per edge type. The
    collated result is saved to ``self.processed_paths[0]``.

    Raises:
        ValueError: If ``self.name`` is not one of the supported datasets.
    """
    import gzip

    import pandas as pd
    import rdflib as rdf

    graph_file, task_file, train_file, test_file = self.raw_paths

    with hide_stdout():
        g = rdf.Graph()
        with gzip.open(graph_file, 'rb') as f:
            g.parse(file=f, format='nt')

    freq = Counter(g.predicates())

    relations = sorted(set(g.predicates()), key=lambda p: -freq.get(p, 0))
    subjects = set(g.subjects())
    objects = set(g.objects())
    nodes = list(subjects.union(objects))

    N = len(nodes)
    R = 2 * len(relations)  # forward + inverse type per relation

    relations_dict = {rel: i for i, rel in enumerate(relations)}
    nodes_dict = {node: i for i, node in enumerate(nodes)}

    edges = []
    for s, p, o in g.triples((None, None, None)):
        src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
        edges.append([src, dst, 2 * rel])
        edges.append([dst, src, 2 * rel + 1])

    edges = torch.tensor(edges, dtype=torch.long).t().contiguous()
    # Sort by (src, dst, type) using one combined integer key per edge.
    perm = (N * R * edges[0] + R * edges[1] + edges[2]).argsort()
    edges = edges[:, perm]

    edge_index, edge_type = edges[:2], edges[2]

    # Column headers of the label TSV files, per dataset. The spelling
    # 'label_cateogory' is intentional: it must match the file header.
    headers = {
        'am': ('label_cateogory', 'proxy'),
        'aifb': ('label_affiliation', 'person'),
        'mutag': ('label_mutagenic', 'bond'),
        'bgs': ('label_lithogenesis', 'rock'),
    }
    if self.name not in headers:
        raise ValueError(
            f"Unknown dataset name {self.name!r}; expected one of "
            f"{sorted(headers)}")
    label_header, nodes_header = headers[self.name]

    labels_df = pd.read_csv(task_file, sep='\t')
    labels_set = set(labels_df[label_header].values.tolist())
    labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}

    # The TSV files hold plain strings, so re-key the rdflib terms by their
    # string form. (`np.unicode` was only an alias of `str` and has been
    # removed from NumPy.)
    nodes_dict = {str(key): val for key, val in nodes_dict.items()}

    def read_split(path):
        """Return (node index tensor, label tensor) for one split file."""
        split_df = pd.read_csv(path, sep='\t')
        indices, labels = [], []
        for nod, lab in zip(split_df[nodes_header].values,
                            split_df[label_header].values):
            indices.append(nodes_dict[nod])
            labels.append(labels_dict[lab])
        return (torch.tensor(indices, dtype=torch.long),
                torch.tensor(labels, dtype=torch.long))

    train_idx, train_y = read_split(train_file)
    test_idx, test_y = read_split(test_file)

    if not self.hetero:
        data = Data(edge_index=edge_index, edge_type=edge_type,
                    train_idx=train_idx, train_y=train_y,
                    test_idx=test_idx, test_y=test_y, num_nodes=N)
    else:
        data = HeteroData(
            v={
                'train_idx': train_idx,
                'train_y': train_y,
                'test_idx': test_idx,
                'test_y': test_y,
                'num_nodes': N,
            })
        # One edge store per edge type, keyed by the type id as a string.
        for i in range(R):
            mask = edge_type == i
            data['v', f'{i}', 'v'].edge_index = edge_index[:, mask]

    torch.save(self.collate([data]), self.processed_paths[0])
def to_graph(self, threshold=None, format='edge_list', split=True,
             frac=[0.7, 0.1, 0.2], seed=42, order='descending'):
    """Build a graph view of the dataset from its positive pairs.

    Rows whose binary label is 1 become edges between the two entity-ID
    columns (IDs are cast to ``str``); rows labelled 0 are the negative
    pairs. If ``self.raw_y`` holds more than two distinct values, the
    labels are first binarized with ``label_transform``.

    Parameters
    ----------
    threshold : number, optional
        Binarization threshold. Required when the labels are not binary.
    format : str
        ``'edge_list'``: adds ``'edge_list'`` (positive pairs) and
        ``'neg_edges'``; ``'dgl'``: adds ``'dgl_graph'``; ``'pyg'``: adds
        ``'pyg_graph'``; ``'df'``: adds ``'df'``. The ``'dgl'`` and
        ``'pyg'`` formats also add ``'index_to_entities'``, a dict mapping
        each entity ID to its integer node index. Any other value adds
        nothing.
    split : bool
        If True, adds ``'split'`` with the result of
        ``create_fold(df, seed, frac)``.
    frac : list, optional (default=[0.7, 0.1, 0.2])
        Train/val/test split fractions, forwarded to ``create_fold``.
    seed : int
        Forwarded to ``create_fold``.
    order : str
        Forwarded to ``label_transform`` during binarization.

    Returns
    -------
    dict
        The entries described above.

    Raises
    ------
    AttributeError
        If the labels are not binary and ``threshold`` is None.
    ImportError
        If ``format='pyg'`` and PyTorch Geometric cannot be imported.
    """
    # NOTE: the `frac` default is a shared list object; it is only
    # forwarded here and must not be mutated.
    df = self.get_data(format='df')

    if len(np.unique(self.raw_y)) > 2:
        # Validate first so the message below never reports "threshold None".
        if threshold is None:
            raise AttributeError(
                "Please specify the threshold to binarize the data by "
                "'to_graph(threshold = N)'!")
        print("The dataset label consists of affinity scores. "
              "Binarization using threshold " +
              str(threshold) +
              " is conducted to construct the positive edges in the network. "
              "Adjust the threshold by to_graph(threshold = X)",
              flush=True, file=sys.stderr)
        df['label_binary'] = label_transform(self.raw_y, True, threshold,
                                             False, verbose=False,
                                             order=order)
    else:
        # already binary
        df['label_binary'] = df['Y']

    id_columns = [self.entity1_name + '_ID', self.entity2_name + '_ID']
    for column in id_columns:
        df[column] = df[column].astype(str)

    pos_edges = df[df.label_binary == 1][id_columns].values
    neg_edges = df[df.label_binary == 0][id_columns].values

    def index_positive_edges():
        """Map entity IDs of the positive edges to consecutive integers.

        Returns the ID -> index dict, the list of indices, and the source
        and target index arrays.
        """
        unique_entities = np.unique(pos_edges.T.flatten()).tolist()
        index = list(range(len(unique_entities)))
        entity_to_index = dict(zip(unique_entities, index))
        sources = np.array([entity_to_index[i] for i in pos_edges.T[0]])
        targets = np.array([entity_to_index[i] for i in pos_edges.T[1]])
        return entity_to_index, index, sources, targets

    return_dict = {}

    if format == 'edge_list':
        return_dict['edge_list'] = pos_edges
        return_dict['neg_edges'] = neg_edges
    elif format == 'dgl':
        try:
            import dgl
        except ImportError:
            install("dgl")
            import dgl
        dict_, _, edge_list1, edge_list2 = index_positive_edges()
        return_dict['dgl_graph'] = dgl.DGLGraph((edge_list1, edge_list2))
        return_dict['index_to_entities'] = dict_
    elif format == 'pyg':
        try:
            import torch
            from torch_geometric.data import Data
        except (ImportError, OSError) as err:
            raise ImportError(
                "Please see https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html to install pytorch geometric!"
            ) from err
        dict_, index, edge_list1, edge_list2 = index_positive_edges()
        # Stack into one ndarray first; building a tensor from a list of
        # ndarrays is slow and triggers a PyTorch warning.
        edge_index = torch.tensor(np.array([edge_list1, edge_list2]),
                                  dtype=torch.long)
        # Node feature is simply the node's own index.
        x = torch.tensor(np.array(index), dtype=torch.float)
        return_dict['pyg_graph'] = Data(x=x, edge_index=edge_index)
        return_dict['index_to_entities'] = dict_
    elif format == 'df':
        return_dict['df'] = df

    if split:
        return_dict['split'] = create_fold(df, seed, frac)

    return return_dict
def sampler_generater(self, batch, le):
    """Build a ``Data`` object for the pathway rows selected by ``batch``.

    Parameters
    ----------
    batch
        Positional row selector applied to ``self.data.pthway_NameList``
        via ``.iloc``.
    le
        A fitted encoder whose ``transform`` maps protein ``GenomeName``
        values to row indices of ``self.data.activ_free`` /
        ``self.data.activ_cancer``.

    Returns
    -------
    Data
        Carries the selected name list, the activity rows of the selected
        proteins, the edges whose two endpoints are both in the selection,
        and label-encoded edge indices, edge classes and node classes.
    """
    deep_pthway = Data()

    newpthway_Namelist = self.data.pthway_NameList.iloc[batch, :].reset_index(drop=True)
    is_protein = newpthway_Namelist['GenomeType'] == 'protein'
    deep_pthway.genome_Namelist = newpthway_Namelist[is_protein]['GenomeName'].values

    activ_id = le.transform(deep_pthway.genome_Namelist)
    deep_pthway.activ_free = self.data.activ_free[activ_id]
    deep_pthway.activ_cancer = self.data.activ_cancer[activ_id]
    deep_pthway.pth_Namelist = newpthway_Namelist

    # Keep only the edges whose two endpoints are both in the selection.
    # A set gives O(1) membership tests, and the rows are picked by position
    # (.iloc) because the enumerate() counter is positional and would not
    # match DataFrame.drop's label-based lookup on a non-default index.
    Edgelist = self.data.Edgelist
    selected_names = set(newpthway_Namelist['GenomeName'].values)
    endpoints = zip(Edgelist.iloc[:, 0].values, Edgelist.iloc[:, 1].values)
    keep_rows = [
        row for row, (source, target) in enumerate(endpoints)
        if source in selected_names and target in selected_names
    ]
    deep_pthway.Edgelist = Edgelist.iloc[keep_rows].reset_index(drop=True)

    # Encode edge endpoints as indices into the sorted selected names.
    name_encoder = LabelEncoder()
    name_encoder.fit(deep_pthway.pth_Namelist['GenomeName'].values)
    flat_endpoints = deep_pthway.Edgelist.iloc[:, :2].values.reshape(-1)
    deep_pthway.edge_index = name_encoder.transform(flat_endpoints).reshape(-1, 2)
    deep_pthway.all_elem_className = list(name_encoder.classes_)

    # Label edge classes.
    edge_encoder = LabelEncoder()
    edge_encoder.fit(deep_pthway.Edgelist['edgeType'])
    deep_pthway.edge_class = edge_encoder.transform(deep_pthway.Edgelist['edgeType'])
    deep_pthway.edge_className = list(edge_encoder.classes_)

    # Label node classes.
    node_encoder = LabelEncoder()
    node_encoder.fit(deep_pthway.pth_Namelist['GenomeType'])
    deep_pthway.node_class = node_encoder.transform(deep_pthway.pth_Namelist['GenomeType'])
    deep_pthway.node_className = list(node_encoder.classes_)

    return deep_pthway
def run_model(dataset, conf):
    """Train a two-view (row / column) variational graph auto-encoder.

    The tables of ``dataset`` are tokenized with ``create_corpus``, row and
    column edges are built with ``build_graph_edges``, and a VGAE with one
    encoder branch per edge type is trained for ``conf["epoch_num"]``
    epochs.  After every epoch the cell embeddings are extracted, turned
    into table vectors with ``generate_table_vectors`` and scored with
    ``evaluate_model(dataset, vec_list, k=5)``.

    Parameters
    ----------
    dataset
        Passed through to ``create_corpus`` and ``evaluate_model``.
    conf : mapping
        Reads the keys ``"add_attr"``, ``"shuffle_vocab"``,
        ``"row_edges_sample"``, ``"column_edges_sample"``,
        ``"vector_size"`` and ``"epoch_num"``.

    Returns
    -------
    tuple
        ``(cell_vectors, vec_list, losses, results)``.  ``losses`` holds one
        ``(loss, row_recon, col_recon, row_kl, col_kl)`` tuple per epoch and
        ``results`` one evaluation score per epoch.  ``cell_vectors`` and
        ``vec_list`` come from the last epoch and are ``None`` when
        ``conf["epoch_num"]`` is 0.
    """
    # ---- 1) Build the table graph ------------------------------------
    tokenized_tables, vocabulary, cell_dict, reversed_dictionary = create_corpus(
        dataset, include_attr=conf["add_attr"])

    if conf["shuffle_vocab"] == True:  # noqa: E712 - keep the exact comparison
        shuffled_vocab = shuffle_vocabulary(vocabulary)
    else:
        shuffled_vocab = None

    nodes = build_node_features(vocabulary)
    row_edges_index, row_edges_weights = build_graph_edges(
        tokenized_tables, s_vocab=shuffled_vocab,
        sample_frac=conf["row_edges_sample"], columns=False)
    col_edges_index, col_edges_weights = build_graph_edges(
        tokenized_tables, s_vocab=shuffled_vocab,
        sample_frac=conf["column_edges_sample"], columns=True)

    # Unsampled edge sets: these are handed to negative_sampling below.
    all_row_edges_index, _ = build_graph_edges(
        tokenized_tables, s_vocab=shuffled_vocab, sample_frac=1.0,
        columns=False)
    all_col_edges_index, _ = build_graph_edges(
        tokenized_tables, s_vocab=shuffled_vocab, sample_frac=1.0,
        columns=True)
    all_possible_edges = torch.cat(
        (all_row_edges_index, all_col_edges_index), dim=1)

    edges = torch.cat((row_edges_index, col_edges_index), dim=1)
    weights = torch.cat((row_edges_weights, col_edges_weights), dim=0)
    graph_data = Data(x=nodes, edge_index=edges, edge_attr=weights)

    # ---- 2) Run the table auto-encoder model -------------------------
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # The model is moved to `device`, so every tensor it consumes has to
    # live there too; otherwise training fails on a CUDA machine.
    x = nodes.to(device)
    row_edges_index = row_edges_index.to(device)
    col_edges_index = col_edges_index.to(device)
    all_possible_edges = all_possible_edges.to(device)

    EPS = 1e-15
    MAX_LOGVAR = 10

    class TVGAE(GAE):
        r"""The Variational Graph Auto-Encoder model from the
        `"Variational Graph Auto-Encoders" <https://arxiv.org/abs/1611.07308>`_
        paper, extended to two latent views (rows and columns).

        Args:
            encoder (Module): The encoder module to compute :math:`\mu` and
                :math:`\log\sigma^2` for both views.
            decoder (Module, optional): The decoder module. If set to
                :obj:`None`, will default to the
                :class:`torch_geometric.nn.models.InnerProductDecoder`.
                (default: :obj:`None`)
        """

        def __init__(self, encoder, decoder=None):
            super(TVGAE, self).__init__(encoder, decoder)

        def reparametrize(self, mu, logvar):
            # NOTE(review): kl_loss treats `logvar` as log-variance, for
            # which the standard deviation would be exp(0.5 * logvar).
            # Left unchanged here - confirm which convention is intended.
            if self.training:
                return mu + torch.randn_like(logvar) * torch.exp(logvar)
            return mu

        def encode(self, *args, **kwargs):
            """Encode both views and stack them as ``cat((z_row, z_col), 0)``."""
            (self.__rmu__, self.__rlogvar__,
             self.__cmu__, self.__clogvar__) = self.encoder(*args, **kwargs)
            self.__rlogvar__ = self.__rlogvar__.clamp(max=MAX_LOGVAR)
            self.__clogvar__ = self.__clogvar__.clamp(max=MAX_LOGVAR)
            zr = self.reparametrize(self.__rmu__, self.__rlogvar__)
            zc = self.reparametrize(self.__cmu__, self.__clogvar__)
            return torch.cat((zr, zc), 0)

        def kl_loss(self):
            """Return the ``(row, column)`` KL terms of the last ``encode``."""
            rmu, rlogvar = self.__rmu__, self.__rlogvar__
            cmu, clogvar = self.__cmu__, self.__clogvar__
            rkl = -0.5 * torch.mean(
                torch.sum(1 + rlogvar - rmu**2 - rlogvar.exp(), dim=1))
            # The column term must use the column mean (it used `rmu`).
            ckl = -0.5 * torch.mean(
                torch.sum(1 + clogvar - cmu**2 - clogvar.exp(), dim=1))
            return (rkl, ckl)

        def recon_loss(self, z, pos_edge_index, all_possible_edges):
            """Binary cross-entropy over positive and sampled negative edges."""
            # Use this instance's decoder, not the enclosing-scope `model`.
            pos_loss = -torch.log(
                self.decoder(z, pos_edge_index, sigmoid=True) + EPS).mean()
            neg_edge_index = negative_sampling(all_possible_edges, z.size(0))
            neg_loss = -torch.log(
                1 - self.decoder(z, neg_edge_index, sigmoid=True) + EPS).mean()
            return pos_loss + neg_loss

    class Encoder(torch.nn.Module):
        """Two parallel GCN branches, one over row edges, one over column edges."""

        def __init__(self, in_channels, out_channels):
            super(Encoder, self).__init__()
            self.conv_rows = GCNConv(in_channels, 2 * out_channels, cached=True)
            self.conv_cols = GCNConv(in_channels, 2 * out_channels, cached=True)
            self.conv_rmu = GCNConv(2 * out_channels, out_channels, cached=True)
            self.conv_rlogvar = GCNConv(2 * out_channels, out_channels, cached=True)
            self.conv_cmu = GCNConv(2 * out_channels, out_channels, cached=True)
            self.conv_clogvar = GCNConv(2 * out_channels, out_channels, cached=True)

        def forward(self, x, row_edge_index, col_edge_index):
            xr = F.relu(self.conv_rows(x, row_edge_index))
            xc = F.relu(self.conv_cols(x, col_edge_index))
            return (self.conv_rmu(xr, row_edge_index),
                    self.conv_rlogvar(xr, row_edge_index),
                    self.conv_cmu(xc, col_edge_index),
                    self.conv_clogvar(xc, col_edge_index))

    channels = conf["vector_size"]
    model = TVGAE(Encoder(graph_data.num_features, channels)).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    def train(model, optimizer, x, row_edges, col_edges):
        """Run one optimization step and return all loss terms."""
        model.train()
        optimizer.zero_grad()
        z = model.encode(x, row_edges, col_edges)
        # encode() stacks the row view on top of the column view.
        mid = len(z) // 2
        zr, zc = z[:mid], z[mid:]
        rrl = model.recon_loss(zr, row_edges, all_possible_edges)
        crl = model.recon_loss(zc, col_edges, all_possible_edges)
        rkl, ckl = model.kl_loss()
        loss = rrl + crl + rkl + ckl
        loss.backward()
        optimizer.step()
        return loss, rrl, crl, rkl, ckl

    def get_cell_vectors(model, x, row_edges_index, col_edges_index):
        """Return the latent matrix as a tensor and as a NumPy array."""
        model.eval()
        with torch.no_grad():
            z = model.encode(x, row_edges_index, col_edges_index)
            # .cpu() is required before .numpy() when running on CUDA.
            cell_vectors = z.cpu().numpy()
        return z, cell_vectors

    losses = []
    results = []
    # Defined up front so the return below is valid with zero epochs.
    cell_vectors = None
    vec_list = None
    for epoch in range(conf["epoch_num"]):
        loss = train(model, optimizer, x, row_edges_index, col_edges_index)
        losses.append(loss)
        print(epoch, loss)

        # ---- 3) Extract cell vectors, build table vectors, evaluate ----
        z, cell_vectors = get_cell_vectors(model, x, row_edges_index,
                                           col_edges_index)
        vec_list = generate_table_vectors(cell_vectors, tokenized_tables,
                                          s_vocab=shuffled_vocab)
        result_score = evaluate_model(dataset, vec_list, k=5)
        print(result_score)
        results.append(result_score)

    return cell_vectors, vec_list, losses, results
def _generate_data(self):
    """Create one random sample with the attributes the pipeline expects.

    Always sets ``pos``, ``x`` (``None`` when ``self.feature_size`` is
    falsy), ``y`` and ``category``.  Box labels are added when
    ``self.include_box`` is set, and instance labels when ``self.panoptic``
    is set (which overwrites ``center_label``, ``y`` and ``vote_label``).
    The random draws happen in a fixed order, so the result is reproducible
    under a fixed torch seed.
    """
    point_count = self.num_points

    positions = torch.randn((point_count, 3))
    if self.feature_size:
        features = torch.randn((point_count, self.feature_size))
    else:
        features = None
    semantic = torch.randint(0, 10, (point_count,))
    sample = Data(pos=positions, x=features, y=semantic,
                  category=self._category)

    if self.include_box:
        box_count = 10
        sample.center_label = torch.randn(box_count, 3)
        sample.heading_class_label = torch.zeros((box_count,))
        sample.heading_residual_label = torch.randn((box_count,))
        sample.size_class_label = torch.randint(
            0, len(self.mean_size_arr), (box_count,))
        sample.size_residual_label = torch.randn(box_count, 3)
        sample.sem_cls_label = torch.randint(0, 10, (box_count,))
        # NOTE(review): randint's upper bound is exclusive, so randint(0, 1)
        # is always 0 and both masks below are all False - confirm intended.
        sample.box_label_mask = torch.randint(0, 1, (box_count,)).bool()
        sample.vote_label = torch.randn(self.num_points, 9)
        sample.vote_label_mask = torch.randint(
            0, 1, (self.num_points,)).bool()

    if self.panoptic:
        sample.num_instances = torch.tensor([10])
        sample.center_label = torch.randn((self.num_points, 3))
        sample.y = torch.randint(0, 10, (self.num_points,))
        sample.instance_labels = torch.randint(0, 20, (self.num_points,))
        sample.instance_mask = torch.rand(self.num_points).bool()
        sample.vote_label = torch.randn((self.num_points, 3))

    return sample
# Generator update for one batch.  Every name used here comes from the
# enclosing training loop, which is not part of this fragment.

# Crop rows/columns 128:256 of the channel-last image batches, then move
# the channel axis to position 1: (N, H, W, C) -> (N, C, H, W).
fake_RGB_image = fake_RGB_image[:, 128:256, 128:256, :].permute(0, 3, 1, 2)
real_RGB_image = real_RGB_image[:, 128:256, 128:256, :].permute(0, 3, 1, 2)

# Image-based discriminator.
pred_fake = discriminator(fake_RGB_image, real_A)
loss_GAN = lambda_IMAGE * criterion_GAN(pred_fake, valid)

# Point-based discriminator, fed a one-element batch.
point_cloud_fake = Batch.from_data_list([Data(pos=point, x=fake_RGB)])
print(point_cloud_fake)
pred_fake_point = pouintD(point_cloud_fake)
loss_GAN_point = lambda_POINT * criterion_GAN(pred_fake_point, valid_P)

# Pixel-wise and point-wise reconstruction losses.
loss_pixel = lambda_IMAGE * criterion_pixelwise(fake_RGB_image, real_RGB_image)
loss_point = lambda_POINT * criterion_pixelwise(fake_RGB, real_RGB)

# Total generator loss.
loss_G = (lambda_GAN * loss_GAN
          + loss_GAN_point
          + lambda_pixel * loss_pixel
          + lambda_pixel * loss_point)
loss_G.backward()

# With a modulus of 1 this condition holds on every iteration.
if (i + 1) % 1 == 0:
    optimizer_G.step()
def __getitem__(self, index):
    """Return sample ``index`` as a ``Data`` object.

    ``x`` comes from ``self.x``, ``edge_index`` from ``self.tuopu``,
    ``user_y`` from ``self.user_y`` and ``y`` is the single-element tensor
    wrapping ``self.group_y[index]``.
    """
    node_features = torch.FloatTensor(self.x[index])
    topology = torch.tensor(self.tuopu[index])
    user_labels = torch.tensor(self.user_y[index])
    group_label = torch.tensor([self.group_y[index]])
    return Data(
        x=node_features,
        edge_index=topology,
        user_y=user_labels,
        y=group_label,
    )
def __init__(self, root, npoints=20000, transform=None):
    """Load every ``*.las`` file under ``<root>/pointcloud_path/``.

    For each file with at least ``npoints`` points the cloud is cleaned
    with Open3D statistical outlier removal, coordinates are shifted by the
    per-file minimum and divided by 30, the three feature columns are
    scaled, and the first ``npoints`` points are stored.

    Parameters
    ----------
    root : str
        Dataset root directory.
    npoints : int
        Fixed number of points kept per cloud; shorter clouds (after
        outlier removal) are zero-padded.
    transform
        Stored on ``self.transform``; not applied here.

    Populates ``self.pointlist`` (xyz arrays), ``self.rgblist`` (rgb
    arrays), ``self.datalist`` (``Data(pos, x)`` objects) and
    ``self.data_num``.
    """
    self.npoints = npoints
    self.root = root
    self.pointlist = []
    self.rgblist = []
    self.datalist = []
    self.transform = transform
    self.pointpath = root + "/pointcloud_path/"
    print(self.pointpath)
    self.point_list = glob.glob(self.pointpath + "/*.las")[:]
    print(self.point_list)

    for file in self.point_list:
        print(file)
        # Load the point cloud.  try/finally guarantees the LAS handle is
        # closed even when the file is skipped for being too small.
        file_h = laspy.file.File(file, mode='r')
        try:
            print(file_h.header.min[0])
            print(file_h.header.min[2])
            print(file_h.header.min[1])
            src = np.vstack([file_h.x, file_h.y, file_h.z]).transpose()
            if len(src) < npoints:
                continue
            rgb = np.vstack(
                [file_h.red, file_h.green, file_h.blue]).transpose()
            rgb = rgb / 255.0
            print(np.amin(rgb, axis=0))
            print(np.amax(rgb, axis=0))

            points = file_h.points['point']
            attr_names = (list(points.dtype.names)
                          + ImgtoPointDataset.ATTR_EXTRA_LIST)
            names = [name for name in attr_names
                     if name not in ImgtoPointDataset.ATTR_EXLUSION_LIST]
            features = np.array(
                [getattr(file_h, name) for name in names]).transpose()
            print(features[:, 1])
            features = features / 1.0  # force a float array
            print(names)
        finally:
            file_h.close()

        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(src)
        pcd.colors = o3d.utility.Vector3dVector(rgb)
        # cl, ind = pcd.remove_radius_outlier(nb_points=16, radius=0.05)
        cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20,
                                                 std_ratio=2.0)
        pcd = pcd.select_down_sample(ind)
        src = np.asarray(pcd.points)
        rgb = np.asarray(pcd.colors)
        # Apply the same inlier selection to the per-point features so that
        # row k of `features` still describes row k of `src` / `rgb`.
        features = features[ind]

        normlized_xyz = np.zeros((npoints, 3))
        normlized_rgb = np.zeros((npoints, 3))
        normlized_feature = np.zeros((npoints, 3))

        self.coord_min = np.amin(src, axis=0)[:3]
        self.coord_max = np.amax(src, axis=0)[:3]
        if self.coord_max[0] == 0:
            continue
        if self.coord_max[1] == 0:
            continue
        if self.coord_max[2] == 0:
            continue
        print(np.amin(src, axis=0)[:3])
        print(np.amax(src, axis=0)[:3])

        src[:, 0] = ((src[:, 0] - self.coord_min[0]) / 30.0) - 0.5
        src[:, 1] = ((src[:, 1] - self.coord_min[1]) / 30.0) - 0.5
        src[:, 2] = ((src[:, 2] - self.coord_min[2]) / 30.0)
        # Feature columns: 'intensity', 'raw_classification', 'num_returns'.
        features[:, 0] = features[:, 0] / 4000.0
        features[:, 1] = features[:, 1] / 17.0
        features[:, 2] = features[:, 2] / 8.0
        print(np.amin(src, axis=0)[:3])
        print(np.amax(src, axis=0)[:3])

        # Copy the first `npoints` points; when outlier removal left fewer,
        # the remaining rows stay zero.  All three arrays are filled the
        # same way so positions, colours and features stay consistent.
        kept = min(len(src), npoints)
        normlized_xyz[:kept, :] = src[:kept, :]
        normlized_rgb[:kept, :] = rgb[:kept, :]
        normlized_feature[:kept, :] = features[:kept, :]

        self.pointlist.append(normlized_xyz)
        self.rgblist.append(normlized_rgb)

        pos = torch.from_numpy(normlized_xyz).float()
        x = torch.from_numpy(normlized_feature).float()
        self.datalist.append(Data(pos=pos, x=x))

    self.data_num = len(self.pointlist)
import torch
from torch_geometric.data import Data

# Edges in COO form: column k is the directed edge
# edge_index[0, k] -> edge_index[1, k].  Node ids index rows of x, so with
# three nodes only the ids 0, 1 and 2 are valid.
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
# Note that x is two-dimensional, not one-dimensional: every row is the
# feature vector of one node, and the feature dimension here is 1.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

data = Data(x=x, edge_index=edge_index)
print(data)

# The same graph given as a list of node pairs instead:
#
# edge_index = torch.tensor([
#     [0, 1], [1, 0], [1, 2], [2, 1]
# ], dtype=torch.long)
# data = Data(x=x, edge_index=edge_index.t().contiguous())
# print(data)

# Print the attribute keys of data; only attributes that were actually
# passed in are listed.
# NOTE(review): `keys` is an attribute in older PyG releases and a method
# in newer ones - confirm against the installed version.
print(data.keys)  # ['x', 'edge_index']

# Access by key; note that the key is a string.
print(data['x'])
# tensor([[-1.],
#         [ 0.],
#         [ 1.]])
print(data['edge_index'])
# tensor([[0, 1, 1, 2],
#         [1, 0, 2, 1]])

# edge_attr was never set.  Attribute access yields None for it, whereas
# item access on a missing key raises in recent PyG versions.
print('edge_attr: ', data.edge_attr)
def dataset_to_graphs(glycan_list, labels, libr=None, label_type=torch.long,
                      separate=False, context=False, error_catch=False,
                      wo_labels=False):
    """wrapper function to convert a whole list of glycans into a graph dataset\n
    | Arguments:
    | :-
    | glycan_list (list): list of IUPAC-condensed glycan sequences as strings
    | labels (list): list of labels
    | libr (list): sorted list of unique glycoletters observed in the glycans of our dataset; defaults to the module-level lib
    | label_type (torch object): which tensor type for label, default is torch.long for binary labels, change to torch.float for continuous
    | separate (bool): True returns node list / edge list / label list as separate files; False returns list of data tuples; default is False
    | context (bool): legacy-ish; used for generating graph context dataset for pre-training; keep at False
    | error_catch (bool): troubleshooting option, True will print glycans that cannot be converted into graphs and skip them (together with their labels); default is False
    | wo_labels (bool): change to True if you do not want to pass and receive labels; default is False\n
    | Returns:
    | :-
    | Returns list of node list / edge list / label list data tuples
    """
    if libr is None:
        libr = lib

    if error_catch:
        glycan_graphs = []
        kept_positions = []
        for position, glycan in enumerate(glycan_list):
            try:
                graph = glycan_to_graph(glycan, libr)
            except Exception:
                # Report the offending glycan and carry on.
                print(glycan)
                continue
            glycan_graphs.append(graph)
            kept_positions.append(position)
        # Drop the labels of skipped glycans too; otherwise every label
        # after the first failure would be paired with the wrong graph.
        if (not wo_labels and labels is not None
                and len(kept_positions) != len(glycan_list)):
            label_seq = list(labels)
            labels = [label_seq[position] for position in kept_positions]
    else:
        glycan_graphs = [glycan_to_graph(k, libr) for k in glycan_list]

    if separate:
        glycan_nodes, glycan_edges = zip(*glycan_graphs)
        return list(glycan_nodes), list(glycan_edges), labels

    if context:
        # Honour the caller-supplied library (this used the global `lib`).
        pairs = [ggraph_to_context(k, lib=libr) for k in glycan_graphs]
        labels = [item for pair in pairs for item in pair[1]]
        contexts = [item for pair in pairs for item in pair[0]]
        return [
            Data(x=torch.tensor(ctx[0], dtype=torch.long),
                 y=torch.tensor(label, dtype=label_type),
                 edge_index=torch.tensor([ctx[1][0], ctx[1][1]],
                                         dtype=torch.long))
            for ctx, label in zip(contexts, labels)
        ]

    glycan_nodes, glycan_edges = zip(*glycan_graphs)

    if wo_labels:
        return [
            Data(x=torch.tensor(nodes, dtype=torch.long),
                 edge_index=torch.tensor([edges[0], edges[1]],
                                         dtype=torch.long))
            for nodes, edges in zip(glycan_nodes, glycan_edges)
        ]

    return [
        Data(x=torch.tensor(nodes, dtype=torch.long),
             y=torch.tensor([label], dtype=label_type),
             edge_index=torch.tensor([edges[0], edges[1]],
                                     dtype=torch.long))
        for nodes, edges, label in zip(glycan_nodes, glycan_edges, labels)
    ]
def read_one_scan(
    scannet_dir,
    scan_name,
    label_map_file,
    donotcare_class_ids,
    max_num_point,
    obj_class_ids,
    use_instance_labels=True,
    use_instance_bboxes=True,
):
    """Load one ScanNet scan and return it as a ``Data`` object.

    Parameters
    ----------
    scannet_dir, scan_name
        The scan's files are read from ``<scannet_dir>/<scan_name>/``.
    label_map_file
        Forwarded to ``export``.
    donotcare_class_ids
        Semantic class ids whose points are discarded.
    max_num_point
        When truthy and smaller than the number of remaining points, that
        many points are sampled at random without replacement.
    obj_class_ids
        Only boxes whose last column is in this collection are kept.
    use_instance_labels, use_instance_bboxes
        Whether to attach ``iy`` (instance labels) / ``bbox``.

    Returns
    -------
    Data
        ``pos`` (vertex columns 0:3), ``rgb`` (vertex columns 3: divided
        by 255), ``y`` (remapped semantic labels), ``x`` (``None``), plus
        the optional ``iy`` and ``bbox``.
    """
    scan_dir = osp.join(scannet_dir, scan_name)
    mesh_file = osp.join(scan_dir, scan_name + "_vh_clean_2.ply")
    agg_file = osp.join(scan_dir, scan_name + ".aggregation.json")
    seg_file = osp.join(scan_dir, scan_name + "_vh_clean_2.0.010000.segs.json")
    # includes axisAlignment info for the train set scans.
    meta_file = osp.join(scan_dir, scan_name + ".txt")

    mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = export(
        mesh_file, agg_file, seg_file, meta_file, label_map_file, None
    )

    # Discard unwanted classes.  np.isin replaces the deprecated np.in1d;
    # both give the same 1-D mask for these 1-D label arrays.
    mask = np.isin(semantic_labels, donotcare_class_ids, invert=True)
    mesh_vertices = mesh_vertices[mask, :]
    semantic_labels = semantic_labels[mask]
    instance_labels = instance_labels[mask]

    bbox_mask = np.isin(instance_bboxes[:, -1], obj_class_ids)
    instance_bboxes = instance_bboxes[bbox_mask, :]

    # Subsample
    N = mesh_vertices.shape[0]
    if max_num_point and N > max_num_point:
        choices = np.random.choice(N, max_num_point, replace=False)
        mesh_vertices = mesh_vertices[choices, :]
        semantic_labels = semantic_labels[choices]
        instance_labels = instance_labels[choices]

    # Remap labels to [0-(len(valid_labels))].  Ids are visited in
    # ascending order and a new label is never larger than the id it
    # replaces, so an already-remapped value is never remapped again.
    count = 0
    for i in range(max(Scannet.SCANNET_COLOR_MAP.keys()) + 1):
        if i in Scannet.VALID_CLASS_IDS:
            label = count
            count += 1
        else:
            label = Scannet.IGNORE_LABEL
        semantic_labels[semantic_labels == i] = label

    # Build data container
    data = {
        "pos": torch.from_numpy(mesh_vertices[:, :3]),
        "rgb": torch.from_numpy(mesh_vertices[:, 3:]) / 255.0,
        "y": torch.from_numpy(semantic_labels),
        "x": None,
    }
    if use_instance_labels:
        data["iy"] = torch.from_numpy(instance_labels)
    if use_instance_bboxes:
        data["bbox"] = torch.from_numpy(instance_bboxes)

    return Data(**data)
def __getitem__(self, item):
    """Return the graph for entry ``item`` of ``self.data_list``.

    Atom coordinates and features are read from the HDF5 file at
    ``self.data_file``; edges come from the dense pairwise Euclidean
    distance matrix of the coordinates, with the distance as edge
    attribute.

    Returns ``Data(x, edge_index, edge_attr, y)``, or
    ``(pdbid, pose, Data)`` when ``self.output_info`` is set.  Returns
    ``None`` (after printing the pdbid) when ``self.dataset_name`` is
    missing for that entry.  With ``self.cache_data`` the result is
    memoized in ``self.data_dict``.

    Raises ``NotImplementedError`` for any ``feature_type`` other than
    ``"pybel"``.
    """
    if self.cache_data and item in self.data_dict:
        return self.data_dict[item]

    pdbid, pose, affinity = self.data_list[item]

    with h5py.File(self.data_file, "r") as f:
        parent_key = "{}/{}/{}".format(
            pdbid, self.feature_type, self.preprocessing_type
        )
        if self.dataset_name not in f[parent_key].keys():
            print(pdbid)
            return None

        group = f[
            "{}/{}/{}/{}".format(
                pdbid,
                self.feature_type,
                self.preprocessing_type,
                self.dataset_name,
            )
        ]
        if self.use_docking:
            # TODO: the next line will cause a runtime error because poses
            # are not being selected
            group = group[pose]

        raw = group["data"]
        vdw_radii = group.attrs["van_der_waals"].reshape(-1, 1)

        if self.feature_type != "pybel":
            raise NotImplementedError
        coords = raw[:, 0:3]
        node_feats = np.concatenate([vdw_radii, raw[:, 3:22]], axis=1)

    # NOTE(review): the original comment says the vdW radii are accounted
    # for in the distance calculation, but the distances below are plain
    # Euclidean; the radii only enter as a node feature.
    dists = pairwise_distances(coords, metric="euclidean")
    edge_index, edge_attr = dense_to_sparse(torch.from_numpy(dists).float())

    graph = Data(
        x=torch.from_numpy(node_feats).float(),
        edge_index=edge_index,
        edge_attr=edge_attr.view(-1, 1),
        y=torch.FloatTensor(affinity).view(-1, 1),
    )

    result = (pdbid, pose, graph) if self.output_info else graph
    if self.cache_data:
        self.data_dict[item] = result
        return self.data_dict[item]
    return result
from torch_geometric.utils import k_hop_subgraph, from_networkx
import pickle
import networkx as nx
from math import floor

prefix = '/gpfs_home/spate116/singhlab/GCN_Integration/scripts/BI/examples/syn/'

# NOTE(review): nx.read_gpickle is not available in NetworkX >= 3.0 -
# confirm the pinned version.
G = nx.read_gpickle(prefix + 'data/syn4_G.pickle')
with open(prefix + 'data/syn4_lab.pickle', 'rb') as f:
    labels = pickle.load(f)

# Node features come from each node's 'feat' attribute.
x = torch.tensor([attrs['feat'] for _, attrs in G.nodes(data=True)])

# One row per edge; append the reversed pairs so that every edge is present
# in both directions, then transpose to the (2, num_edges) layout.
edge_index = torch.tensor(list(G.edges))
edge_index_flipped = edge_index[:, [1, 0]]
edge_index = torch.cat((edge_index, edge_index_flipped))
y = torch.tensor(labels, dtype=torch.long)
data = Data(x=x, edge_index=edge_index.T, y=y)


class Net(torch.nn.Module):
    """Three stacked GCN layers with leaky-ReLU after the first two.

    The input width is fixed at 10 and the output width is
    ``max(y) + 1``, read from the module-level label tensor ``y``.
    ``x`` is the hidden width; ``k`` is accepted but unused.
    """

    def __init__(self, k=1, x=64):
        super().__init__()
        num_classes = y.max().item() + 1
        self.conv1 = GCNConv(10, x)
        self.conv2 = GCNConv(x, x)
        self.conv3 = GCNConv(x, num_classes)

    def forward(self, x, edge_index):
        hidden = F.leaky_relu(self.conv1(x, edge_index))
        hidden = F.leaky_relu(self.conv2(hidden, edge_index))
        return self.conv3(hidden, edge_index)

# Load everything onto the gpu if available
def process_set(self, dataset):
    """Read the ``<dataset>-*.h5`` shards of one split and collate them.

    Parameters
    ----------
    dataset : str
        Split name used in the shard file pattern ``<dataset>-*.h5``.

    Returns
    -------
    The result of ``self.collate`` over all samples that pass
    ``self.pre_filter`` (when set), after ``self.pre_transform`` (when
    set).

    When ``self.dataset == 'ins_seg_h5'`` each sample carries ``pos``,
    ``y``, ``norm`` and ``x`` (opacity concatenated with rgb / 255);
    otherwise only ``pos`` and ``y``.
    """
    data_list = []

    def _add(sample):
        # Shared tail of both branches: filter, transform, collect.
        if self.pre_filter is not None and not self.pre_filter(sample):
            return
        if self.pre_transform is not None:
            sample = self.pre_transform(sample)
        data_list.append(sample)

    if self.dataset == 'ins_seg_h5':
        raw_path = osp.join(self.raw_dir, 'ins_seg_h5_for_sgpn', self.dataset)
        categories = glob(osp.join(raw_path, '*'))
        categories = sorted([x.split(os.sep)[-1] for x in categories])

        for category in tqdm(categories):
            folder = osp.join(raw_path, category)
            paths = glob('{}/{}-*.h5'.format(folder, dataset))
            labels, nors, opacitys, pts, rgbs = [], [], [], [], []
            for path in paths:
                # Open read-only and close promptly; `[:]` copies each
                # dataset into memory, so nothing refers to the file after
                # the with-block.
                with h5py.File(path, 'r') as f:
                    pts += torch.from_numpy(f['pts'][:]).unbind(0)
                    labels += torch.from_numpy(f['label'][:]).to(
                        torch.long).unbind(0)
                    nors += torch.from_numpy(f['nor'][:]).unbind(0)
                    opacitys += torch.from_numpy(f['opacity'][:]).unbind(0)
                    rgbs += torch.from_numpy(f['rgb'][:]).to(
                        torch.float32).unbind(0)

            for pt, label, nor, opacity, rgb in zip(pts, labels, nors,
                                                    opacitys, rgbs):
                _add(Data(pos=pt[:, :3], y=label, norm=nor[:, :3],
                          x=torch.cat((opacity.unsqueeze(-1), rgb / 255.),
                                      1)))
    else:
        raw_path = osp.join(self.raw_dir, self.dataset)
        categories = glob(osp.join(raw_path, self.object))
        categories = sorted([x.split(os.sep)[-1] for x in categories])

        for category in tqdm(categories):
            folder = osp.join(raw_path, category)
            paths = glob('{}/{}-*.h5'.format(folder, dataset))
            labels, pts = [], []
            for path in paths:
                with h5py.File(path, 'r') as f:
                    pts += torch.from_numpy(
                        f['data'][:].astype(np.float32)).unbind(0)
                    labels += torch.from_numpy(
                        f['label_seg'][:].astype(np.float32)).to(
                            torch.long).unbind(0)

            for pt, label in zip(pts, labels):
                _add(Data(pos=pt[:, :3], y=label))

    return self.collate(data_list)
def process(self):
    """Build and save the train/test keypoint graphs of ``self.category``.

    For every image listed in ``splits.npz`` the annotated object is
    skipped if it is truncated, occluded or difficult (and, for the
    ``person`` category, if the file name's first four characters parse to
    a number above 2008).  The object is cropped with a 16-pixel margin,
    resized to 256x256 and passed through VGG16; the hooked activations
    (layers 20 and 25) are bilinearly upsampled and sampled at the rescaled
    keypoint positions to form the node features.  The collated lists are
    saved to ``self.processed_paths[0]`` (train) and ``[1]`` (test).

    Raises ``ImportError`` when torchvision or PIL could not be imported.
    """
    if models is None or T is None or Image is None:
        raise ImportError('Package `torchvision` could not be found.')

    splits = np.load(osp.join(self.raw_dir, 'splits.npz'),
                     allow_pickle=True)
    category_idx = self.categories.index(self.category)
    train_split = list(splits['train'])[category_idx]
    test_split = list(splits['test'])[category_idx]

    image_path = osp.join(self.raw_dir, 'images', 'JPEGImages')
    info_path = osp.join(self.raw_dir, 'images', 'Annotations')
    annotation_path = osp.join(self.raw_dir, 'annotations')

    # Keypoint name -> class index, assigned in order of first appearance.
    labels = {}

    vgg16_outputs = []

    def hook(module, x, y):
        vgg16_outputs.append(y.to('cpu'))

    vgg16 = models.vgg16(pretrained=True).to(self.device)
    vgg16.eval()
    vgg16.features[20].register_forward_hook(hook)  # relu4_2
    vgg16.features[25].register_forward_hook(hook)  # relu5_1

    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    def first_text(node, tag):
        # Text content of the first <tag> element below `node`.
        return node.getElementsByTagName(tag)[0].firstChild.data

    num_train = len(train_split)
    train_data_list, test_data_list = [], []

    for i, name in enumerate(chain(train_split, test_split)):
        filename = '_'.join(name.split('/')[1].split('_')[:-1])
        idx = int(name.split('_')[-1].split('.')[0]) - 1

        info_file = osp.join(info_path, '{}.xml'.format(filename))
        obj = minidom.parse(info_file).getElementsByTagName('object')[idx]

        trunc = first_text(obj, 'truncated')
        occluded_nodes = obj.getElementsByTagName('occluded')
        occ = occluded_nodes[0].firstChild.data if len(occluded_nodes) else '0'
        diff = first_text(obj, 'difficult')

        if any(bool(int(flag)) for flag in (trunc, occ, diff)):
            continue

        if self.category == 'person' and int(filename[:4]) > 2008:
            continue

        xmin = float(first_text(obj, 'xmin'))
        xmax = float(first_text(obj, 'xmax'))
        ymin = float(first_text(obj, 'ymin'))
        ymax = float(first_text(obj, 'ymax'))
        box = (xmin, ymin, xmax, ymax)

        dom = minidom.parse(osp.join(annotation_path, name))
        flat_coords, class_ids = [], []
        for keypoint in dom.getElementsByTagName('keypoint'):
            label = keypoint.attributes['name'].value
            if label not in labels:
                labels[label] = len(labels)
            class_ids.append(labels[label])
            flat_coords.extend((float(keypoint.attributes['x'].value),
                                float(keypoint.attributes['y'].value)))

        y = torch.tensor(class_ids, dtype=torch.long)
        pos = torch.tensor(flat_coords, dtype=torch.float).view(-1, 2)

        if pos.numel() == 0:
            x = torch.tensor([], dtype=torch.float).view(0, 1024)
        else:
            # Add a small offset to the bounding because some keypoints lay
            # outside the bounding box intervals.
            box = (min(pos[:, 0].min().floor().item(), box[0]) - 16,
                   min(pos[:, 1].min().floor().item(), box[1]) - 16,
                   max(pos[:, 0].max().ceil().item(), box[2]) + 16,
                   max(pos[:, 1].max().ceil().item(), box[3]) + 16)

            # Rescale keypoints.
            pos[:, 0] = (pos[:, 0] - box[0]) * 256.0 / (box[2] - box[0])
            pos[:, 1] = (pos[:, 1] - box[1]) * 256.0 / (box[3] - box[1])

            image_file = osp.join(image_path, '{}.jpg'.format(filename))
            with open(image_file, 'rb') as f:
                img = Image.open(f).convert('RGB').crop(box)
                img = img.resize((256, 256), resample=Image.BICUBIC)

            img = transform(img)

            # The forward hooks refill this list during the call below.
            vgg16_outputs.clear()
            with torch.no_grad():
                vgg16(img.unsqueeze(0).to(self.device))

            pos_index = pos.round().long().clamp(0, 255)
            sampled = []
            for out in vgg16_outputs:
                out = F.interpolate(out, (256, 256), mode='bilinear',
                                    align_corners=False)
                out = out.squeeze(0).permute(1, 2, 0)  # [H, W, C]
                sampled.append(out[pos_index[:, 1], pos_index[:, 0]])
            x = torch.cat(sampled, dim=-1)

        data = Data(x=x, pos=pos, y=y, name=filename)

        if self.pre_filter is not None and not self.pre_filter(data):
            continue
        if self.pre_transform is not None:
            data = self.pre_transform(data)

        # chain() yields the train names first, so the running index tells
        # which split the sample belongs to.
        target_list = train_data_list if i < num_train else test_data_list
        target_list.append(data)

    torch.save(self.collate(train_data_list), self.processed_paths[0])
    torch.save(self.collate(test_data_list), self.processed_paths[1])
def process(self):
    """Build and save the keypoint graphs of ``self.category``.

    Every ``<name>.png`` in the capitalized category folder is paired with
    ``<name>.mat`` (key ``'pts_coord'``).  Images whose keypoint count is
    not 10 are skipped.  Keypoints are rescaled to a 256x256 frame, the
    resized image is passed through VGG16, and the hooked activations
    (layers 20 and 25) are bilinearly upsampled and sampled at the keypoint
    positions to form ``x``.  The collated list is saved to
    ``self.processed_paths[0]``.

    Raises ``ImportError`` when torchvision or PIL could not be imported.
    """
    if models is None or T is None or Image is None:
        raise ImportError('Package `torchvision` could not be found.')

    category = self.category.capitalize()
    image_files = glob.glob(osp.join(self.raw_dir, category, '*.png'))
    # Strip the 4-character '.png' suffix to get the shared stem.
    names = sorted([image_file[:-4] for image_file in image_files])

    vgg16_outputs = []

    def hook(module, x, y):
        vgg16_outputs.append(y.to('cpu'))

    vgg16 = models.vgg16(pretrained=True).to(self.device)
    vgg16.eval()
    vgg16.features[20].register_forward_hook(hook)  # relu4_2
    vgg16.features[25].register_forward_hook(hook)  # relu5_1

    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    data_list = []
    for name in names:
        raw_coords = loadmat('{}.mat'.format(name))['pts_coord']
        # The unpacking requires exactly two rows: x and y coordinates.
        coord_x, coord_y = torch.from_numpy(raw_coords).to(torch.float)
        pos = torch.stack([coord_x, coord_y], dim=1)

        # The "face" category contains a single image with less than 10
        # keypoints, so we need to skip it.
        if pos.size(0) != 10:
            continue

        with open('{}.png'.format(name), 'rb') as f:
            img = Image.open(f).convert('RGB')

        # Rescale keypoints.
        pos[:, 0] = pos[:, 0] * 256.0 / (img.size[0])
        pos[:, 1] = pos[:, 1] * 256.0 / (img.size[1])

        img = img.resize((256, 256), resample=Image.BICUBIC)
        img = transform(img)
        size = img.size()[-2:]

        # The forward hooks refill this list during the call below.
        vgg16_outputs.clear()
        with torch.no_grad():
            vgg16(img.unsqueeze(0).to(self.device))

        pos_index = pos.round().long().clamp(0, 255)
        sampled = []
        for out in vgg16_outputs:
            out = F.interpolate(out, size, mode='bilinear',
                                align_corners=False)
            out = out.squeeze(0).permute(1, 2, 0)
            sampled.append(out[pos_index[:, 1], pos_index[:, 0]])
        x = torch.cat(sampled, dim=-1)

        data = Data(x=x, pos=pos)

        if self.pre_filter is not None and not self.pre_filter(data):
            continue
        if self.pre_transform is not None:
            data = self.pre_transform(data)

        data_list.append(data)

    torch.save(self.collate(data_list), self.processed_paths[0])
def fit(
    self,
    features,
    adj,
    labels,
    idx_train,
    idx_val=None,
    idx_test=None,
    train_iters=81,
    att_0=None,
    attention=False,
    model_name=None,
    initialize=True,
    verbose=False,
    normalize=False,
    patience=510,
):
    """Train the GCN model via ``self._train_with_val``.

    Parameters
    ----------
    features, adj, labels
        Graph inputs.  When ``adj`` is not a ``torch.Tensor`` all three are
        converted with ``utils.to_tensor``; otherwise they are moved to
        ``self.device``.
    idx_train, idx_val
        Node indices forwarded to ``self._train_with_val``.
    idx_test
        Stored on ``self.idx_test``.
    train_iters : int
        Number of training iterations forwarded to
        ``self._train_with_val``.
    attention : bool
        Stored on ``self.attention``.
    initialize : bool
        Call ``self.initialize()`` before training.
    verbose : bool
        Forwarded to ``self._train_with_val``.
    att_0, model_name, normalize, patience
        Accepted for interface compatibility; not used by this method.

    Side effects: sets ``self.sim`` (to ``None``), ``self.idx_test``,
    ``self.attention``, ``self.adj_norm``, ``self.features`` and
    ``self.labels``.
    """
    # An unused PyG ``Data`` object (for a disabled GraphSAINT sampler) was
    # built here with a hard-coded ``num_nodes = 2110``.  It was dead code,
    # and its ``features.to_dense()`` call failed for the non-tensor inputs
    # that the conversion branch below supports, so it has been removed.
    self.sim = None
    self.idx_test = idx_test
    self.attention = attention

    if initialize:
        self.initialize()

    if not isinstance(adj, torch.Tensor):
        features, adj, labels = utils.to_tensor(features, adj, labels,
                                                device=self.device)
    else:
        features = features.to(self.device)
        adj = adj.to(self.device)
        labels = labels.to(self.device)

    # Only self-loops are added here; the original notes that the
    # normalization itself is done inside the GCNConv layer.
    adj = self.add_loop_sparse(adj)

    self.adj_norm = adj
    self.features = features
    self.labels = labels

    self._train_with_val(labels, idx_train, idx_val, train_iters, verbose)
def build_graph(img, tracks, current_detections, distance_limit, test=True,
                mean_prediction=False):
    """Build a bipartite track/detection graph for a single frame.

    Nodes are the tracks followed by the current detections (node features
    are their ``crop`` attributes). A track and a detection are connected,
    in both directions, when the distance between the first two entries of
    their original bounding boxes is below ``distance_limit``. Every edge
    carries the attribute ``[0.0]``.

    Args:
        img: Frame image; only passed to ``bbox_normalization`` when
            ``mean_prediction`` is set.
        tracks: Sequence of track objects. Reads ``crop`` and ``track_id``,
            plus ``mean`` (``mean_prediction`` set) or ``bbox`` and
            ``bbox_normalized`` (otherwise).
        current_detections: Sequence of detection objects. Reads ``bbox``,
            ``bbox_normalized``, ``crop`` and ``track_id``.
        distance_limit: Exclusive upper bound on the box distance for an
            edge to be created.
        test: When true, the complete set of track -> detection index pairs
            is also recorded in ``edges_complete`` (matrix A).
        mean_prediction: When true, the track position is taken from
            ``track.mean[:4]`` (predicted state in ``xyah`` format) instead
            of the stored bounding box.

    Returns:
        A ``Data`` object with ``x``, ``edge_index``, ``edge_attr``,
        ``coords_normalized``, ``coords_original``, ``ground_truth``,
        ``det_num``, ``track_num`` and ``edges_complete``; or ``None`` when
        ``tracks`` or ``current_detections`` is empty.
    """
    num_tracks = len(tracks)
    num_detections = len(current_detections)
    if num_tracks == 0 or num_detections == 0:
        return None

    node_attr = []
    coords_original = []
    coords_normalized = []

    for track in tracks:
        if mean_prediction:
            # Position prediction: take the predicted position from the
            # Kalman filter state, given in xyah format.
            predicted = Detection(track.mean[:4], format='xyah')
            bbox = predicted.to_tlbr()
            bbox_norm = bbox_normalization(img, predicted.to_xywh())
        else:
            # No position prediction: use the stored boxes.
            bbox = track.bbox
            bbox_norm = track.bbox_normalized
        # Original coordinates in tlbr format, used for IoU computation.
        coords_original.append(bbox)
        # Normalized coordinates in xywh format, used to compare positions.
        coords_normalized.append(bbox_norm)
        node_attr.append(track.crop)

    for detection in current_detections:
        coords_original.append(detection.bbox)
        coords_normalized.append(detection.bbox_normalized)
        node_attr.append(detection.crop)

    edges_first_row = []
    edges_second_row = []
    edges_complete_first_row = []
    edges_complete_second_row = []
    ground_truth = []
    # in_range[t][d] remembers which pairs are connected so the reverse
    # edges can be emitted afterwards without recomputing the distance.
    in_range = [[False] * num_detections for _ in range(num_tracks)]

    # Track -> detection edges, complete edge list and ground truth.
    for i, track in enumerate(tracks):
        track_id = int(track.track_id)
        for d, detection in enumerate(current_detections):
            j = num_tracks + d
            distance = (
                (coords_original[i][0] - coords_original[j][0]) ** 2 +
                (coords_original[i][1] - coords_original[j][1]) ** 2) ** 0.5
            if distance < distance_limit:
                in_range[i][d] = True
                edges_first_row.append(i)
                edges_second_row.append(j)
            # Building matrix A (all track/detection pairs).
            if test:
                edges_complete_first_row.append(i)
                edges_complete_second_row.append(j)
            # Building matrix X_ref (1.0 where the identities match).
            if track_id == int(detection.track_id):
                ground_truth.append(1.0)
            else:
                ground_truth.append(0.0)

    # Undirected connections: add the detection -> track counterparts,
    # ordered by detection first and track second.
    for d in range(num_detections):
        for i in range(num_tracks):
            if in_range[i][d]:
                edges_first_row.append(num_tracks + d)
                edges_second_row.append(i)

    frame_node_attr = torch.stack(node_attr)
    # Every edge attribute is [0.0]; building it from the edge count keeps
    # the (num_edges, 1) shape even when no pair is within range.
    frame_edge_attr = torch.zeros((len(edges_first_row), 1),
                                  dtype=torch.float)
    frame_edges_index = torch.tensor([edges_first_row, edges_second_row],
                                     dtype=torch.long)
    frame_coords_normalized = torch.tensor(coords_normalized,
                                           dtype=torch.float)
    frame_ground_truth = torch.tensor(ground_truth, dtype=torch.float)
    tracklets_frame = torch.tensor(num_tracks, dtype=torch.float).reshape(1)
    detections_frame = torch.tensor(num_detections,
                                    dtype=torch.float).reshape(1)
    coords_original = torch.tensor(coords_original, dtype=torch.float)
    edges_complete = torch.tensor(
        [edges_complete_first_row, edges_complete_second_row],
        dtype=torch.long)

    data = Data(x=frame_node_attr,
                edge_index=frame_edges_index,
                edge_attr=frame_edge_attr,
                coords_normalized=frame_coords_normalized,
                coords_original=coords_original,
                ground_truth=frame_ground_truth,
                det_num=detections_frame,
                track_num=tracklets_frame,
                edges_complete=edges_complete)
    return data