def load_tissue(params=None): random_seed = params.random_seed dense_dim = params.dense_dim set_seed(random_seed) # 400 0.7895 # 200 0.5117 # 100 0.3203 # 50 0.2083 """ root = '../data/mammary_gland' num = 2915 data_path = f'{root}/mouse_Mammary_gland{num}_data.csv' type_path = f'{root}/mouse_Mammary_gland{num}_celltype.csv' """ data_path = '../data/mouse_data/mouse_brain_2915_data.csv' type_path = '../data/mouse_data/mouse_brain_2915_celltype.csv' # load celltype file then update labels accordingly cell2type = pd.read_csv(type_path, index_col=0) cell2type.columns = ['cell', 'type'] id2label = cell2type['type'].drop_duplicates(keep='first').tolist() label2id = {label: idx for idx, label in enumerate(id2label)} print(f'{len(id2label)} classes in total') cell2type['id'] = cell2type['type'].map(label2id) assert not cell2type['id'].isnull().any(), 'something is wrong with the celltype file.' # load data file data = pd.read_csv(data_path, index_col=0) data = data.transpose(copy=True) assert cell2type['cell'].tolist() == data.index.tolist() print(f'{data.shape[0]} cells, {data.shape[1]} genes.') # genes id2gene = data.columns.tolist() gene2id = {gene: idx for idx, gene in enumerate(id2gene)} # construct graph and add nodes and edges graph = dgl.DGLGraph() start = time() # 1. add all genes as nodes num_genes = len(id2gene) graph.add_nodes(num_genes) # sparse (cell, gene) indices for the graph; cell node ids are offset by num_genes row_idx, col_idx = data.to_numpy().nonzero() row_idx = row_idx + num_genes # 2. add cell nodes and bidirectional cell-gene edges num_cells = data.shape[0] graph.add_nodes(num_cells) graph.add_edges(row_idx, col_idx) graph.add_edges(col_idx, row_idx) print(f'Added {num_cells} cell nodes and {2 * len(row_idx)} edges.') print(f'#Nodes: {graph.number_of_nodes()}, #Edges: {graph.number_of_edges()}.') print(data.head()) # reduce sparse features to dense features cell_pca = PCA(n_components=dense_dim, random_state=random_seed) cell_pca.fit(data.values) cell_feat = cell_pca.transform(data.values) cell_feat = torch.FloatTensor(cell_feat) gene_pca = PCA(n_components=dense_dim, random_state=random_seed) gene_pca.fit(data.T.values) gene_feat = gene_pca.transform(data.T.values) gene_feat = torch.FloatTensor(gene_feat) feat = torch.cat([gene_feat, cell_feat], dim=0) # feat = torch.zeros(graph.number_of_nodes(), dense_dim).normal_() cell_evr = sum(cell_pca.explained_variance_ratio_) * 100 gene_evr = sum(gene_pca.explained_variance_ratio_) * 100 print(f'[PCA] Cell EVR: {cell_evr:.2f}%. Gene EVR: {gene_evr:.2f}%.') # generate labels for training and testing labels = torch.LongTensor(cell2type['id'].tolist()) train_mask = torch.zeros(num_cells, dtype=torch.bool) train_randidx = torch.randperm(num_cells)[:int(num_cells * 0.8)] # generate an 80/20 train/test mask over cells train_mask[train_randidx] = True test_mask = ~train_mask return num_cells, num_genes, graph, feat, labels, train_mask, test_mask
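The loader above calls a `set_seed` helper that is not shown. A minimal sketch, assuming it only needs to make Python, NumPy, and PyTorch deterministic (the original may do more, e.g. seed CUDA explicitly):

import random
import numpy as np
import torch

def set_seed(seed):
    # hypothetical helper: seed every RNG the loader touches
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)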
In this tutorial, you learn how to create a graph and how to read and write node and edge representations. """ ############################################################################### # Creating a graph # ---------------- # The design of :class:`DGLGraph` was influenced by other graph libraries. You # can create a graph from networkx and convert it into a :class:`DGLGraph` and # vice versa. import networkx as nx import dgl g_nx = nx.petersen_graph() g_dgl = dgl.DGLGraph(g_nx) import matplotlib.pyplot as plt plt.subplot(121) nx.draw(g_nx, with_labels=True) plt.subplot(122) nx.draw(g_dgl.to_networkx(), with_labels=True) plt.show() ############################################################################### # There are many ways to construct a :class:`DGLGraph`. Below are the allowed # data types ordered by our recommendation. # # * A pair of arrays ``(u, v)`` storing the source and destination nodes respectively. # They can be numpy arrays or tensor objects from the backend framework.
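Following the recommended construction above, a minimal sketch of building a DGLGraph from a pair of source/destination arrays, using the same legacy constructor as the tutorial:

import numpy as np
import dgl

u = np.array([0, 1, 2, 3])
v = np.array([1, 2, 3, 0])
g = dgl.DGLGraph((u, v))  # edges 0->1, 1->2, 2->3, 3->0
print(g.number_of_nodes(), g.number_of_edges())  # 4 4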
def generate_rand_graph(n): arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype( np.int64) return dgl.DGLGraph(arr, readonly=True)
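A quick usage check for generate_rand_graph, assuming the surrounding test module's imports (scipy as sp, numpy as np):

g = generate_rand_graph(100)
# density 0.1 on a 100x100 adjacency yields roughly 1000 nonzeros, i.e. ~1000 edges
print(g.number_of_nodes(), g.number_of_edges())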
else: return batch_graph.edges[tuple(zip(*batch_readout_edge_list))].data['pred'], \ batch_graph.nodes[batch_h_node_list].data['alpha'], \ batch_graph.nodes[batch_h_node_list].data['alpha_lang'] if __name__ == "__main__": model = AGRNN() node_num = 3 edge_list = [] for src in range(node_num): for dst in range(node_num): edge_list.append((src, dst)) src, dst = tuple(zip(*edge_list)) g = dgl.DGLGraph() g.add_nodes(node_num) g.add_edges(src, dst) import ipdb ipdb.set_trace() e_data = torch.eye(9) # one 9-dim feature per edge (3 x 3 = 9 edges) n_data = torch.arange(node_num) # one value per node; the graph has node_num nodes, not 9 g.edata['feat'] = e_data g.ndata['x'] = n_data # @staticmethod # def _build_graph(node_num, roi_label, node_space): # graph = dgl.DGLGraph() # graph.add_nodes(node_num)
def test_rgcn_sorted(O): ctx = F.ctx() etype = [] g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True) g = g.to(F.ctx()) # 5 etypes R = 5 etype = [200, 200, 200, 200, 200] B = 2 I = 10 rgc_basis = nn.RelGraphConv(I, O, R, "basis", B).to(ctx) rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True).to(ctx) rgc_basis_low.weight = rgc_basis.weight rgc_basis_low.w_comp = rgc_basis.w_comp rgc_basis_low.loop_weight = rgc_basis.loop_weight h = th.randn((100, I)).to(ctx) r = etype h_new = rgc_basis(g, h, r) h_new_low = rgc_basis_low(g, h, r) assert list(h_new.shape) == [100, O] assert list(h_new_low.shape) == [100, O] assert F.allclose(h_new, h_new_low) if O % B == 0: rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B).to(ctx) rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True).to(ctx) rgc_bdd_low.weight = rgc_bdd.weight rgc_bdd_low.loop_weight = rgc_bdd.loop_weight h = th.randn((100, I)).to(ctx) r = etype h_new = rgc_bdd(g, h, r) h_new_low = rgc_bdd_low(g, h, r) assert list(h_new.shape) == [100, O] assert list(h_new_low.shape) == [100, O] assert F.allclose(h_new, h_new_low) # with norm norm = th.rand((g.number_of_edges(), 1)).to(ctx) rgc_basis = nn.RelGraphConv(I, O, R, "basis", B).to(ctx) rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True).to(ctx) rgc_basis_low.weight = rgc_basis.weight rgc_basis_low.w_comp = rgc_basis.w_comp rgc_basis_low.loop_weight = rgc_basis.loop_weight h = th.randn((100, I)).to(ctx) r = etype h_new = rgc_basis(g, h, r, norm) h_new_low = rgc_basis_low(g, h, r, norm) assert list(h_new.shape) == [100, O] assert list(h_new_low.shape) == [100, O] assert F.allclose(h_new, h_new_low) if O % B == 0: rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B).to(ctx) rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True).to(ctx) rgc_bdd_low.weight = rgc_bdd.weight rgc_bdd_low.loop_weight = rgc_bdd.loop_weight h = th.randn((100, I)).to(ctx) r = etype h_new = rgc_bdd(g, h, r, norm) h_new_low = rgc_bdd_low(g, h, r, norm) assert list(h_new.shape) == [100, O] assert list(h_new_low.shape) == [100, O] assert F.allclose(h_new, h_new_low) # id input rgc_basis = nn.RelGraphConv(I, O, R, "basis", B).to(ctx) rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True).to(ctx) rgc_basis_low.weight = rgc_basis.weight rgc_basis_low.w_comp = rgc_basis.w_comp rgc_basis_low.loop_weight = rgc_basis.loop_weight h = th.randint(0, I, (100, )).to(ctx) r = etype h_new = rgc_basis(g, h, r) h_new_low = rgc_basis_low(g, h, r) assert list(h_new.shape) == [100, O] assert list(h_new_low.shape) == [100, O] assert F.allclose(h_new, h_new_low)
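Outside the test harness (where th and F.ctx come from DGL's backend shims), a minimal standalone sketch of RelGraphConv with basis regularization; the random per-edge relation ids here are illustrative:

import torch
import scipy.sparse as sp
import dgl
from dgl.nn.pytorch import RelGraphConv

g = dgl.DGLGraph(sp.random(100, 100, density=0.1), readonly=True)
conv = RelGraphConv(10, 8, num_rels=5, regularizer='basis', num_bases=2)
h = torch.randn(100, 10)
etypes = torch.randint(0, 5, (g.number_of_edges(),))  # one relation id per edge
out = conv(g, h, etypes)
print(out.shape)  # torch.Size([100, 8])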
def __getitem__(self, idx): # Returns tuple # Smiles has to be in first column of the csv !! row = self.df.iloc[idx, :] smiles = row.smiles # needed anyway to build graph m = Chem.MolFromSmiles(smiles) if self.compute_selfies: Chem.Kekulize(m) k = Chem.MolToSmiles(m, isomericSmiles=False, kekuleSmiles=True) # kekuleSmiles selfie = encoder(k) """ if selfie != row.selfies: print('new selfie:', selfie) print('prev : ', row.selfies) """ else: selfie = row.selfies # 1 - Graph building if m != None: graph = smiles_to_nx(smiles) else: return None, 0, 0, 0 one_hot = { edge: torch.tensor(self.edge_map[label]) for edge, label in ( nx.get_edge_attributes(graph, 'bond_type')).items() } nx.set_edge_attributes(graph, name='one_hot', values=one_hot) try: at_type = { a: oh_tensor(self.at_map[label], self.num_atom_types) for a, label in ( nx.get_node_attributes(graph, 'atomic_num')).items() } nx.set_node_attributes(graph, name='atomic_num', values=at_type) except KeyError: print('!!!! Atom type to one-hot error for input ', smiles, ' ignored') return None, 0, 0, 0 at_charge = { a: oh_tensor(self.charges_map[label], self.num_charges) for a, label in ( nx.get_node_attributes(graph, 'formal_charge')).items() } nx.set_node_attributes(graph, name='formal_charge', values=at_charge) try: hydrogens = { a: torch.tensor(self.chi_map[label], dtype=torch.float) for a, label in ( nx.get_node_attributes(graph, 'num_explicit_hs')).items() } nx.set_node_attributes(graph, name='num_explicit_hs', values=hydrogens) except KeyError: print( '!!!! Number of explicit hydrogens to one-hot error for input ', smiles, ' ignored') return None, 0, 0, 0 aromatic = { a: torch.tensor(self.chi_map[label], dtype=torch.float) for a, label in ( nx.get_node_attributes(graph, 'is_aromatic')).items() } nx.set_node_attributes(graph, name='is_aromatic', values=aromatic) at_chir = { a: torch.tensor(self.chi_map[label], dtype=torch.float) for a, label in ( nx.get_node_attributes(graph, 'chiral_tag')).items() } nx.set_node_attributes(graph, name='chiral_tag', values=at_chir) # to dgl g_dgl = dgl.DGLGraph() node_features = [ 'atomic_num', 'formal_charge', 'num_explicit_hs', 'is_aromatic', 'chiral_tag' ] g_dgl.from_networkx(nx_graph=graph, node_attrs=node_features, edge_attrs=['one_hot']) N = g_dgl.number_of_nodes() g_dgl.ndata['h'] = torch.cat( [g_dgl.ndata[f].view(N, -1) for f in node_features], dim=1) if self.graph_only: # give only the graph (to encode in latent space) return g_dgl, 0, 0, 0 # 2 - Smiles / selfies to integer indices array if self.language == 'selfies': a, valid_flag = self.selfies_to_hot(selfie) if valid_flag == 0: # no one hot encoding for this selfie, ignore print('!!! Selfie to one-hot failed with current alphabet') return None, 0, 0, 0 else: a = np.zeros(self.max_len) idces = [self.char_to_index[c] for c in smiles] a[:len(idces)] = idces # 3 - Optional props and affinities props, targets = 0, 0 if len(self.props) > 0: props = np.array(row[self.props], dtype=np.float32) if len(self.targets) > 0 and self.binned_scores: targets = np.array(row[self.targets], dtype=np.int64) # for torch.long class labels elif len(self.targets) > 0: targets = np.array(row[self.targets], dtype=np.float32) # for torch.float values targets = np.nan_to_num(targets) # if nan somewhere, change to 0. return g_dgl, a, props, targets
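__getitem__ relies on an oh_tensor helper that is not shown; a plausible sketch, assuming it one-hot encodes a class index into a float vector:

import torch

def oh_tensor(index, size):
    # hypothetical helper: one-hot vector of length `size` with a 1 at `index`
    v = torch.zeros(size, dtype=torch.float)
    v[index] = 1.0
    return v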
def main(args): column_headers = [ "dataset", "setting", "model", "pretraining", "epoch", "accuracy" ] use_cuda = args.use_cuda and torch.cuda.is_available() print("Using CUDA:", use_cuda) results_df = pd.DataFrame(columns=column_headers) data = load_data(args) features = torch.FloatTensor(data.features) labels = torch.LongTensor(data.labels) train_mask = torch.ByteTensor(data.train_mask) val_mask = torch.ByteTensor(data.val_mask) test_mask = torch.ByteTensor(data.test_mask) in_feats = features.shape[1] n_classes = data.num_labels # We don't use a validation set train_mask = train_mask | val_mask if args.invert: # This is different from swapping the train and test masks # because train | test does not cover the whole dataset train_mask, test_mask = ~train_mask, train_mask setting = 'B' else: setting = 'A' g = dgl.DGLGraph(data.graph) # Suppress warning g.set_n_initializer(dgl.init.zero_initializer) # add self loop g.add_edges(g.nodes(), g.nodes()) # g_train, g = split_graph(g, train_mask) # # Select train nodes.. train_nodes = torch.arange(g.number_of_nodes())[train_mask] if use_cuda: features, labels = features.cuda(), labels.cuda() train_mask, test_mask = train_mask.cuda(), test_mask.cuda() train_nodes = train_nodes.cuda() # .. to induce subgraph g_train = g.subgraph(train_nodes) g_train.set_n_initializer(dgl.init.zero_initializer) features_train = features[train_mask] labels_train = labels[train_mask] # Verify sizes of train set assert int(train_mask.sum().item()) == features_train.size(0)\ == labels_train.size(0) == g_train.number_of_nodes() # Random Restarts for __ in range(args.runs): # Init net net = build_model(args.model, args.dataset, g_train, in_feats, n_classes) if use_cuda: net = net.cuda() print(net) # Init optimizers # optimizer = torch.optim.Adam(net.parameters(), # **training_optimizer_params) optimizer = build_optimizer(net.parameters(), args.model, args.dataset, inference=False) print("Optimizer", optimizer) # Pre-training for epoch in range(args.epochs): train_epoch( epoch + 1, net, optimizer, features_train, labels_train, train_mask=None # Use all labels of the *train* subgraph ) print("=== INFERENCE ===") net.set_graph(g) # Eval without inference epochs accuracy_score = eval_inference(0, net, features, labels, test_mask) results_df = results_df.append(pd.DataFrame([[ args.dataset, setting, args.model, args.epochs, 0, accuracy_score ]], columns=column_headers), ignore_index=True) # Fresh optimizer for up-training at inference time # optimizer = torch.optim.Adam(net.parameters(), # **inference_optimizer_params) del optimizer optimizer = build_optimizer(net.parameters(), args.model, args.dataset, inference=True) print("Fresh inference optimizer", optimizer) for i in range(args.inference): train_epoch(i + 1, net, optimizer, features, labels, train_mask=train_mask) accuracy_score = eval_inference(i + 1, net, features, labels, test_mask) results_df = results_df.append(pd.DataFrame( [[ args.dataset, setting, args.model, args.epochs, i + 1, accuracy_score ]], columns=column_headers), ignore_index=True) del net del optimizer torch.cuda.empty_cache() # don't leak here print(args) for i in range(args.inference + 1): # Print results to command line rbi = results_df[results_df['epoch'] == i]['accuracy'] print( "Avg accuracy over {} runs after {} inference epochs: {:.4f} ({:.4f})" .format(args.runs, i, rbi.mean(), rbi.std())) if args.outfile is not None: # And store them to csv file appendDFToCSV_void(results_df, args.outfile, sep=",")
def __init__(self, dataset, args): src = [dataset.train[0]] etype_id = [dataset.train[1]] dst = [dataset.train[2]] self.num_train = len(dataset.train[0]) if args.dataset == "wikikg90M": self.valid_dict = dataset.valid self.num_valid = len(self.valid_dict['h,r->t']['hr']) elif dataset.valid is not None: src.append(dataset.valid[0]) etype_id.append(dataset.valid[1]) dst.append(dataset.valid[2]) self.num_valid = len(dataset.valid[0]) else: self.num_valid = 0 if args.dataset == "wikikg90M": self.test_dict = dataset.test self.num_test = len(self.test_dict['h,r->t']['hr']) elif dataset.test is not None: src.append(dataset.test[0]) etype_id.append(dataset.test[1]) dst.append(dataset.test[2]) self.num_test = len(dataset.test[0]) else: self.num_test = 0 if args.dataset == "wikikg90M": print('|valid|:', self.num_valid) print('|test|:', self.num_test) return assert len( src) > 1, "we need at least a validation set or a test set." src = np.concatenate(src) etype_id = np.concatenate(etype_id) dst = np.concatenate(dst) coo = sp.sparse.coo_matrix( (np.ones(len(src)), (src, dst)), shape=[dataset.n_entities, dataset.n_entities]) g = dgl.DGLGraph(coo, readonly=True, multigraph=True, sort_csr=True) g.edata['tid'] = F.tensor(etype_id, F.int64) self.g = g if args.eval_percent < 1: self.valid = np.random.randint( 0, self.num_valid, size=(int( self.num_valid * args.eval_percent), )) + self.num_train else: self.valid = np.arange(self.num_train, self.num_train + self.num_valid) print('|valid|:', len(self.valid)) if args.eval_percent < 1: self.test = np.random.randint( 0, self.num_test, size=(int(self.num_test * args.eval_percent), )) self.test += self.num_train + self.num_valid else: self.test = np.arange(self.num_train + self.num_valid, self.g.number_of_edges()) print('|test|:', len(self.test))
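The triplet-to-graph conversion above can be exercised on a toy set of (head, relation, tail) triplets; a minimal sketch using the same legacy constructor arguments:

import numpy as np
import scipy.sparse as sp
import torch
import dgl

src = np.array([0, 1, 2])       # heads
etype_id = np.array([0, 1, 0])  # relation ids
dst = np.array([1, 2, 0])       # tails
n_entities = 3
coo = sp.coo_matrix((np.ones(len(src)), (src, dst)), shape=[n_entities, n_entities])
g = dgl.DGLGraph(coo, readonly=True, multigraph=True, sort_csr=True)
g.edata['tid'] = torch.tensor(etype_id, dtype=torch.int64)  # relation id per edge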
def generate_batch_G(self, target_bg=None, x=None, batch_size=1, style=None): # init graph k = self.k m = self.m n = self.n ajr = self.ajr if style is not None: style = style else: style = self.style if target_bg is not None: bg = dgl.batch(np.random.choice(target_bg, batch_size, replace=True)) else: if style.startswith('er'): p = float(style.split('-')[1]) G = [nx.erdos_renyi_graph(n, p) for _ in range(batch_size)] adj_matrices = torch.cat([torch.tensor(nx.adjacency_matrix(g).todense()).float() for g in G]) elif style.startswith('ba'): _m = int(style.split('-')[1]) G = [nx.barabasi_albert_graph(n, _m) for _ in range(batch_size)] adj_matrices = torch.cat([torch.tensor(nx.adjacency_matrix(g).todense()).float() for g in G]) # init batch graphs gs = [dgl.DGLGraph() for _ in range(batch_size)] _ = [(g.add_nodes(n), g.add_edges(self.src, self.dst)) for g in gs] bg = dgl.batch(gs) # 2-d coordinates 'x' if x is None: if style == 'plain': bg.ndata['x'] = torch.rand((batch_size * n, 2)) elif style == 'shift': bg.ndata['x'] = torch.rand((batch_size * n, 2)) * 10 + 5 elif style.startswith('cluster'): _h = 2 cluster_style = int(style.split('-')[1]) if cluster_style == 0: center = torch.rand((batch_size * k, 1, _h)).repeat(1, m, 1) * 6 elif cluster_style == 1: # k=4 mask = torch.tensor([[[0, 0]], [[0, 5]], [[5, 0]], [[5, 5]]]).repeat(batch_size, 1, 1) center = torch.rand((batch_size * k, 1, _h)) * 3 + mask center = center.repeat(1, m, 1) elif cluster_style == 2: # k=4 mask = torch.tensor([[[0, 0]], [[0, 0]], [[5, 5]], [[5, 5]]]).repeat(batch_size, 1, 1) center = torch.rand((batch_size * k, 1, _h)) * 3 + mask center = center.repeat(1, m, 1) bg.ndata['x'] = (center + torch.rand((batch_size * k, m, _h))).view(batch_size * n, _h) else: bg.ndata['x'] = x # label if self.cut == 'equal': label = torch.tensor(range(k)).unsqueeze(1).repeat(batch_size, m).view(-1) else: label = torch.tensor(self.init_label).repeat(batch_size) batch_mask = torch.tensor(range(0, n * batch_size, n)).unsqueeze(1).expand(batch_size, n).flatten() perm_idx = torch.cat([torch.randperm(n) for _ in range(batch_size)]) + batch_mask label = label[perm_idx].view(batch_size, n) bg.ndata['label'] = torch.nn.functional.one_hot(label, k).float().view(batch_size * n, k) # calculate edges if target_bg is not None: # permute the dist matrix # TODO: add ndata['adj'] bg.edata['d'] *= F.relu(torch.ones(bg.edata['d'].shape).cuda() + 0.1 * torch.randn(bg.edata['d'].shape).cuda()) else: if style.startswith('er') or style.startswith('ba'): # TODO: add ndata['adj'] bg.edata['d'] = adj_matrices.view(batch_size, -1, 1)[:, self.nonzero_idx, :].view(-1, 1) else: _, neighbor_idx, square_dist_matrix = dgl.transform.knn_graph(bg.ndata['x'].view(batch_size, n, -1), ajr + 1, extend_info=True) square_dist_matrix = F.relu(square_dist_matrix, inplace=True) # numerical error could result in NaN in sqrt. 
bg.ndata['adj'] = torch.sqrt(square_dist_matrix).view(bg.number_of_nodes(), -1) # scale d (maintain avg=0.5): if style != 'plain': bg.ndata['adj'] /= (bg.ndata['adj'].sum() / (bg.ndata['adj'].shape[0]**2) / 0.5) bg.edata['d'] = bg.ndata['adj'].view(batch_size, -1, 1)[:, self.nonzero_idx, :].view(-1, 1) group_matrix = torch.bmm(bg.ndata['label'].view(batch_size, n, -1), bg.ndata['label'].view(batch_size, n, -1).transpose(1, 2)).view(batch_size, -1)[:, self.nonzero_idx].view(-1, 1) if target_bg is not None: bg.edata['e_type'][:, 1:] = group_matrix else: if style.startswith('er') or style.startswith('ba'): bg.edata['e_type'] = torch.cat([bg.edata['d'], group_matrix], dim=1) else: neighbor_idx -= torch.tensor(range(0, batch_size * n, n)).view(batch_size, 1, 1).repeat(1, n, ajr + 1) \ - torch.tensor(range(0, batch_size * n * n, n * n)).view(batch_size, 1, 1).repeat(1, n, ajr + 1) adjacent_matrix = torch.zeros((batch_size * n * n, 1)) adjacent_matrix[neighbor_idx + self.adj_mask.repeat(batch_size, 1, 1)] = 1 adjacent_matrix = adjacent_matrix.view(batch_size, n * n, 1)[:, self.nonzero_idx, :].view(-1, 1) bg.edata['e_type'] = torch.cat([adjacent_matrix, group_matrix], dim=1) return bg
def test_pickling_graph(): # graph structures and frames are pickled g = dgl.DGLGraph() g.add_nodes(3) src = F.tensor([0, 0]) dst = F.tensor([1, 2]) g.add_edges(src, dst) x = F.randn((3, 7)) y = F.randn((3, 5)) a = F.randn((2, 6)) b = F.randn((2, 4)) g.ndata['x'] = x g.ndata['y'] = y g.edata['a'] = a g.edata['b'] = b # registered functions are pickled g.register_message_func(_global_message_func) reduce_func = fn.sum('x', 'x') g.register_reduce_func(reduce_func) # custom attributes should be pickled g.foo = 2 new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g) assert new_g.foo == 2 assert new_g._message_func == _global_message_func assert isinstance(new_g._reduce_func, type(reduce_func)) assert new_g._reduce_func._name == 'sum' assert new_g._reduce_func.reduce_op == F.sum assert new_g._reduce_func.msg_field == 'x' assert new_g._reduce_func.out_field == 'x' # test batched graph with partial set case g2 = dgl.DGLGraph() g2.add_nodes(4) src2 = F.tensor([0, 1]) dst2 = F.tensor([2, 3]) g2.add_edges(src2, dst2) x2 = F.randn((4, 7)) y2 = F.randn((3, 5)) a2 = F.randn((2, 6)) b2 = F.randn((2, 4)) g2.ndata['x'] = x2 g2.nodes[[0, 1, 3]].data['y'] = y2 g2.edata['a'] = a2 g2.edata['b'] = b2 bg = dgl.batch([g, g2]) bg2 = _reconstruct_pickle(bg) _assert_is_identical(bg, bg2) new_g, new_g2 = dgl.unbatch(bg2) _assert_is_identical(g, new_g) _assert_is_identical(g2, new_g2) # readonly graph g = dgl.DGLGraph([(0, 1), (1, 2)], readonly=True) new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g) # multigraph g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], multigraph=True) new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g) # readonly multigraph g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], multigraph=True, readonly=True) new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g)
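The pickling test uses a _reconstruct_pickle helper that is not shown; a plausible sketch is a plain in-memory pickle round-trip:

import io
import pickle

def _reconstruct_pickle(obj):
    # hypothetical helper: serialize then deserialize to simulate a save/load cycle
    f = io.BytesIO()
    pickle.dump(obj, f)
    f.seek(0)
    return pickle.load(f)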
def ProcessImage(batch_itr): X, y = batch_itr ybar = np.ones([64, 64]) * -2. Y = np.stack([y[0], y[1], y[2], y[3], y[4], y[5], ybar], axis=0) test1, test2, test3, test4, test5, test6, testT =\ MakeLayer(X[0], Y[0], 0), MakeLayer(X[1], Y[1], 1), \ MakeLayer(X[2], Y[2], 2), MakeLayer(X[3], Y[3], 3), \ MakeLayer(X[4], Y[4], 4), MakeLayer(X[5], Y[5], 5), \ MakeLayer(X[6], Y[6], 6) energy_val = np.concatenate( [testT[0], test1[0], test2[0], test3[0], test4[0], test5[0], test6[0]]) x_val = np.concatenate( [testT[1], test1[1], test2[1], test3[1], test4[1], test5[1], test6[1]]) y_val = np.concatenate( [testT[2], test1[2], test2[2], test3[2], test4[2], test5[2], test6[2]]) z_val = np.concatenate( [testT[3], test1[3], test2[3], test3[3], test4[3], test5[3], test6[3]]) x_idx = np.concatenate( [testT[4], test1[4], test2[4], test3[4], test4[4], test5[4], test6[4]]) y_idx = np.concatenate( [testT[5], test1[5], test2[5], test3[5], test4[5], test5[5], test6[5]]) z_idx = np.concatenate( [testT[6], test1[6], test2[6], test3[6], test4[6], test5[6], test6[6]]) target_val = np.concatenate( [testT[7], test1[7], test2[7], test3[7], test4[7], test5[7], test6[7]]) point_indx = np.array([z_idx, x_idx, y_idx], dtype=int) point_indx = np.transpose(point_indx) point = np.array([x_val, y_val, z_val]) point = np.transpose(point) point = np.reshape(point, (1, point.shape[0], point.shape[1])) point = torch.FloatTensor(point) # x_red = x[loc] # y_red = y[loc] graph = KNNGraph(graph_size) npoints = energy_val.shape[0] if (npoints < graph_size): g = dgl.DGLGraph() g.add_nodes(2) else: g = graph(point) g = dgl.transform.remove_self_loop(g) sample = { 'Input': X, 'seq_length': len(energy_val), 'point_xyz': point, 'target': torch.FloatTensor(y), 'point_idx_zxy': point_indx, 'energy': torch.FloatTensor(energy_val), 'gr': g } return sample
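ProcessImage builds its graph with a KNNGraph factory constructed elsewhere as graph = KNNGraph(graph_size); a minimal sketch of that step on random points, with an illustrative k:

import torch
import dgl
from dgl.nn.pytorch.factory import KNNGraph

k = 8  # illustrative stand-in for graph_size
points = torch.rand(1, 50, 3)  # (batch, num_points, xyz), like `point` above
knn = KNNGraph(k)
g = knn(points)  # connect each point to its k nearest neighbors
g = dgl.transform.remove_self_loop(g)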
def main(): g_nx = nx.petersen_graph() g_dgl = dgl.DGLGraph(g_nx) plt.figure() nx.draw(g_dgl.to_networkx(), with_labels=True) plt.show()
def gen_from_data(data, readonly, sort): return dgl.DGLGraph(data, readonly=readonly, sort_csr=sort) # pass the caller's sort flag through instead of hard-coding sort_csr=True
def gen_by_mutation(): g = dgl.DGLGraph() src, dst = edge_pair_input() g.add_nodes(10) g.add_edges(src, dst) return g
def plot_tree_graph(jet_graph, ax): ax.set_axis_off() vtxlist, vtxdict, hadron_list, additional_vtx_dict = compute_jet_vtx( jet_graph) pt = jet_graph.jet_pt eta = jet_graph.jet_eta flav = jet_graph.jet_DoubleHadLabel ax.set_title('Flavour : ' + str(flav) + ' pt: ' + '{0:.2f}'.format(pt / 1000.0) + ' eta: ' + '{0:.2f}'.format(eta), fontsize=20) g = dgl.DGLGraph() n_nodes = len(jet_graph['trk_node_index']) + len( jet_graph['jf_node_index']) + len(jet_graph['particle_node_index']) g.add_nodes(n_nodes) edge_list = np.dstack([jet_graph.edge_start, jet_graph.edge_end])[0] labels = {} for edge in edge_list: s, e = edge g.add_edge(int(s), int(e)) pv_x, pv_y, pv_z = jet_graph.truth_PVx, jet_graph.truth_PVy, jet_graph.truth_PVz g.add_nodes(1) particles_in_primary = [] for idx, pdgid, x0, y0, z0, x, y, z, stat, injet in zip( jet_graph['particle_node_index'], jet_graph['particle_node_pdgid'], jet_graph.particle_node_prod_x, jet_graph.particle_node_prod_y, jet_graph.particle_node_prod_z, jet_graph.particle_node_decay_x, jet_graph.particle_node_decay_y, jet_graph.particle_node_decay_z, jet_graph.particle_node_status, jet_graph.particle_node_inJet): if np.linalg.norm([x0 - pv_x, y0 - pv_y, z0 - pv_z]) < 0.01: particles_in_primary.append(idx) for p_in_primary in particles_in_primary: has_parent = False #loop over the other children of the vtx, see if one of them is the parent for p_in_primary_j in particles_in_primary: for edge in edge_list: s, e = edge if p_in_primary_j == s and p_in_primary == e: has_parent = True if not has_parent: g.add_edge(n_nodes, int(p_in_primary)) G = g.to_networkx() node_colors = [] for idx, pdgid, charge, in zip(jet_graph['particle_node_index'], jet_graph['particle_node_pdgid'], jet_graph.particle_node_charge): if abs(pdgid) in [6, 24]: node_colors.append('lightgreen') elif charge == 0: node_colors.append('khaki') else: node_colors.append('lightsalmon') pos = nx.nx_agraph.graphviz_layout(G, prog='dot') min_max_x = list(pos[0]) y_min = -10 for key_i, key in enumerate(jet_graph['particle_node_index']): if key_i == 0: min_max_x = list(pos[key]) x, y = pos[key] if x < min_max_x[0]: min_max_x[0] = x if x > min_max_x[1]: min_max_x[1] = x if y < y_min: y_min = y - 10 x_range = min_max_x[1] - min_max_x[0] n_tracks = len(jet_graph['trk_node_index']) track_x_positions = [] for track_i, idx in enumerate(jet_graph['trk_node_index']): x_orig, y_orig = pos[idx] if idx not in jet_graph.edge_end: track_x_positions.append( (min_max_x[0] + track_i * x_range / n_tracks, idx)) else: track_x_positions.append((x_orig, idx)) track_x_positions = sorted(track_x_positions, key=lambda x: x[0]) spacing = 50 for track_i in range(1, len(track_x_positions)): previous_pos = track_x_positions[track_i - 1][0] current_pos = track_x_positions[track_i][0] if current_pos < previous_pos + spacing: track_x_positions[track_i] = (previous_pos + spacing, track_x_positions[track_i][1]) for track_x, idx in track_x_positions: pos[idx] = (track_x, y_min) n_jf_vtx = len(jet_graph['jf_node_index']) for idx, vtx_i in zip(jet_graph['jf_node_index'], range(len(jet_graph['jf_node_index']))): pos[idx] = (min_max_x[0] + x_range / 2 + (vtx_i) * x_range / n_jf_vtx / 2., y_min - 80) labels[idx] = 'JF' + str(vtx_i) nx.draw_networkx_nodes(G, pos, node_color='orchid', node_size=1200, ax=ax, nodelist=jet_graph['jf_node_index']) nx.draw_networkx_nodes(G, pos, node_color='lightskyblue', node_size=300, ax=ax, nodelist=jet_graph['trk_node_index']) nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=800, ax=ax, 
nodelist=jet_graph['particle_node_index']) nx.draw_networkx_edges(G, pos, ax=ax) for idx, pdgid, x0, y0, z0, x, y, z, stat, injet in zip( jet_graph['particle_node_index'], jet_graph['particle_node_pdgid'], jet_graph.particle_node_prod_x, jet_graph.particle_node_prod_y, jet_graph.particle_node_prod_z, jet_graph.particle_node_decay_x, jet_graph.particle_node_decay_y, jet_graph.particle_node_decay_z, jet_graph.particle_node_status, jet_graph.particle_node_inJet): #labels[idx] = str(idx) #labels[idx] = str(stat)+' '+str(injet) #labels[idx] = '{0:.2f}'.format(x0)+'\n'+ '{0:.2f}'.format(y0)+'\n'+ '{0:.2f}'.format(z0) #labels[idx] = '{0:.4f}'.format(np.linalg.norm(np.array([pv_x,pv_y,pv_z])-np.array([x0,y0,z0]))) if pdgid in pdg_id_dict: labels[idx] = pdg_id_dict[pdgid] else: labels[idx] = str(pdgid) nx.draw_networkx_labels(G, pos, labels, ax=ax)
from scipy.special import softmax from tensorboardX import SummaryWriter macrostep = 10 DEVICE = "cuda:0" with open("stoppedEdges.pkl", 'rb') as f: stoppedEdges = pickle.load(f) with open("amatrix_edges.pkl", 'rb') as f: A = pickle.load(f) indices = {c: i for i, c in enumerate(list(A.columns))} invertedIndices = {i: c for i, c in enumerate(list(A.columns))} g = dgl.DGLGraph(np.eye(A.values.shape[0]) + A.values) #g = dgl.DGLGraph(A.values) N = g.number_of_nodes() embedding_n = 32 g.ndata['entered'] = torch.zeros((g.number_of_nodes(), 1)).cuda().to(DEVICE) class PredictParkingModule(nn.Module): def __init__(self, in_feats, embedding_n): super(PredictParkingModule, self).__init__() self.embed = nn.Embedding(in_feats, embedding_n) self.L2 = nn.Linear(embedding_n + 1, 1) self.A3 = F.relu def forward(self, node):
def test_edge_softmax(): # Basic g = dgl.DGLGraph(nx.path_graph(3)) edata = F.ones((g.number_of_edges(), 1)) a = nn.edge_softmax(g, edata) assert len(g.ndata) == 0 assert len(g.edata) == 0 assert F.allclose(a, uniform_attention(g, a.shape)) # Test higher dimension case edata = F.ones((g.number_of_edges(), 3, 1)) a = nn.edge_softmax(g, edata) assert len(g.ndata) == 0 assert len(g.edata) == 0 assert F.allclose(a, uniform_attention(g, a.shape)) # Test both forward and backward with PyTorch built-in softmax. g = dgl.DGLGraph() g.add_nodes(30) # build a complete graph for i in range(30): for j in range(30): g.add_edge(i, j) score = F.randn((900, 1)) score.requires_grad_() grad = F.randn((900, 1)) y = F.softmax(score.view(30, 30), dim=0).view(-1, 1) y.backward(grad) grad_score = score.grad score.grad.zero_() y_dgl = nn.edge_softmax(g, score) assert len(g.ndata) == 0 assert len(g.edata) == 0 # check forward assert F.allclose(y_dgl, y) y_dgl.backward(grad) # checkout gradient assert F.allclose(score.grad, grad_score) print(score.grad[:10], grad_score[:10]) # Test 2 def generate_rand_graph(n, m=None, ctor=dgl.DGLGraph): if m is None: m = n arr = (sp.sparse.random(m, n, density=0.1, format='coo') != 0).astype(np.int64) return ctor(arr, readonly=True) for g in [generate_rand_graph(50), generate_rand_graph(50, ctor=dgl.graph), generate_rand_graph(100, 50, ctor=dgl.bipartite)]: a1 = F.randn((g.number_of_edges(), 1)).requires_grad_() a2 = a1.clone().detach().requires_grad_() g.edata['s'] = a1 g.group_apply_edges('dst', lambda edges: {'ss':F.softmax(edges.data['s'], 1)}) g.edata['ss'].sum().backward() builtin_sm = nn.edge_softmax(g, a2) builtin_sm.sum().backward() print(a1.grad - a2.grad) assert len(g.srcdata) == 0 assert len(g.dstdata) == 0 assert len(g.edata) == 2 assert F.allclose(a1.grad, a2.grad, rtol=1e-4, atol=1e-4) # Follow tolerance in unittest backend
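Both this test and its TensorFlow twin further below compare against a uniform_attention reference that is not shown; a plausible implementation gives every in-edge of a node the weight 1/in-degree, which is what edge softmax yields for constant scores:

import torch as th

def uniform_attention(g, shape):
    # each edge's softmax weight under constant scores: 1 / in_degree(dst)
    a = th.ones(shape)
    target_shape = (g.number_of_edges(),) + (1,) * (len(shape) - 1)
    return a / g.in_degrees(g.edges()[1]).view(target_shape).float()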
def test_nx_conversion(): # check conversion between networkx and DGLGraph def _check_nx_feature(nxg, nf, ef): # check node and edge feature of nxg # this is used to check to_networkx num_nodes = len(nxg) num_edges = nxg.size() if num_nodes > 0: node_feat = ddict(list) for nid, attr in nxg.nodes(data=True): assert len(attr) == len(nf) for k in nxg.nodes[nid]: node_feat[k].append(F.unsqueeze(attr[k], 0)) for k in node_feat: feat = F.cat(node_feat[k], 0) assert F.allclose(feat, nf[k]) else: assert len(nf) == 0 if num_edges > 0: edge_feat = ddict(lambda: [0] * num_edges) for u, v, attr in nxg.edges(data=True): assert len(attr) == len(ef) + 1 # extra id eid = attr['id'] for k in ef: edge_feat[k][eid] = F.unsqueeze(attr[k], 0) for k in edge_feat: feat = F.cat(edge_feat[k], 0) assert F.allclose(feat, ef[k]) else: assert len(ef) == 0 n1 = F.randn((5, 3)) n2 = F.randn((5, 10)) n3 = F.randn((5, 4)) e1 = F.randn((4, 5)) e2 = F.randn((4, 7)) g = DGLGraph(multigraph=True) g.add_nodes(5) g.add_edges([0, 1, 3, 4], [2, 4, 0, 3]) g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3}) g.edata.update({'e1': e1, 'e2': e2}) # convert to networkx nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2']) assert len(nxg) == 5 assert nxg.size() == 4 _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2}) # convert to DGLGraph, nx graph has id in edge feature # use id feature to test non-tensor copy g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id']) # check graph size assert g.number_of_nodes() == 5 assert g.number_of_edges() == 4 # check number of features # test with existing dglgraph (so existing features should be cleared) assert len(g.ndata) == 1 assert len(g.edata) == 2 # check feature values assert F.allclose(g.ndata['n1'], n1) # with id in nx edge feature, e1 should follow original order assert F.allclose(g.edata['e1'], e1) assert F.array_equal(g.get_e_repr()['id'], F.copy_to(F.arange(0, 4), F.cpu())) # test conversion after modifying DGLGraph g.pop_e_repr( 'id') # pop id so we don't need to provide id when adding edges new_n = F.randn((2, 3)) new_e = F.randn((3, 5)) g.add_nodes(2, data={'n1': new_n}) # add three edges, one is a multi-edge g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e}) n1 = F.cat((n1, new_n), 0) e1 = F.cat((e1, new_e), 0) # convert to networkx again nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1']) assert len(nxg) == 7 assert nxg.size() == 7 _check_nx_feature(nxg, {'n1': n1}, {'e1': e1}) # now test convert from networkx without id in edge feature # first pop id in edge feature for _, _, attr in nxg.edges(data=True): attr.pop('id') # test with a new graph g = DGLGraph(multigraph=True) g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1']) # check graph size assert g.number_of_nodes() == 7 assert g.number_of_edges() == 7 # check number of features assert len(g.ndata) == 1 assert len(g.edata) == 1 # check feature values assert F.allclose(g.ndata['n1'], n1) # edge feature order follows nxg.edges() edge_feat = [] for _, _, attr in nxg.edges(data=True): edge_feat.append(F.unsqueeze(attr['e1'], 0)) edge_feat = F.cat(edge_feat, 0) assert F.allclose(g.edata['e1'], edge_feat) # Test converting from a networkx graph whose nodes are # not labeled with consecutive-integers. 
nxg = nx.cycle_graph(5) nxg.remove_nodes_from([0, 4]) for u in nxg.nodes(): nxg.nodes[u]['h'] = F.tensor([u]) for u, v, d in nxg.edges(data=True): d['h'] = F.tensor([u, v]) g = dgl.DGLGraph() g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h']) assert g.number_of_nodes() == 3 assert g.number_of_edges() == 4 assert g.has_edge_between(0, 1) assert g.has_edge_between(1, 2) assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]])) assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
read_out = dgl.mean_nodes(g, 'features') # output = F.softmax(self.fc(read_out)) output = self.fc(read_out) return output # src and dst nodes of the graph (directed links between human-body keypoints) src = np.array([ 17, 15, 18, 16, 0, 2, 3, 4, 5, 6, 7, 8, 9, 12, 10, 13, 11, 14, 23, 22, 24, 20, 19, 21 ]) dst = np.array([ 15, 0, 16, 0, 1, 1, 2, 3, 1, 5, 6, 1, 8, 8, 9, 12, 10, 13, 22, 11, 11, 19, 14, 14 ]) graph = dgl.DGLGraph((src, dst)) # build the graph graph = dgl.add_self_loop(graph) model = GCN(3, 20, 2) # train on a single graph per class to check that the model runs inputs = [body_graph_sit[0][0], body_graph_stand[1][0]] label = [body_graph_sit[0][1], body_graph_stand[1][1]] optimizer = torch.optim.Adam(model.parameters(), lr=0.0005) criterion = nn.BCEWithLogitsLoss() for epoch in range(50): for i in range(2): output = model(graph, inputs[i].ndata['coordinate'].float()) pred = torch.argmax(output, axis=1) loss = criterion(output, label[i]) # don't shadow the criterion with the loss value optimizer.zero_grad() loss.backward() optimizer.step()
import networkx as nx import matplotlib.pyplot as plt import torch import dgl N = 100 # number of nodes DAMP = 0.85 # damping factor K = 10 # number of iterations g = nx.erdos_renyi_graph(N, 0.1) # random graph generator: create a networkx graph g = dgl.DGLGraph(g) # convert it to a DGL graph # nx.draw(g.to_networkx(), node_size=50, node_color=[[.5, .5, .5, ]]) # draw with networkx, setting node size and gray level # plt.show() g.ndata['pv'] = torch.ones(N) / N # initialize the PageRank value of each node (batch processing) g.ndata['deg'] = g.out_degrees(g.nodes()).float() # store each node's out-degree as a feature print(g.ndata) # Define the message function: divide each node's PageRank value by its out-degree and send the result to its neighbors as the message: def pagerank_message_func(edges): pv = edges.src['pv'] deg = edges.src['deg'] return {'pv': pv / deg} # Define the reduce function: remove the messages from the mailbox, aggregate them, and compute the node's new PageRank value: def pagerank_reduce_func(nodes): mail_box = nodes.mailbox['pv'] msgs = torch.sum(mail_box, dim=1) pv = (1 - DAMP) / N + DAMP * msgs return {'pv': pv}
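With the message and reduce functions in place, one power-iteration step is a single update_all call over the whole graph; a minimal sketch of running the K iterations in the same batched style:

def pagerank_batch(g):
    # send along every edge and reduce at every node in one call
    g.update_all(pagerank_message_func, pagerank_reduce_func)

for _ in range(K):
    pagerank_batch(g)
print(g.ndata['pv'])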
def load(self): print('loading data') # Edge features # adjs = [] edge_attr_name = [] g = nx.readwrite.edgelist.read_edgelist( self.edges_dir, delimiter=',', data=[ ('GO_ID', float), ('Gene_Family_Name', float), ('chebi', float), ('chemogenomics', float), ('cid', float), ('drug', float), ('expression', float), ('gene', float), ('hprd', float), ('protein', float), ('substructure', float), ('tissue', float) ], comments='#', create_using=nx.DiGraph) v_map = pd.read_csv(self.vertex_map_path, delimiter=',', header=None, dtype={ 'node': str, 'id': int }) v_map[1] = v_map[1].astype(int) mapping = pd.Series(v_map[1].values, index=v_map[0]).to_dict() g = nx.relabel.relabel_nodes(g, mapping) print('number of connected components: ', nx.algorithms.components.number_weakly_connected_components(g)) # Node Features if self.node_features_path is None: print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') print('!!! No node features given, using dummy features !!!') print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') features = np.ones((g.number_of_nodes(), 10)) else: features = pd.read_csv(self.node_features_path, delimiter=',').values # Ground Truth label labels = pd.read_csv(self.label_path, delimiter=',') # convert label to one-hot format one_hot_labels = pd.get_dummies( data=labels, dummy_na=True, columns=['label']).set_index('id') # N X (#edge attr) # one hot # print(labels.columns) one_hot_labels = one_hot_labels.drop(['label_nan'], axis=1) size = features.shape[0] train_id = set() test_id = set() train_mask = np.zeros((size, )).astype(bool) val_mask = np.zeros((size, )).astype(bool) test_mask = np.zeros((size, )).astype(bool) train_ratio = 0.8 np.random.seed(1) for column in one_hot_labels.columns: set_of_key = set( one_hot_labels[(one_hot_labels[column] == 1)].index) train_key_set = set( np.random.choice(list(set_of_key), size=int(len(set_of_key) * train_ratio), replace=False)) test_key_set = set_of_key - train_key_set train_id = train_id.union(train_key_set) test_id = test_id.union(test_key_set) train_mask[list(train_id)] = 1 val_mask[list(test_id)] = 1 test_mask[list(test_id)] = 1 # one_hot_labels = one_hot_labels.values[:,:-1] # convert to numpy format and remove the nan column y = np.zeros(size) y[one_hot_labels.index] = np.argmax(one_hot_labels.values, 1) y_train = np.zeros((size, one_hot_labels.shape[1])) # one hot format y_val = np.zeros((size, one_hot_labels.shape[1])) y_test = np.zeros((size, one_hot_labels.shape[1])) y_train[train_mask, :] = one_hot_labels.loc[sorted(train_id)] y_val[val_mask, :] = one_hot_labels.loc[sorted(test_id)] y_test[test_mask, :] = one_hot_labels.loc[sorted(test_id)] # print('adjs length: ', len(adjs)) print('features shape: ', features.shape) print('y_train shape: ', y_train.shape) print('y_val shape: ', y_val.shape) print('y_test shape: ', y_test.shape) print('train_mask shape: ', train_mask.shape) print('val_mask shape: ', val_mask.shape) print('test_mask shape: ', test_mask.shape) # self.adj = adjs[0] self.graph = dgl.DGLGraph() self.graph.from_networkx(nx_graph=g, edge_attrs=[ 'GO_ID', 'Gene_Family_Name', 'chebi', 'chemogenomics', 'cid', 'drug', 'expression', 'gene', 'hprd', 'protein', 'substructure', 'tissue' ]) self.num_edge_feats = len(self.graph.edge_attr_schemes()) # standardize edge attrs (z-score: subtract the mean, divide by the standard deviation) for attr in self.graph.edge_attr_schemes().keys(): self.graph.edata[attr] = (self.graph.edata[attr] - torch.mean( self.graph.edata[attr])) / torch.std(self.graph.edata[attr]) # concatenate edge attrs self.graph.edata['e'] = torch.cat([
self.graph.edata[attr][:, None] for attr in self.graph.edge_attr_schemes().keys() ], dim=1) print(self.graph.edge_attr_schemes()) # self.graph.from_scipy_sparse_matrix(spmat=self.adj) self.labels = y self.num_labels = one_hot_labels.shape[1] # self.edge_attr_adjs = adjs[1:] self.features = features self.y_train = y_train self.y_val = y_val self.y_test = y_test self.train_mask = train_mask.astype(int) self.val_mask = val_mask.astype(int) self.test_mask = test_mask.astype(int) self.edge_attr_name = edge_attr_name
def process_game_state_to_dgl(game_state: GameState, use_absolute_pos=False, edge_ally_to_enemy=False): # TODO 1 : Find a better way for managing input features and related constants! units = game_state.units ally_units = units.owned enemy_units = units.enemy num_allies = len(ally_units) num_enemies = len(enemy_units) exist_allies = False node_types = [] g = dgl.DGLGraph(multigraph=True) g.set_e_initializer(dgl.init.zero_initializer) # using curie_initializer for node features matters a lot ! # working as a mask for computing action probs later. g.set_n_initializer(curie_initializer) node_features = [] allies_health = 0 allies_health_percentage = 0 allies_mineral_cost = 0 allies_vespene_cost = 0 allies_food_cost = 0 ally_indices = [] tags = [unit.tag for unit in ally_units + enemy_units] tags_tensor = torch.LongTensor(tags) tag2unit_dict = dict() if num_allies >= 1: exist_allies = True allies_center_pos = ally_units.center allies_unit_dict = dict() allies_index_dict = dict() for i, allies_unit in enumerate(ally_units): tag2unit_dict[allies_unit.tag] = allies_unit ally_indices.append(i) node_feature = list() one_hot_type_id = get_one_hot_unit_type(allies_unit.type_id.value) node_feature.extend(one_hot_type_id) node_feature.extend(list(allies_center_pos - allies_unit.position)) if use_absolute_pos: node_feature.extend(list(allies_unit.position)) node_feature.append(allies_unit.health_max) node_feature.append(allies_unit.health_percentage) node_feature.append(allies_unit.weapon_cooldown) node_feature.append(allies_unit.ground_dps) one_hot_node_type = get_one_hot_node_type(NODE_ALLY) node_feature.extend(one_hot_node_type) node_features.append(node_feature) allies_unit_dict[allies_unit] = i allies_index_dict[i] = allies_unit node_types.append(NODE_ALLY) allies_health += allies_unit.health allies_health_percentage += allies_unit.health_percentage allies_mineral_cost += type2cost[allies_unit.name][0] allies_vespene_cost += type2cost[allies_unit.name][1] allies_food_cost += type2cost[allies_unit.name][2] enemies_health = 0 enemies_health_percentage = 0 enemies_mineral_cost = 0 enemies_vespene_cost = 0 enemies_food_cost = 0 enemies_indices = [] if num_enemies >= 1: enemy_center_pos = enemy_units.center enemy_unit_dict = dict() enemy_index_dict = dict() for j, enemy_unit in enumerate(enemy_units): tag2unit_dict[enemy_unit.tag] = enemy_unit enemies_indices.append(num_allies + j) node_feature = list() one_hot_type_id = get_one_hot_unit_type(enemy_unit.type_id.value) node_feature.extend(one_hot_type_id) node_feature.extend(list(enemy_center_pos - enemy_unit.position)) if use_absolute_pos: node_feature.extend(list(enemy_unit.position)) node_feature.append(enemy_unit.health_max) node_feature.append(enemy_unit.health_percentage) node_feature.append(enemy_unit.weapon_cooldown) node_feature.append(enemy_unit.ground_dps) one_hot_node_type = get_one_hot_node_type(NODE_ENEMY) node_feature.extend(one_hot_node_type) node_features.append(node_feature) enemy_unit_dict[enemy_unit] = j + num_allies enemy_index_dict[j + num_allies] = enemy_unit node_types.append(NODE_ENEMY) enemies_health += enemy_unit.health enemies_health_percentage += enemy_unit.health_percentage enemies_mineral_cost += type2cost[enemy_unit.name][0] enemies_vespene_cost += type2cost[enemy_unit.name][1] enemies_food_cost += type2cost[enemy_unit.name][2] if num_allies + num_enemies >= 1: node_features = np.stack( node_features) # [Num total units x Num features] node_features = torch.Tensor(node_features) node_types = 
torch.Tensor(node_types).reshape(-1) unit_indices = torch.Tensor(ally_indices + enemies_indices).reshape(-1).int() num_nodes = node_features.size(0) if exist_allies: # Add Node features: allies + enemies g.add_nodes( num_nodes, { 'node_feature': node_features, 'node_type': node_types, 'tag': tags_tensor, 'node_index': unit_indices, 'init_node_feature': node_features }) if num_allies >= 2: # Add allies edges allies_edge_indices = cartesian_product(ally_indices, ally_indices, return_1d=True) # To support hyper network encoder, we keep two edge_types allies_edge_type = torch.Tensor(data=(EDGE_ALLY, )) allies_edge_type_one_hot = torch.Tensor( data=get_one_hot_edge_type(EDGE_ALLY)) num_allies_edges = len(allies_edge_indices[0]) g.add_edges( allies_edge_indices[0], allies_edge_indices[1], { 'edge_type_one_hot': allies_edge_type_one_hot.repeat(num_allies_edges, 1), 'edge_type': allies_edge_type.repeat(num_allies_edges) }) if num_allies >= 1 and num_enemies >= 1: # Constructing bipartite graph for computing primitive attack on attack bipartite_edges = cartesian_product(enemies_indices, ally_indices, return_1d=True) # the edges from enemies to the allies # To support hyper network encoder, we keep two edge_types inter_army_edge_type = torch.Tensor(data=(EDGE_ENEMY, )) inter_army_edge_type_one_hot = torch.Tensor( data=get_one_hot_edge_type(EDGE_ENEMY)) num_inter_army_edges = len(bipartite_edges[0]) g.add_edges( bipartite_edges[0], bipartite_edges[1], { 'edge_type_one_hot': inter_army_edge_type_one_hot.repeat( num_inter_army_edges, 1), 'edge_type': inter_army_edge_type.repeat(num_inter_army_edges) }) if edge_ally_to_enemy: # the edges from allies to the enemies inter_army_edge_type = torch.Tensor( data=(EDGE_ALLY_TO_ENEMY, )) inter_army_edge_type_one_hot = torch.Tensor( data=get_one_hot_edge_type(EDGE_ALLY_TO_ENEMY)) num_inter_army_edges = len(bipartite_edges[0]) g.add_edges( bipartite_edges[1], bipartite_edges[0], { 'edge_type_one_hot': inter_army_edge_type_one_hot.repeat( num_inter_army_edges, 1), 'edge_type': inter_army_edge_type.repeat(num_inter_army_edges) }) for ally_unit in ally_units: # get all in-attack-range units. include allies units in_range_units = enemy_units.in_attack_range_of(ally_unit) if in_range_units: # when in-attack-range units exist allies_index = allies_unit_dict[ally_unit] for in_range_unit in in_range_units: enemy_index = enemy_unit_dict[in_range_unit] # Expected bottleneck (2) -> Doubled assignment of edges edge_in_attack_range = torch.Tensor( data=(EDGE_IN_ATTACK_RANGE, )) edge_in_attack_range_one_hot = torch.Tensor( data=get_one_hot_edge_type(EDGE_IN_ATTACK_RANGE)) edge_in_attack_range = edge_in_attack_range.reshape(-1) # dist = np.linalg.norm(ally_unit.position - in_range_unit.position) # dist = torch.Tensor(data=(dist,)) # dist = dist.reshape(1, -1) # damage = edge_total_damage(ally_unit, in_range_unit) # damage = torch.Tensor(data=(damage,)).reshape(1, -1) g.add_edge(enemy_index, allies_index, {'edge_type': edge_in_attack_range}) else: pass ret_dict = dict() ret_dict['g'] = g # For interfacing nn action args with sc2 action commends. ret_dict['tag2unit_dict'] = tag2unit_dict ret_dict['units'] = units _gf = [ allies_mineral_cost, allies_vespene_cost, allies_food_cost, enemies_mineral_cost, enemies_vespene_cost, enemies_food_cost ] global_feature = torch.Tensor(data=_gf).view(1, -1) ret_dict['global_feature'] = global_feature return ret_dict
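process_game_state_to_dgl calls a cartesian_product helper that is not shown; a plausible sketch that returns two flat index arrays when return_1d=True, matching how its result is consumed above:

import numpy as np

def cartesian_product(xs, ys, return_1d=False):
    # hypothetical helper: all (x, y) pairs from the two index lists
    xg, yg = np.meshgrid(xs, ys, indexing='ij')
    if return_1d:
        return xg.ravel(), yg.ravel()
    return np.stack([xg.ravel(), yg.ravel()], axis=1)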
def test_simple_pool(): ctx = F.ctx() g = dgl.DGLGraph(nx.path_graph(15)) g = g.to(F.ctx()) sum_pool = nn.SumPooling() avg_pool = nn.AvgPooling() max_pool = nn.MaxPooling() sort_pool = nn.SortPooling(10) # k = 10 print(sum_pool, avg_pool, max_pool, sort_pool) # test#1: basic h0 = F.randn((g.number_of_nodes(), 5)) sum_pool = sum_pool.to(ctx) avg_pool = avg_pool.to(ctx) max_pool = max_pool.to(ctx) sort_pool = sort_pool.to(ctx) h1 = sum_pool(g, h0) assert F.allclose(F.squeeze(h1, 0), F.sum(h0, 0)) h1 = avg_pool(g, h0) assert F.allclose(F.squeeze(h1, 0), F.mean(h0, 0)) h1 = max_pool(g, h0) assert F.allclose(F.squeeze(h1, 0), F.max(h0, 0)) h1 = sort_pool(g, h0) assert h1.shape[0] == 1 and h1.shape[1] == 10 * 5 and h1.dim() == 2 # test#2: batched graph g_ = dgl.DGLGraph(nx.path_graph(5)).to(F.ctx()) bg = dgl.batch([g, g_, g, g_, g]) h0 = F.randn((bg.number_of_nodes(), 5)) h1 = sum_pool(bg, h0) truth = th.stack([ F.sum(h0[:15], 0), F.sum(h0[15:20], 0), F.sum(h0[20:35], 0), F.sum(h0[35:40], 0), F.sum(h0[40:55], 0) ], 0) assert F.allclose(h1, truth) h1 = avg_pool(bg, h0) truth = th.stack([ F.mean(h0[:15], 0), F.mean(h0[15:20], 0), F.mean(h0[20:35], 0), F.mean(h0[35:40], 0), F.mean(h0[40:55], 0) ], 0) assert F.allclose(h1, truth) h1 = max_pool(bg, h0) truth = th.stack([ F.max(h0[:15], 0), F.max(h0[15:20], 0), F.max(h0[20:35], 0), F.max(h0[35:40], 0), F.max(h0[40:55], 0) ], 0) assert F.allclose(h1, truth) h1 = sort_pool(bg, h0) assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.dim() == 2
def to_graph(self, threshold=None, format='edge_list', split=True, frac=[0.7, 0.1, 0.2], seed=42, order='descending'): """Convert the loaded interaction dataset into a graph. Parameters ---------- threshold : float, optional (default=None) Binarization threshold for continuous affinity scores; required when the raw labels are not already binary. format : str, optional (default='edge_list') Output format: 'edge_list', 'dgl', 'pyg', or 'df'. split : bool, optional (default=True) Whether to also return a train/val/test split of the dataframe. frac : list, optional (default=[0.7, 0.1, 0.2]) Train/val/test split fractions. seed : int, optional (default=42) Random seed for the split. order : str, optional (default='descending') Direction of binarization, 'descending' or 'ascending'. Returns ------- dict Keyed by format: 'edge_list' and 'neg_edges', 'dgl_graph', 'pyg_graph', or 'df'; graph formats also include 'index_to_entities', and 'split' is added when split is True. """ df = self.get_data(format='df') if len(np.unique(self.raw_y)) > 2: print("The dataset label consists of affinity scores. " "Binarization using threshold " + str(threshold) + " is conducted to construct the positive edges in the network. " "Adjust the threshold via to_graph(threshold = X)", flush=True, file=sys.stderr) if threshold is None: raise AttributeError( "Please specify the threshold to binarize the data by " "'to_graph(threshold = N)'!") df['label_binary'] = label_transform(self.raw_y, True, threshold, False, verbose=False, order=order) else: # already binary df['label_binary'] = df['Y'] df[self.entity1_name + '_ID'] = df[self.entity1_name + '_ID'].astype(str) df[self.entity2_name + '_ID'] = df[self.entity2_name + '_ID'].astype(str) df_pos = df[df.label_binary == 1] df_neg = df[df.label_binary == 0] return_dict = {} pos_edges = df_pos[ [self.entity1_name + '_ID', self.entity2_name + '_ID']].values neg_edges = df_neg[ [self.entity1_name + '_ID', self.entity2_name + '_ID']].values edges = df[ [self.entity1_name + '_ID', self.entity2_name + '_ID']].values if format == 'edge_list': return_dict['edge_list'] = pos_edges return_dict['neg_edges'] = neg_edges elif format == 'dgl': try: import dgl except: install("dgl") import dgl unique_entities = np.unique(pos_edges.T.flatten()).tolist() index = list(range(len(unique_entities))) dict_ = dict(zip(unique_entities, index)) edge_list1 = np.array([dict_[i] for i in pos_edges.T[0]]) edge_list2 = np.array([dict_[i] for i in pos_edges.T[1]]) return_dict['dgl_graph'] = dgl.DGLGraph((edge_list1, edge_list2)) return_dict['index_to_entities'] = dict_ elif format == 'pyg': try: import torch from torch_geometric.data import Data except: raise ImportError( "Please see https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html to install pytorch geometric!") unique_entities = np.unique(pos_edges.T.flatten()).tolist() index = list(range(len(unique_entities))) dict_ = dict(zip(unique_entities, index)) edge_list1 = np.array([dict_[i] for i in pos_edges.T[0]]) edge_list2 = np.array([dict_[i] for i in pos_edges.T[1]]) edge_index = torch.tensor([edge_list1, edge_list2], dtype=torch.long) x = torch.tensor(np.array(index), dtype=torch.float) data = Data(x=x, edge_index=edge_index) return_dict['pyg_graph'] = data return_dict['index_to_entities'] = dict_ elif format == 'df': return_dict['df'] = df if split: return_dict['split'] = create_fold(df, seed, frac) return return_dict
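A usage sketch, assuming `data` is an interaction-dataset object exposing this method (the threshold value here is illustrative):

res = data.to_graph(threshold=30, format='dgl', split=False)
g = res['dgl_graph']
# note: despite its key name, this dict maps entity ID -> node index
ent2idx = res['index_to_entities']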
def prep(self, obs, hiddens, hiddens_u, with_acts=False, add_acts=None): graph_list = [] num_agents = [num_agent[0] for num_agent in obs["num_player"]] prev_num_agents = [(a != -1).sum() for a in obs["player_filter"]] unc_complete_filter = [ ob[:n_p_agent] for ob, n_p_agent in zip(obs["player_filter"], prev_num_agents) ] complete_filter = np.concatenate(unc_complete_filter, axis=-1) # Create graphs inputted to GNN. for num_agent in num_agents: num_agent = int(num_agent) graph_ob = dgl.DGLGraph() graph_ob.add_nodes(num_agent) edge_pairs = [(a, b) for a in range(num_agent) for b in range(num_agent) if a != b] if not len(edge_pairs) == 0: src, dst = zip(*edge_pairs) graph_ob.add_edges(src, dst) graph_list.append(graph_ob) graph_batch = dgl.batch(graph_list) # Parse inputs into node inputs. num_nodes = graph_batch.batch_num_nodes n_ob = torch.cat([ torch.Tensor([obs['player_info'][id][3 * idx:3 * idx + 3] ]).float() for id, num_node in enumerate(num_nodes) for idx in range(num_node) ], dim=0) u_ob = torch.Tensor(obs["food_info"]) # Create filters to decide which hidden vectors to maintain. # For newly added agents, hiddens set to zeros. # For remaining agents, hiddens continues from prev timestep. node_filter_np = np.where(complete_filter == 1)[0] node_filter = torch.Tensor(node_filter_np).long() current_node_offsets, offset = [0], 0 for cur_num_node in num_nodes[:-1]: offset += cur_num_node current_node_offsets.append(offset) new_indices = [] filter_idxes = [ np.arange((filter == 1).sum()) for filter in unc_complete_filter ] for offset, filter in zip(current_node_offsets, filter_idxes): new_indices.append(torch.Tensor(offset + filter).long()) complete_new_filter = torch.cat(new_indices, dim=-1) # Create action vectors for opponent modelling. if with_acts: acts = [] for first_act, last_act, prev_node in zip(add_acts, obs["prev_actions"], prev_num_agents): acts.append(first_act) acts.extend(last_act[:prev_node - 1]) # Filter hidden vectors for remaining agents. # Add zero vectors for newly added agents. n_hid = (torch.zeros([ 1, graph_batch.number_of_nodes(), self.dim_lstm_out ]), torch.zeros([1, graph_batch.number_of_nodes(), self.dim_lstm_out])) #checks to not make it empty. if not (hiddens is None): collected_hiddens = (hiddens[0][:, node_filter, :], hiddens[1][:, node_filter, :]) n_hid[0][:, complete_new_filter, :] = collected_hiddens[0] n_hid[1][:, complete_new_filter, :] = collected_hiddens[1] n_hid_u = (torch.zeros([ 1, graph_batch.number_of_nodes(), self.dim_lstm_out ]), torch.zeros([1, graph_batch.number_of_nodes(), self.dim_lstm_out])) if not (hiddens_u is None): collected_hiddens = (hiddens_u[0][:, node_filter, :], hiddens_u[1][:, node_filter, :]) n_hid_u[0][:, complete_new_filter, :] = collected_hiddens[0] n_hid_u[1][:, complete_new_filter, :] = collected_hiddens[1] if with_acts: return graph_batch, n_ob, u_ob, n_hid, n_hid_u, acts return graph_batch, n_ob, u_ob, n_hid, n_hid_u
def graphClassification(datasets_folder, EdgeLists_folder, NodesEmbedding_folder, number_of_epochs, embedding_size, num_classes, clustering_measure, labels_file): files_name = [ file for file in sorted(os.listdir(datasets_folder), key=lambda s: s.lower()) ] print(len(files_name)) dict_nodes_embedding = get_node_embedding(NodesEmbedding_folder) graphs = retrive_graphs(dict_nodes_embedding, EdgeLists_folder, datasets_folder, labels_file) loo = LeaveOneOut() splits = loo.split(graphs) embedding_dir = "Embeddings_" + clustering_measure for train_index, test_index in splits: # train_index = np.insert(train_index, 0,test_index[0]) train_set = itemgetter(*train_index)(graphs) test_set = [itemgetter(*test_index)(graphs)] data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False, collate_fn=collate) # Create model for i in range(0, batch_size - 1): c = dgl.DGLGraph() c.add_nodes(1) test_set.append((c, 0, torch.FloatTensor(torch.zeros(1, 64)))) model = Classifier(node_embedding_dim, embedding_size, num_classes) loss_func = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.006) model.train() epoch_losses = [] for epoch in range(number_of_epochs): epoch_loss = 0 counter = 0 for batch_idx, (bg, label, embedding) in enumerate(data_loader): embedding_array = initiate_feat(embedding) prediction, hg = model(bg, embedding_array) if epoch == number_of_epochs - 1: if not os.path.exists("Embeddings_" + clustering_measure): os.mkdir(embedding_dir) for hidden in hg.detach().numpy(): dataset_hidden_name = files_name[ train_index[counter]].split(".")[0] hidden.tofile(embedding_dir + "/" + dataset_hidden_name + ".csv", sep=',') counter = counter + 1 loss = loss_func(prediction, label) optimizer.zero_grad() loss.backward() optimizer.step() epoch_loss += loss.detach().item() epoch_loss /= (batch_idx + 1) print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss)) epoch_losses.append(epoch_loss) model.eval() # Convert a list of tuples to two lists test_X, test_Y, embedding_test = map(list, zip(*test_set)) test_bg = dgl.batch(test_X) true_label = test_Y[0] dataset_test_name = files_name[test_index[0]].split(".")[0] print(dataset_test_name) test_Y = torch.tensor(test_Y).float().view(-1, 1) probs_Y, hidden_layer = model(test_bg, initiate_feat(embedding_test)) probs_Y = torch.softmax(probs_Y, 1) sampled_Y = torch.multinomial(probs_Y, 1) argmax_Y = torch.max(probs_Y, 1)[1].view(-1, 1) print( 'Accuracy of sampled predictions on the test set: {:.4f}%'.format( (test_Y == sampled_Y.float()).sum().item() / len(test_Y) * 100)) print('Accuracy of argmax predictions on the test set: {:.4f}%'.format( (test_Y == argmax_Y.float()).sum().item() / len(test_Y) * 100)) break return embedding_dir + "/"
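The DataLoader above is given a collate helper that is not shown; a plausible sketch for batching (graph, label, embedding) tuples with dgl.batch:

import dgl
import torch

def collate(samples):
    # hypothetical helper: samples is a list of (graph, label, embedding) tuples
    graphs, labels, embeddings = map(list, zip(*samples))
    return dgl.batch(graphs), torch.tensor(labels), embeddings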
def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test the higher-dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward against TensorFlow's built-in softmax.
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((900, 1))
    with tf.GradientTape() as tape:
        tape.watch(score)
        y = tf.reshape(F.softmax(tf.reshape(score, (30, 30)), dim=0), (-1, 1))
    grads = tape.gradient(y, [score])
    grad_score = grads[0]

    with tf.GradientTape() as tape:
        tape.watch(score)
        y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    grads = tape.gradient(y_dgl, [score])
    # check gradient
    assert F.allclose(grads[0], grad_score)
    print(grads[0][:10], grad_score[:10])

    # Test edge_softmax against a group_apply_edges-based softmax.
    def generate_rand_graph(n):
        arr = (sp.sparse.random(n, n, density=0.1, format='coo') !=
               0).astype(np.int64)
        return dgl.DGLGraph(arr, readonly=True)

    g = generate_rand_graph(50).to(F.ctx())
    a1 = F.randn((g.number_of_edges(), 1))
    a2 = tf.identity(a1)
    with tf.GradientTape() as tape:
        tape.watch(a1)
        g.edata['s'] = a1
        g.group_apply_edges(
            'dst', lambda edges: {'ss': F.softmax(edges.data['s'], 1)})
        loss = tf.reduce_sum(g.edata['ss'])
    a1_grad = tape.gradient(loss, [a1])[0]

    with tf.GradientTape() as tape:
        tape.watch(a2)
        builtin_sm = nn.edge_softmax(g, a2)
        loss = tf.reduce_sum(builtin_sm)
    a2_grad = tape.gradient(loss, [a2])[0]

    print(a1_grad - a2_grad)
    assert len(g.ndata) == 0
    assert len(g.edata) == 2
    # Follow tolerance in unittest backend
    assert F.allclose(a1_grad, a2_grad, rtol=1e-4, atol=1e-4)
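###############################################################################
# ``uniform_attention`` is referenced in the test but not defined in this
# section. Under the usual convention it gives every incoming edge of a node
# the weight 1 / in-degree, which is exactly what softmax over constant
# scores produces. A plausible PyTorch sketch (an assumption, not the test's
# own helper):

def uniform_attention(g, shape):
    a = torch.ones(shape)
    target_shape = (g.number_of_edges(),) + (1,) * (len(shape) - 1)
    # 1 / in-degree of each edge's destination node, broadcast to `shape`
    return a / g.in_degrees(g.edges()[1]).view(target_shape).float()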
def plot_locations(graph, ax):
    had_list = graph.hadron_list
    node_list = graph.nodes
    # Vertex indices are assumed to be contiguous, starting at 0
    # (0 = pileup/fakes, 1 = primary vertex).
    vertices = list({node.vertex_idx for node in node_list})
    if 0 not in vertices:
        vertices.append(0)
    if 1 not in vertices:
        vertices.append(1)
    n_vertices = len(vertices)

    locations_g = dgl.DGLGraph()
    locations_g.add_nodes(n_vertices)
    loc_labels = {}
    loc_spacing = 500
    x_range = n_vertices * loc_spacing
    loc_labels[0] = 'pileup/\nfakes'
    loc_labels[1] = 'primary'
    G = locations_g.to_networkx()

    # Lay the vertices out on a horizontal line and label each secondary
    # vertex with its displacement from the origin.
    pos = {}
    for pos_i in range(n_vertices):
        pos[pos_i] = (loc_spacing * pos_i, 0)
        if pos_i < 2:
            continue
        for node in node_list:
            if node.vertex_idx == pos_i:
                loc_labels[pos_i] = '{0:.2f}'.format(
                    np.linalg.norm(node.origin))
                break
    nx.draw_networkx_nodes(G, pos, node_color='mediumaquamarine',
                           node_size=1800, ax=ax)
    nx.draw_networkx_edges(G, pos, ax=ax)
    nx.draw_networkx_labels(G, pos, loc_labels, ax=ax)

    # fake/pileup vertex: draw its children on a small ellipse around it
    center_point = pos[0]
    sub_g = dgl.DGLGraph()
    sub_g.add_nodes(1)
    sub_g_pos = {0: center_point}
    n_children = 0
    r_x = loc_spacing / 3.5
    r_y = 30
    for node in node_list:
        if node.vertex_idx == 0:
            n_children += 1
            sub_g.add_nodes(1)
            sub_g.add_edge(0, n_children)
    child_idx = 0
    for node in node_list:
        if node.vertex_idx == 0:
            child_idx += 1
            sub_g_pos[child_idx] = (
                center_point[0] + r_x * np.cos(
                    ((child_idx - 1) / float(n_children)) * 2 * np.pi),
                center_point[1] + r_y * np.sin(
                    ((child_idx - 1) / float(n_children)) * 2 * np.pi))
    G = sub_g.to_networkx()
    nx.draw_networkx_nodes(G, sub_g_pos, node_color='skyblue',
                           node_size=800, ax=ax,
                           nodelist=range(1, n_children + 1))
    nx.draw_networkx_edges(G, sub_g_pos, ax=ax)

    # primary and secondary vertices: children on an ellipse, with
    # reconstructed tracks drawn one ring further out
    for vtx_i in range(1, n_vertices):
        center_point = pos[vtx_i]
        sub_g = dgl.DGLGraph()
        sub_g.add_nodes(1)
        sub_g_labels = {}
        sub_g_pos = {0: center_point}
        n_children = 0
        for node in node_list:
            if node.vertex_idx == vtx_i:
                n_children += 1
                sub_g.add_nodes(1)
                sub_g.add_edge(0, n_children)
                if node.pdgid in pdg_id_dict:
                    sub_g_labels[n_children] = pdg_id_dict[node.pdgid]
                else:
                    sub_g_labels[n_children] = str(node.pdgid)
        r_x = loc_spacing / 3.5
        r_y = 30
        child_idx = 0
        n_tracks = 0
        for node in node_list:
            if node.vertex_idx == vtx_i:
                child_idx += 1
                sub_g_pos[child_idx] = (
                    center_point[0] + r_x * np.cos(
                        ((child_idx - 1) / float(n_children)) * 2 * np.pi),
                    center_point[1] + r_y * np.sin(
                        ((child_idx - 1) / float(n_children)) * 2 * np.pi))
                if node.reconstructed:
                    sub_g.add_nodes(1)
                    n_tracks += 1
                    sub_g.add_edge(child_idx, n_children + n_tracks)
                    sub_g_pos[n_children + n_tracks] = (
                        center_point[0] + 1.5 * r_x * np.cos(
                            ((child_idx - 1) / float(n_children)) * 2 *
                            np.pi),
                        center_point[1] + 1.5 * r_y * np.sin(
                            ((child_idx - 1) / float(n_children)) * 2 *
                            np.pi))
        G = sub_g.to_networkx()
        nx.draw_networkx_nodes(G, sub_g_pos, node_color='darksalmon',
                               node_size=800, ax=ax,
                               nodelist=range(1, n_children + 1))
        nx.draw_networkx_nodes(G, sub_g_pos, node_color='skyblue',
                               node_size=300, ax=ax,
                               nodelist=range(n_children + 1,
                                              n_children + n_tracks + 1))
        nx.draw_networkx_edges(G, sub_g_pos, ax=ax)
        # draw_networkx_labels takes no nodelist argument; the labels dict
        # already restricts which nodes get labelled.
        nx.draw_networkx_labels(G, sub_g_pos, sub_g_labels, ax=ax)
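###############################################################################
# A hedged usage sketch for ``plot_locations``. The ``graph`` argument is
# assumed to be the event object used elsewhere in this codebase: it must
# expose ``hadron_list`` and ``nodes``, whose elements carry ``vertex_idx``,
# ``origin``, ``pdgid``, and ``reconstructed`` attributes. ``event_graph``
# below is hypothetical.

fig, ax = plt.subplots(figsize=(12, 6))
plot_locations(event_graph, ax)  # event_graph: hypothetical event object
ax.set_axis_off()
plt.show()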
def test_rgcn():
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1),
                     readonly=True).to(F.ctx())
    # 5 edge types, assigned cyclically over the edges
    R = 5
    etype = [i % R for i in range(g.number_of_edges())]
    B = 2   # number of bases / blocks
    I = 10  # input feature size
    O = 8   # output feature size

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_basis(g, h, r)
    h_new_low = rgc_basis_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
    rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True)
    rgc_bdd_low.weight = rgc_bdd.weight
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_bdd(g, h, r)
    h_new_low = rgc_bdd_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    # with norm
    norm = tf.zeros((g.number_of_edges(), 1))

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_basis(g, h, r, norm)
    h_new_low = rgc_basis_low(g, h, r, norm)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
    rgc_bdd_low = nn.RelGraphConv(I, O, R, "bdd", B, low_mem=True)
    rgc_bdd_low.weight = rgc_bdd.weight
    h = tf.random.normal((100, I))
    r = tf.constant(etype)
    h_new = rgc_bdd(g, h, r, norm)
    h_new_low = rgc_bdd_low(g, h, r, norm)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)

    # id input: integer node ids instead of dense features
    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis_low = nn.RelGraphConv(I, O, R, "basis", B, low_mem=True)
    rgc_basis_low.weight = rgc_basis.weight
    rgc_basis_low.w_comp = rgc_basis.w_comp
    h = tf.constant(np.random.randint(0, I, (100, ))) * 1
    r = tf.constant(etype) * 1
    h_new = rgc_basis(g, h, r)
    h_new_low = rgc_basis_low(g, h, r)
    assert list(h_new.shape) == [100, O]
    assert list(h_new_low.shape) == [100, O]
    assert F.allclose(h_new, h_new_low)
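###############################################################################
# The "basis" regularizer exercised above composes each relation's weight
# matrix from B shared bases, W_r = sum_b a_rb * V_b, so the parameter count
# grows with B rather than with the number of relations R. A minimal numpy
# illustration of that composition (names are illustrative, not the layer's
# internals):

import numpy as np

B, R, I, O = 2, 5, 10, 8      # bases, relations, input dim, output dim
V = np.random.randn(B, I, O)  # shared basis matrices
a = np.random.randn(R, B)     # per-relation combination coefficients

# Each relation's weight is a learned combination of the shared bases.
W = np.einsum('rb,bio->rio', a, V)
assert W.shape == (R, I, O)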
def processing_amr(amr_dir, tokens_list):
    amr_list = torch.load(amr_dir)
    node_idx_list, edge_type_list = [], []
    node_idx_offset_list, node_idx_offset_whole = [], []
    list_of_align_dict = []
    list_of_exist_dict = []
    total_edge_num = 0
    covered_edge_num = 0
    order_list = []
    for i, amr in enumerate(amr_list):
        amr_split_list = amr.split('\n')
        node_to_idx, node_to_offset, node_to_offset_whole = {}, {}, {}
        node_num = 0
        # First pass: collect the nodes.
        for line in amr_split_list:
            if line.startswith('# ::node'):
                node_split = line.split('\t')
                if len(node_split) != 4:
                    # skip nodes without an alignment text span
                    continue
                align_span = node_split[3].split('-')
                if not align_span[0].isdigit():
                    continue
                head_word_idx = int(align_span[1]) - 1
                start = int(align_span[0])
                end = int(align_span[1])
                if (start, end) not in list(node_to_offset_whole.values()):
                    node_to_offset.update({node_split[1]: head_word_idx})
                    node_to_offset_whole.update(
                        {node_split[1]: (start, end)})
                    node_to_idx.update({node_split[1]: node_num})
                    node_num += 1
        node_idx_list.append(node_to_idx)

        # change str2offset to idx2offset
        node_idx_to_offset = {}
        node_idx_to_offset_whole = {}
        for key in node_to_idx.keys():
            node_idx_to_offset.update(
                {node_to_idx[key]: node_to_offset[key]})
            node_idx_to_offset_whole.update(
                {node_to_idx[key]: node_to_offset_whole[key]})
        node_idx_offset_list.append(node_idx_to_offset)
        node_idx_offset_whole.append(node_idx_to_offset_whole)

        edge_type_dict = {}
        for line in amr_split_list:
            if line.startswith('# ::root'):
                root_split = line.split('\t')
                root = root_split[1]
        prior_dict = {root: []}
        start_list = []
        end_list = []
        for line in amr_split_list:
            if line.startswith('# ::edge'):
                edge_split = line.split('\t')
                amr_edge_type = edge_split[2]
                edge_start = edge_split[4]
                edge_end = edge_split[5]
                # check that both endpoint nodes exist
                if (edge_start in node_to_idx) and (edge_end in node_to_idx):
                    # "ARGx-of" edges are inverse relations: reverse the
                    # direction and strip the "-of" suffix.
                    if amr_edge_type.startswith(
                            "ARG") and amr_edge_type.endswith("-of"):
                        edge_start, edge_end = edge_end, edge_start
                        amr_edge_type = amr_edge_type[0:4]
                    edge_idx = get_amr_edge_idx(amr_edge_type)
                    # bookkeeping: count how many edges map to type 11
                    total_edge_num += 1
                    if edge_idx == 11:
                        covered_edge_num += 1
                    start_idx = node_to_idx[edge_start]
                    end_idx = node_to_idx[edge_end]
                    edge_type_dict.update({(start_idx, end_idx): edge_idx})
                else:
                    continue
                if edge_end != root and (not ((edge_start in end_list) and
                                              (edge_end in start_list))):
                    start_list.append(edge_start)
                    end_list.append(edge_end)
                    if edge_start not in prior_dict:
                        prior_dict.update({edge_start: [edge_end]})
                    else:
                        prior_dict[edge_start].append(edge_end)
        edge_type_list.append(edge_type_dict)

        # Generate the priority (topological-level) order for decoding:
        # repeatedly emit the nodes that no remaining edge points into.
        final_order_list = []  # output orders
        candidate_nodes = node_to_idx.copy()
        while len(candidate_nodes) != 0:
            current_level_nodes = []
            for key in candidate_nodes:
                if key not in end_list:
                    final_order_list.append(candidate_nodes[key])
                    current_level_nodes.append(key)
            # Remove current-level nodes from the candidate dictionary.
            for node in current_level_nodes:
                candidate_nodes.pop(node)
            # Delete the current-level nodes from the start list, together
            # with the corresponding entries of the end list.
            for node in current_level_nodes:
                indices_list = [
                    j for j, x in enumerate(start_list) if x == node
                ]
                start_list = [x for x in start_list if x != node]
                new_end_list = []
                for j in range(len(end_list)):
                    if j not in indices_list:
                        new_end_list.append(end_list[j])
                end_list = new_end_list
        order_list.append(final_order_list.copy())

    # Build the DGL graphs.
    graphs_list = []
    for i in range(len(node_idx_list)):
        graph_i = dgl.DGLGraph()
        edge2type = edge_type_list[i]
        node2offset = node_idx_offset_list[i]
        node2offset_whole = node_idx_offset_whole[i]
        nodes_num = len(node2offset)
        graph_i.add_nodes(nodes_num)
        graph_i.ndata['token_pos'] = torch.zeros(nodes_num, 1,
                                                 dtype=torch.long)
        graph_i.ndata['token_span'] = torch.zeros(nodes_num, 2,
                                                  dtype=torch.long)
        # fill in token head positions and token spans
        for key in node2offset:
            graph_i.ndata['token_pos'][key][0] = node2offset[key]
            graph_i.ndata['token_span'][key][0] = node2offset_whole[key][0]
            graph_i.ndata['token_span'][key][1] = node2offset_whole[key][1]
        # add node priorities
        node_prior_tensor = torch.zeros(nodes_num, 1, dtype=torch.long)
        for j in range(nodes_num):
            node_prior_tensor[j][0] = order_list[i].index(j)
        graph_i.ndata['priority'] = node_prior_tensor
        # add bi-directional edges
        edge_num = len(edge2type)
        edge_iter = 0
        edge_type_tensor = torch.zeros(2 * edge_num, 1, dtype=torch.long)
        for key in edge2type:
            graph_i.add_edges(key[0], key[1])
            edge_type_tensor[edge_iter][0] = edge2type[key]
            edge_iter += 1
        for key in edge2type:
            graph_i.add_edges(key[1], key[0])
            edge_type_tensor[edge_iter][0] = edge2type[key]
            edge_iter += 1
        graph_i.edata['type'] = edge_type_tensor
        graphs_list.append(graph_i)

        # Align each token to an AMR node: tokens inside a node's span map
        # to that node; the rest fall back to the node with the nearest span.
        align_dict = {}
        exist_dict = {}
        span_list = graph_i.ndata["token_span"].tolist()
        for p in range(len(tokens_list[i])):
            min_dis = 2 * len(tokens_list[i])
            min_dis_idx = -1
            if_found = 0
            for q in range(len(span_list)):
                if span_list[q][0] <= p < span_list[q][1]:
                    if_found = 1
                    align_dict.update({p: q})
                    exist_dict.update({p: 1})
                    break
                new_dis_1 = abs(p - span_list[q][0])
                new_dis_2 = abs(p - (span_list[q][1] - 1))
                new_dis = min(new_dis_1, new_dis_2)
                if new_dis < min_dis:
                    min_dis = new_dis
                    min_dis_idx = q
            if not if_found:
                align_dict.update({p: min_dis_idx})
                exist_dict.update({p: 0})
        list_of_align_dict.append(align_dict)
        list_of_exist_dict.append(exist_dict)
    return graphs_list, list_of_align_dict, list_of_exist_dict
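###############################################################################
# For reference, ``processing_amr`` expects JAMR-style alignment metadata:
# tab-separated '# ::node <id> <concept> <start>-<end>' and
# '# ::edge <concept1> <label> <concept2> <id1> <id2>' lines. A toy saved
# input and call, assuming ``get_amr_edge_idx`` (defined elsewhere in this
# codebase) handles the 'ARG0' label; the file name and tokens are
# illustrative.

toy_amr = '\n'.join([
    '# ::root\t0.0',
    '# ::node\t0.0\twant-01\t1-2',
    '# ::node\t0.1\tboy\t0-1',
    '# ::edge\twant-01\tARG0\tboy\t0.0\t0.1',
])
torch.save([toy_amr], 'toy_amr.pt')  # the function loads a list of AMR strings
graphs, align, exist = processing_amr('toy_amr.pt',
                                      tokens_list=[['boy', 'wants', 'cake']])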