def process(self):
    with open(os.path.join(self.raw_dir, 'ACM3025.pkl'), 'rb') as f:
        data = pickle.load(f)
    features = torch.from_numpy(data['feature'].todense()).float()  # (3025, 1870)
    labels = torch.from_numpy(data['label'].todense()).long().nonzero(as_tuple=True)[1]  # (3025,)

    # Adjacency matrices for meta-path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    author_g = dgl.from_scipy(data['PAP'])
    subject_g = dgl.from_scipy(data['PLP'])
    self.gs = [author_g, subject_g]

    num_nodes = data['label'].shape[0]
    train_mask = generate_mask_tensor(idx2mask(data['train_idx'][0], num_nodes))
    val_mask = generate_mask_tensor(idx2mask(data['val_idx'][0], num_nodes))
    test_mask = generate_mask_tensor(idx2mask(data['test_idx'][0], num_nodes))
    for g in self.gs:
        g.ndata['feat'] = features
        g.ndata['label'] = labels
        g.ndata['train_mask'] = train_mask
        g.ndata['val_mask'] = val_mask
        g.ndata['test_mask'] = test_mask
def dgl_graph_from_vec(vec, graph_params):
    """Create a graph from a flattened vector, treated as a thresholded
    weighted adjacency matrix, with node/edge properties as torch tensors."""
    if graph_params.flatten:
        W = vec_to_sym(vec)
    else:
        W = vec
    # add signal on nodes
    u = getattr(feature_generation, graph_params.node_feat)(W)
    # threshold the weights
    if graph_params.thr_type == 'pos':
        W[W < graph_params.threshold] = 0
    else:
        W[np.abs(W) < graph_params.threshold] = 0
    # convert to a sparse COO matrix, then to a DGL graph with torch features
    W = sparse.csr_matrix(W).tocoo()
    edge_weight = torch.tensor(W.data).float()
    u = torch.from_numpy(u.astype(np.float32))
    g = dgl.from_scipy(W)
    g.ndata['feat'] = u
    g.edata['weight'] = edge_weight
    if graph_params.add_self_loop:
        g = dgl.add_self_loop(g)
        # self-loop edges are appended last; give them unit weight
        g.edata['weight'][-graph_params.n_nodes:] = 1
    return g
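# Note on the snippet above: for the common "attach the scipy COO data as edge
# weights" pattern, dgl.from_scipy can do it in one call via its eweight_name
# argument. A minimal self-contained sketch (toy values, illustrative only):
import dgl
import numpy as np
from scipy import sparse

W = np.array([[0.0, 0.9, 0.2],
              [0.9, 0.0, 0.7],
              [0.2, 0.7, 0.0]])
W[W < 0.5] = 0  # threshold, as in dgl_graph_from_vec
g = dgl.from_scipy(sparse.coo_matrix(W), eweight_name='weight')
print(g.edata['weight'])  # the nonzero entries of W as a tensor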
def generate_rand_graph(n, is_hetero):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
    if is_hetero:
        return dgl.from_scipy(arr)
    else:
        return DGLGraph(arr, readonly=True)
def load_ppi_data(root):
    DataType = namedtuple('Dataset', ['num_classes', 'g'])
    adj_full = sp.load_npz(os.path.join(root, 'ppi', 'adj_full.npz'))
    G = dgl.from_scipy(adj_full)
    nodes_num = G.num_nodes()

    role = json.load(open(os.path.join(root, 'ppi', 'role.json'), 'r'))
    tr = list(role['tr'])
    te = list(role['te'])
    va = list(role['va'])
    mask = np.zeros((nodes_num,), dtype=bool)
    train_mask = mask.copy()
    train_mask[tr] = True
    val_mask = mask.copy()
    val_mask[va] = True
    test_mask = mask.copy()
    test_mask[te] = True
    G.ndata['train_mask'] = torch.tensor(train_mask, dtype=torch.bool)
    G.ndata['val_mask'] = torch.tensor(val_mask, dtype=torch.bool)
    G.ndata['test_mask'] = torch.tensor(test_mask, dtype=torch.bool)

    feats = np.load(os.path.join(root, 'ppi', 'feats.npy'))
    G.ndata['feat'] = torch.tensor(feats, dtype=torch.float)

    class_map = json.load(open(os.path.join(root, 'ppi', 'class_map.json'), 'r'))
    labels = np.array([class_map[str(i)] for i in range(nodes_num)])
    G.ndata['label'] = torch.tensor(labels, dtype=torch.float)

    data = DataType(g=G, num_classes=labels.shape[1])
    return data
def _prepare(self):
    t0 = time.time()
    print("[I] Preparing Circular Skip Link Graphs v4 ...")
    for sample in self.adj_list:
        _g = dgl.from_scipy(sample)
        g = dgl.transform.remove_self_loop(_g)
        g.ndata['feat'] = torch.zeros(g.number_of_nodes()).long()
        # g.ndata['feat'] = torch.arange(0, g.number_of_nodes()).long()  # v1
        # g.ndata['feat'] = torch.randperm(g.number_of_nodes()).long()   # v3

        # adding edge features as generic requirement
        g.edata['feat'] = torch.zeros(g.number_of_edges()).long()
        # g.edata['feat'] = torch.arange(0, g.number_of_edges()).long()  # v1
        # g.edata['feat'] = torch.ones(g.number_of_edges()).long()       # v2

        # NOTE: come back here, to define edge features as distance between the indices of the edges
        ###################################################################
        # srcs, dsts = new_g.edges()
        # edge_feat = []
        # for edge in range(len(srcs)):
        #     a = srcs[edge].item()
        #     b = dsts[edge].item()
        #     edge_feat.append(abs(a - b))
        # g.edata['feat'] = torch.tensor(edge_feat, dtype=torch.int).long()
        ###################################################################

        self.graph_lists.append(g)
    self.num_node_type = self.graph_lists[0].ndata['feat'].size(0)
    self.num_edge_type = self.graph_lists[0].edata['feat'].size(0)
    print("[I] Finished preparation after {:.4f}s".format(time.time() - t0))
def test_topological_nodes(idtype, n=100):
    a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
    b = sp.tril(a, -1).tocoo()
    g = dgl.from_scipy(b).astype(idtype)

    layers_dgl = dgl.topological_nodes_generator(g)

    adjmat = g.adjacency_matrix(transpose=True)

    def tensor_topo_traverse():
        n = g.number_of_nodes()
        mask = F.copy_to(F.ones((n, 1)), F.cpu())
        degree = F.spmm(adjmat, mask)
        while F.reduce_sum(mask) != 0.:
            v = F.astype((degree == 0.), F.float32)
            v = v * mask
            mask = mask - v
            frontier = F.copy_to(F.nonzero_1d(F.squeeze(v, 1)), F.cpu())
            yield frontier
            degree -= F.spmm(adjmat, v)

    layers_spmv = list(tensor_topo_traverse())

    assert len(layers_dgl) == len(layers_spmv)
    assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv))
def predict(adj, features):
    # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    dev = th.device('cpu')
    '''
    fg = open(sys.argv[1], 'rb')
    adj = pickle.load(fg)
    features = np.load(sys.argv[2])
    '''
    adj = adj_preprocess(adj)
    graph = dgl.from_scipy(adj)
    features = th.FloatTensor(features).to(dev)
    # zero out any row containing an out-of-range value, then squash into (-1, 1)
    features[th.where(features < -1.0)[0]] = 0
    features[th.where(features > 1.0)[0]] = 0
    features = 2 * th.atan(features) / th.Tensor([np.pi]).to(dev)
    graph.ndata['features'] = features

    model = TAGCN(100, 128, 20, 3, activation=F.leaky_relu, dropout=0.0)
    model_states = th.load('speit/model.pkl', map_location=dev)
    model.load_state_dict(model_states)
    model = model.to(dev)
    model.eval()

    logits = model(graph, features)
    pred = logits.argmax(1)
    return pred.cpu().numpy()
def load_acm(remove_self_loop):
    url = 'dataset/ACM3025.pkl'
    data_path = get_download_dir() + '/ACM3025.pkl'
    # download(_get_dgl_url(url), path=data_path)
    # Load the data dict: dict_keys(['label', 'feature', 'PAP', 'PLP', 'train_idx', 'val_idx', 'test_idx'])
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    labels, features = torch.from_numpy(data['label'].todense()).long(), \
                       torch.from_numpy(data['feature'].todense()).float()
    num_classes = labels.shape[1]
    labels = labels.nonzero()[:, 1]  # convert one-hot labels to class indices

    if remove_self_loop:
        num_nodes = data['label'].shape[0]
        data['PAP'] = sparse.csr_matrix(data['PAP'] - np.eye(num_nodes))
        data['PLP'] = sparse.csr_matrix(data['PLP'] - np.eye(num_nodes))

    # Adjacency matrices for meta path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    author_g = dgl.from_scipy(data['PAP'])   # graph for the P-A-P meta-path
    subject_g = dgl.from_scipy(data['PLP'])  # graph for the P-S-P meta-path
    gs = [author_g, subject_g]               # combine the two meta-path graphs

    train_idx = torch.from_numpy(data['train_idx']).long().squeeze(0)
    val_idx = torch.from_numpy(data['val_idx']).long().squeeze(0)
    test_idx = torch.from_numpy(data['test_idx']).long().squeeze(0)

    num_nodes = author_g.number_of_nodes()
    train_mask = get_binary_mask(num_nodes, train_idx)  # 1 at the given indices, 0 elsewhere
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    print('dataset loaded')
    pprint({
        'dataset': 'ACM',
        'train': train_mask.sum().item() / num_nodes,
        'val': val_mask.sum().item() / num_nodes,
        'test': test_mask.sum().item() / num_nodes
    })

    # Returns: gs - the PAP/PLP meta-path graphs; features - node features;
    # labels - class indices; num_classes - number of classes
    return gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask
def load_acm(remove_self_loop):
    filename = 'ACM3025.pkl'
    url = 'dataset/' + filename
    data_path = get_download_dir() + '/' + filename
    if osp.exists(data_path):
        print(f'Using existing file {filename}', file=sys.stderr)
    else:
        download(_get_dgl_url(url), path=data_path)

    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    labels, features = torch.from_numpy(data['label'].todense()).long(), \
                       torch.from_numpy(data['feature'].todense()).float()
    num_classes = labels.shape[1]
    labels = labels.nonzero()[:, 1]

    if remove_self_loop:
        num_nodes = data['label'].shape[0]
        data['PAP'] = sparse.csr_matrix(data['PAP'] - np.eye(num_nodes))
        data['PLP'] = sparse.csr_matrix(data['PLP'] - np.eye(num_nodes))

    # Adjacency matrices for meta path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    author_g = dgl.from_scipy(data['PAP'])
    subject_g = dgl.from_scipy(data['PLP'])
    gs = [author_g, subject_g]

    train_idx = torch.from_numpy(data['train_idx']).long().squeeze(0)
    val_idx = torch.from_numpy(data['val_idx']).long().squeeze(0)
    test_idx = torch.from_numpy(data['test_idx']).long().squeeze(0)

    num_nodes = author_g.number_of_nodes()
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    print('dataset loaded')
    pprint({
        'dataset': 'ACM',
        'train': train_mask.sum().item() / num_nodes,
        'val': val_mask.sum().item() / num_nodes,
        'test': test_mask.sum().item() / num_nodes
    })

    return gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask
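# A typical call site for the loader above (a sketch; the unpacking follows the
# function's return order, and the shapes are the ACM3025 ones noted elsewhere
# in this collection):
gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
    train_mask, val_mask, test_mask = load_acm(remove_self_loop=False)
print(len(gs), features.shape)  # 2 meta-path graphs; features are (3025, 1870)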
def track_time(size, scipy_format):
    matrix_dict = {
        "small": dgl.data.CiteseerGraphDataset(verbose=False)[0].adjacency_matrix(
            scipy_fmt=scipy_format),
        "large": utils.get_livejournal().adjacency_matrix(scipy_fmt=scipy_format)
    }

    # dry run
    dgl.from_scipy(matrix_dict[size])

    # timing
    with utils.Timer() as t:
        for i in range(3):
            dgl.from_scipy(matrix_dict[size])

    return t.elapsed_secs / 3
def diffuse(progress_g, weighted_adj, degree):
    device = progress_g.device
    progress_adj = progress_g.adj(scipy_fmt='coo')
    progress_adj.data = progress_g.edata['weight'].cpu().numpy()
    ret_adj = sparse.coo_matrix(progress_adj @ (weighted_adj / degree.cpu().numpy()))
    ret_graph = dgl.from_scipy(ret_adj, eweight_name='weight').to(device)
    ret_graph.edata['weight'] = ret_graph.edata['weight'].float().to(device)
    return ret_graph
def mat2graph(adjacent_matrix, weighted=False, init_feat=None):
    g = from_scipy(sparse.csc_matrix(adjacent_matrix))
    g.ndata['in_degrees'] = tensor(adjacent_matrix).sum(0)
    g.ndata['out_degrees'] = tensor(adjacent_matrix).sum(1)
    if init_feat is not None:
        g.ndata['init_h'] = tensor(init_feat).float()
    if weighted:
        weight = adjacent_matrix.flatten()
        g.edata['w'] = tensor(weight[weight != 0]).float()
    return g
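# A small usage sketch for mat2graph, assuming the bare names resolve to
# dgl.from_scipy and torch.tensor as the snippet suggests:
import numpy as np

A = np.array([[0., 2., 0.],
              [0., 0., 3.],
              [0., 0., 0.]])  # 3-node directed graph with two weighted edges
g = mat2graph(A, weighted=True)
print(g.ndata['out_degrees'])  # row sums: [2., 3., 0.]
print(g.edata['w'])            # nonzero weights: [2., 3.]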
def load_data(args, multilabel):
    if not os.path.exists('graphsaintdata') and not os.path.exists('data'):
        raise ValueError("The directory graphsaintdata does not exist!")
    elif os.path.exists('graphsaintdata') and not os.path.exists('data'):
        os.rename('graphsaintdata', 'data')
    prefix = "data/{}".format(args.dataset)
    DataType = namedtuple('Dataset', ['num_classes', 'train_nid', 'g'])

    adj_full = scipy.sparse.load_npz('./{}/adj_full.npz'.format(prefix)).astype(bool)
    g = dgl.from_scipy(adj_full)
    num_nodes = g.num_nodes()

    adj_train = scipy.sparse.load_npz('./{}/adj_train.npz'.format(prefix)).astype(bool)
    train_nid = np.array(list(set(adj_train.nonzero()[0])))

    role = json.load(open('./{}/role.json'.format(prefix)))
    mask = np.zeros((num_nodes,), dtype=bool)
    train_mask = mask.copy()
    train_mask[role['tr']] = True
    val_mask = mask.copy()
    val_mask[role['va']] = True
    test_mask = mask.copy()
    test_mask[role['te']] = True

    feats = np.load('./{}/feats.npy'.format(prefix))
    scaler = StandardScaler()
    scaler.fit(feats[train_nid])
    feats = scaler.transform(feats)

    class_map = json.load(open('./{}/class_map.json'.format(prefix)))
    class_map = {int(k): v for k, v in class_map.items()}
    if multilabel:
        # Multi-label binary classification
        num_classes = len(list(class_map.values())[0])
        class_arr = np.zeros((num_nodes, num_classes))
        for k, v in class_map.items():
            class_arr[k] = v
    else:
        num_classes = max(class_map.values()) - min(class_map.values()) + 1
        class_arr = np.zeros((num_nodes,))
        for k, v in class_map.items():
            class_arr[k] = v

    g.ndata['feat'] = torch.tensor(feats, dtype=torch.float)
    g.ndata['label'] = torch.tensor(class_arr, dtype=torch.float if multilabel else torch.long)
    g.ndata['train_mask'] = torch.tensor(train_mask, dtype=torch.bool)
    g.ndata['val_mask'] = torch.tensor(val_mask, dtype=torch.bool)
    g.ndata['test_mask'] = torch.tensor(test_mask, dtype=torch.bool)

    data = DataType(g=g, num_classes=num_classes, train_nid=train_nid)
    return data
def attach_graph(g, k):
    device = g.device
    out_graph_list = []
    in_graph_list = []
    wadj, ind, outd = DiffConv.get_weight_matrix(g)

    # forward diffusion: out-degree-normalized transition matrix
    adj = sparse.coo_matrix(wadj / outd.cpu().numpy())
    outg = dgl.from_scipy(adj, eweight_name='weight').to(device)
    outg.edata['weight'] = outg.edata['weight'].float().to(device)
    out_graph_list.append(outg)
    for i in range(k - 1):
        out_graph_list.append(DiffConv.diffuse(out_graph_list[-1], wadj, outd))

    # backward diffusion: in-degree-normalized transition matrix
    adj = sparse.coo_matrix(wadj.T / ind.cpu().numpy())
    ing = dgl.from_scipy(adj, eweight_name='weight').to(device)
    ing.edata['weight'] = ing.edata['weight'].float().to(device)
    in_graph_list.append(ing)
    for i in range(k - 1):
        in_graph_list.append(DiffConv.diffuse(in_graph_list[-1], wadj.T, ind))

    return out_graph_list, in_graph_list
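# The two loops above materialize successive powers of the forward and backward
# random-walk matrices used by diffusion convolution (DCRNN-style). A minimal
# numpy sketch of the same recurrence on a toy weight matrix (names here are
# illustrative, not from the snippet):
import numpy as np

W = np.array([[0., 1., 1.],
              [0., 0., 1.],
              [1., 0., 0.]])
P_fwd = W / W.sum(axis=1, keepdims=True)  # one-step forward transition matrix
hops = [P_fwd]
for _ in range(2):                        # k = 3 diffusion steps in total
    hops.append(hops[-1] @ P_fwd)         # (P_fwd)^2, (P_fwd)^3
print(hops[1])                            # two-hop transition probabilities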
def generate_g(self, estimated_adj):
    args = self.args
    if args.symmetric:
        adj = (estimated_adj + estimated_adj.t()) / 2
    else:
        adj = estimated_adj
    a = (adj.cpu() + torch.eye(adj.shape[0])).detach().cpu().numpy()
    b = sp.coo_matrix(a)
    g = dgl.from_scipy(b, 'weight').to(device)
    del a, b
    return g
def test_rgcn(O):
    ctx = F.ctx()
    etype = []
    g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.1)).to(F.ctx())
    # 5 etypes
    R = 5
    for i in range(g.number_of_edges()):
        etype.append(i % 5)
    B = 2
    I = 10

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis.initialize(ctx=ctx)
    h = nd.random.randn(100, I, ctx=ctx)
    r = nd.array(etype, ctx=ctx)
    h_new = rgc_basis(g, h, r)
    assert list(h_new.shape) == [100, O]

    if O % B == 0:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
        rgc_bdd.initialize(ctx=ctx)
        h = nd.random.randn(100, I, ctx=ctx)
        r = nd.array(etype, ctx=ctx)
        h_new = rgc_bdd(g, h, r)
        assert list(h_new.shape) == [100, O]

    # with norm
    norm = nd.zeros((g.number_of_edges(), 1), ctx=ctx)

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis.initialize(ctx=ctx)
    h = nd.random.randn(100, I, ctx=ctx)
    r = nd.array(etype, ctx=ctx)
    h_new = rgc_basis(g, h, r, norm)
    assert list(h_new.shape) == [100, O]

    if O % B == 0:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
        rgc_bdd.initialize(ctx=ctx)
        h = nd.random.randn(100, I, ctx=ctx)
        r = nd.array(etype, ctx=ctx)
        h_new = rgc_bdd(g, h, r, norm)
        assert list(h_new.shape) == [100, O]

    # id input
    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis.initialize(ctx=ctx)
    h = nd.random.randint(0, I, (100,), ctx=ctx)
    r = nd.array(etype, ctx=ctx)
    h_new = rgc_basis(g, h, r)
    assert list(h_new.shape) == [100, O]
def create_graph(self, edges_src, edges_dst, num_nodes):
    """graph = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_reverse_edges(graph)
    graph = dgl.add_self_loop(graph)"""
    # the data array must have one entry per edge, not per node
    adj = sp.coo_matrix((np.ones(len(edges_src)), (edges_src, edges_dst)),
                        shape=(num_nodes, num_nodes), dtype=np.float32)
    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = normalize(adj + sp.eye(adj.shape[0]))
    graph = dgl.from_scipy(adj, eweight_name='w')
    return graph
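# `normalize` above is external to the snippet (the usual GCN-style row
# normalization). A self-contained sketch of the same pipeline, with a simple
# row-normalizer standing in for it:
import numpy as np
import scipy.sparse as sp
import dgl

def row_normalize(m):
    deg = np.asarray(m.sum(axis=1)).flatten()  # out-degree of each row
    deg[deg == 0] = 1                          # guard against isolated nodes
    return sp.diags(1.0 / deg) @ m             # D^-1 * M

src, dst = np.array([0, 1]), np.array([1, 2])
adj = sp.coo_matrix((np.ones(len(src)), (src, dst)), shape=(3, 3), dtype=np.float32)
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)  # symmetrize
adj = row_normalize(adj + sp.eye(adj.shape[0]))                      # self loops
g = dgl.from_scipy(sp.coo_matrix(adj), eweight_name='w')
print(g.edata['w'])  # the normalized weights as edge features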
def astensor(x, *, dtype=None, device=None, escape=None):
    try:
        if x is None or (escape is not None and isinstance(x, escape)):
            return x
    except TypeError:
        raise TypeError(f"argument 'escape' must be a type or tuple of types.")
    if dtype is None:
        dtype = gf.infer_type(x)

    if isinstance(dtype, (np.dtype, str)):
        dtype = data_type_dict().get(str(dtype), dtype)
    elif not isinstance(dtype, torch.dtype):
        raise TypeError(
            f"argument 'dtype' must be torch.dtype, np.dtype or str, but got {type(dtype)}."
        )

    if is_tensor(x):
        tensor = x.to(dtype)
    elif gf.is_tensor(x, backend='tensorflow'):
        return astensor(gf.tensoras(x), dtype=dtype, device=device, escape=escape)
    elif sp.isspmatrix(x):
        if gg.backend() == "dgl_torch":
            import dgl
            tensor = dgl.from_scipy(x, idtype=getattr(torch, gg.intx()))
        elif gg.backend() == "pyg":
            edge_index, edge_weight = gf.sparse_adj_to_edge(x)
            return (astensor(edge_index, dtype=gg.intx(), device=device, escape=escape),
                    astensor(edge_weight, dtype=gg.floatx(), device=device, escape=escape))
        else:
            tensor = sparse_adj_to_sparse_tensor(x, dtype=dtype)
    elif any((isinstance(x, (np.ndarray, np.matrix)), gg.is_listlike(x), gg.is_scalar(x))):
        tensor = torch.tensor(x, dtype=dtype, device=device)
    else:
        raise TypeError(
            f"Invalid type of inputs. Allowed data type (Tensor, SparseTensor, Numpy array, "
            f"Scipy sparse matrix, None), but got {type(x)}."
        )
    return tensor.to(device)
def update_graph(model, optimizer, features, adj, rew_states, loss, args, envs):
    if adj.shape[0] > 1:
        labels = torch.zeros((len(features)))
        idx_train = torch.LongTensor([0])
        for r_s in rew_states:
            if len(envs.observation_space.shape) == 1:  # MuJoCo experiments
                labels[r_s[0]] = torch.sigmoid(2 * r_s[1])
            else:
                labels[r_s[0]] = torch.tensor([1.]) if r_s[1] > 0. else torch.tensor([0.])
            idx_train = torch.cat((idx_train, torch.LongTensor([r_s[0]])), 0)
        labels = labels.type(torch.LongTensor)
    else:
        labels = torch.zeros((len(features))).type(torch.LongTensor)
        idx_train = torch.LongTensor([0])

    # symmetrize the adjacency, build the graph Laplacian, then add self loops
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    deg = np.diag(adj.toarray().sum(axis=1))
    laplacian = torch.from_numpy((deg - adj.toarray()).astype(np.float32))
    adj = sp.csr_matrix(adj) + sp.eye(adj.shape[0])
    g = dgl.from_scipy(adj)

    if args.cuda and torch.cuda.is_available():
        model.cuda()
        features = features.cuda()
        laplacian = laplacian.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        g = g.to('cuda')

    t_total = time.time()
    for epoch in range(args.gcn_epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, g)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        # Laplacian smoothness regularizer on the softmax outputs
        soft_out = torch.unsqueeze(torch.nn.functional.softmax(output, dim=1)[:, 1], 1)
        loss_reg = torch.mm(torch.mm(soft_out.T, laplacian), soft_out)
        loss_train += args.gcn_lambda * loss_reg.squeeze()
        loss_train.backward()
        optimizer.step()
def test_dense_cheb_conv():
    for k in range(1, 4):
        ctx = F.ctx()
        g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx())
        adj = g.adjacency_matrix(ctx=ctx).tostype('default')
        cheb = nn.ChebConv(5, 2, k)
        dense_cheb = nn.DenseChebConv(5, 2, k)
        cheb.initialize(ctx=ctx)
        dense_cheb.initialize(ctx=ctx)

        for i in range(len(cheb.fc)):
            dense_cheb.fc[i].weight.set_data(cheb.fc[i].weight.data())
        if cheb.bias is not None:
            dense_cheb.bias.set_data(cheb.bias.data())

        feat = F.randn((100, 5))
        out_cheb = cheb(g, feat, [2.0])
        out_dense_cheb = dense_cheb(adj, feat, 2.0)
        assert F.allclose(out_cheb, out_dense_cheb)
def astensor(x, *, dtype=None, device=None, escape=None):
    try:
        if x is None or (escape is not None and isinstance(x, escape)):
            return x
    except TypeError:
        raise TypeError(f"argument 'escape' must be a type or tuple of types.")
    if dtype is None:
        dtype = gf.infer_type(x)
    elif isinstance(dtype, tf.dtypes.DType):
        dtype = dtype.name
    elif isinstance(dtype, (np.dtype, str)):
        dtype = str(dtype)
    else:
        raise TypeError(
            f"argument 'dtype' must be tf.dtypes.DType, np.dtype or str, but got {type(dtype)}."
        )

    with tf.device(device):
        if is_tensor(x):
            if x.dtype != dtype:
                return tf.cast(x, dtype=dtype)
            return tf.identity(x)
        elif gf.is_tensor(x, backend='torch'):
            return astensor(gf.tensoras(x), dtype=dtype, device=device, escape=escape)
        elif sp.isspmatrix(x):
            if gg.backend() == "dgl_tf":
                import dgl
                return dgl.from_scipy(x, idtype=getattr(tf, gg.intx())).to(device)
            else:
                return sparse_adj_to_sparse_tensor(x, dtype=dtype)
        elif any((isinstance(x, (np.ndarray, np.matrix)), gg.is_listlike(x), gg.is_scalar(x))):
            return tf.convert_to_tensor(x, dtype=dtype)
        else:
            raise TypeError(
                f"Invalid type of inputs. Allowed data type (Tensor, SparseTensor, Numpy array, "
                f"Scipy sparse matrix, None), but got {type(x)}."
            )
def test_bfs(idtype, n=100):
    def _bfs_nx(g_nx, src):
        edges = nx.bfs_edges(g_nx, src)
        layers_nx = [set([src])]
        edges_nx = []
        frontier = set()
        edge_frontier = set()
        for u, v in edges:
            if u in layers_nx[-1]:
                frontier.add(v)
                edge_frontier.add(g.edge_ids(int(u), int(v)))
            else:
                layers_nx.append(frontier)
                edges_nx.append(edge_frontier)
                frontier = set([v])
                edge_frontier = set([g.edge_ids(u, v)])
        # avoids empty successors
        if len(frontier) > 0 and len(edge_frontier) > 0:
            layers_nx.append(frontier)
            edges_nx.append(edge_frontier)
        return layers_nx, edges_nx

    a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
    g = dgl.from_scipy(a).astype(idtype)
    g_nx = g.to_networkx()
    src = random.choice(range(n))
    layers_nx, _ = _bfs_nx(g_nx, src)
    layers_dgl = dgl.bfs_nodes_generator(g, src)
    assert len(layers_dgl) == len(layers_nx)
    assert all(toset(x) == y for x, y in zip(layers_dgl, layers_nx))

    g_nx = nx.random_tree(n, seed=42)
    g = dgl.from_networkx(g_nx).astype(idtype)
    src = 0
    _, edges_nx = _bfs_nx(g_nx, src)
    edges_dgl = dgl.bfs_edges_generator(g, src)
    assert len(edges_dgl) == len(edges_nx)
    assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
def web_main():
    adj, features = load_data(args.dataset)
    features = sparse_to_tuple(features.tocoo())

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = \
        mask_test_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_normalization, adj_norm = preprocess_graph(adj)

    # Create model (add_self_loop returns a new graph rather than mutating in place)
    graph = dgl.from_scipy(adj_normalization)
    graph = graph.add_self_loop()

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T),
                                        torch.FloatTensor(adj_norm[1]),
                                        torch.Size(adj_norm[2]))
    adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T),
                                         torch.FloatTensor(adj_label[1]),
                                         torch.Size(adj_label[2]))
    features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T),
                                        torch.FloatTensor(features[1]),
                                        torch.Size(features[2]))

    weight_mask = adj_label.to_dense().view(-1) == 1
    weight_tensor = torch.ones(weight_mask.size(0))
    weight_tensor[weight_mask] = pos_weight

    features = features.to_dense()
    in_dim = features.shape[-1]
    vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2)

    # create training components
    optimizer = torch.optim.Adam(vgae_model.parameters(), lr=args.learning_rate)
    print('Total Parameters:', sum([p.nelement() for p in vgae_model.parameters()]))

    def get_scores(edges_pos, edges_neg, adj_rec):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        # Predict on test set of edges
        preds = []
        pos = []
        for e in edges_pos:
            preds.append(sigmoid(adj_rec[e[0], e[1]].item()))
            pos.append(adj_orig[e[0], e[1]])

        preds_neg = []
        neg = []
        for e in edges_neg:
            preds_neg.append(sigmoid(adj_rec[e[0], e[1]].data))
            neg.append(adj_orig[e[0], e[1]])

        preds_all = np.hstack([preds, preds_neg])
        labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
        roc_score = roc_auc_score(labels_all, preds_all)
        ap_score = average_precision_score(labels_all, preds_all)
        return roc_score, ap_score

    def get_acc(adj_rec, adj_label):
        labels_all = adj_label.to_dense().view(-1).long()
        preds_all = (adj_rec > 0.5).view(-1).long()
        accuracy = (preds_all == labels_all).sum().float() / labels_all.size(0)
        return accuracy

    # training loop
    for epoch in range(args.epochs):
        t = time.time()

        # Training and validation using a full graph
        vgae_model.train()
        logits = vgae_model.forward(graph, features)

        # compute loss: weighted reconstruction BCE minus the KL term
        loss = norm * F.binary_cross_entropy(logits.view(-1),
                                             adj_label.to_dense().view(-1),
                                             weight=weight_tensor)
        kl_divergence = 0.5 / logits.size(0) * (
            1 + 2 * vgae_model.log_std - vgae_model.mean**2 -
            torch.exp(vgae_model.log_std)**2).sum(1).mean()
        loss -= kl_divergence

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = get_acc(logits, adj_label)
        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)

        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(loss.item()),
              "train_acc=", "{:.5f}".format(train_acc),
              "val_roc=", "{:.5f}".format(val_roc),
              "val_ap=", "{:.5f}".format(val_ap),
              "time=", "{:.5f}".format(time.time() - t))

    test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
    print("End of training!",
          "test_roc=", "{:.5f}".format(test_roc),
          "test_ap=", "{:.5f}".format(test_ap))
print(pa_g.number_of_edges('written-by'))
print(pa_g.successors(1, etype='written-by'))  # get the authors that write paper #1

# Type name argument could be omitted whenever the behavior is unambiguous.
print(pa_g.number_of_edges())  # Only one edge type, the edge type argument could be omitted

###############################################################################
# A homogeneous graph is just a special case of a heterograph with only one type
# of node and edge.

# Paper-citing-paper graph is a homogeneous graph
pp_g = dgl.heterograph({('paper', 'citing', 'paper'): data['PvsP'].nonzero()})
# equivalent (shorter) API for creating a homogeneous graph
pp_g = dgl.from_scipy(data['PvsP'])

# All the ntype and etype arguments could be omitted because the behavior is unambiguous.
print(pp_g.number_of_nodes())
print(pp_g.number_of_edges())
print(pp_g.successors(3))

###############################################################################
# Create a subset of the ACM graph using the paper-author, paper-paper,
# and paper-subject relationships. Meanwhile, also add the reverse
# relationship to prepare for the later sections.
G = dgl.heterograph({
    ('paper', 'written-by', 'author'): data['PvsA'].nonzero(),
    ('author', 'writing', 'paper'):
def create_random_graph(n):
    arr = (spsp.random(n, n, density=0.001, format='coo',
                       random_state=100) != 0).astype(np.int64)
    return dgl.from_scipy(arr)
attention_mask = th.cat([
    attention_mask[:-nb_test],
    th.zeros((nb_word, max_length), dtype=th.long),
    attention_mask[-nb_test:]
])

# transform one-hot label to class ID for pytorch computation
y = y_train + y_test + y_val
y_train = y_train.argmax(axis=1)
y = y.argmax(axis=1)

# document mask used for update feature
doc_mask = train_mask + val_mask + test_mask

# build DGL Graph
adj_norm = normalize_adj(adj + sp.eye(adj.shape[0]))
g = dgl.from_scipy(adj_norm.astype('float32'), eweight_name='edge_weight')
g.ndata['input_ids'], g.ndata['attention_mask'] = input_ids, attention_mask
g.ndata['label'], g.ndata['train'], g.ndata['val'], g.ndata['test'] = \
    th.LongTensor(y), th.FloatTensor(train_mask), th.FloatTensor(val_mask), th.FloatTensor(test_mask)
g.ndata['label_train'] = th.LongTensor(y_train)
g.ndata['cls_feats'] = th.zeros((nb_node, model.feat_dim))

logger.info('graph information:')
logger.info(str(g))

# create index loaders
train_idx = Data.TensorDataset(th.arange(0, nb_train, dtype=th.long))
val_idx = Data.TensorDataset(th.arange(nb_train, nb_train + nb_val, dtype=th.long))
test_idx = Data.TensorDataset(th.arange(nb_node - nb_test, nb_node, dtype=th.long))
def get_graph_dgl(device=None):
    adj = get_scipy_adj()
    G = dgl.from_scipy(adj, device=device)
    return G
def create_dgl_graphs(dir):
    edge_data_path = dir + "scipy_graphs/"
    node_data_by_month = [
        pd.read_csv(dir + "surge_2019-0" + str(i) + ".csv") for i in range(1, 7)
    ]
    for df in node_data_by_month:
        df["interval_datetime"] = pd.to_datetime(df["interval_datetime"],
                                                 format='%Y-%m-%d %H:%M:%S',
                                                 errors='ignore')
    dgl_graphs = []
    i = 0
    for graph_file in os.listdir(edge_data_path):
        sparse_adj = sparse.load_npz(edge_data_path + graph_file)
        weights = th.tensor(list(sparse_adj.data), dtype=th.int32)

        month_num, interval_num = graph_file.split("-")
        month_num = int(month_num.split("_")[-1])
        interval_num = int(interval_num.split(".")[0]) - 1
        cur_interval_start = datetime.datetime(2019, month_num, 1, 0, 00, 0) + \
            datetime.timedelta(0, 10 * 60 * interval_num)

        node_DF = node_data_by_month[month_num - 1][
            node_data_by_month[month_num - 1]["interval_datetime"] == cur_interval_start]
        label_DF = node_data_by_month[month_num - 1][
            node_data_by_month[month_num - 1]["interval_datetime"] ==
            cur_interval_start + datetime.timedelta(0, 10 * 60)]

        node_labels = th.from_numpy(label_DF[label_DF.columns[9:]].values.astype(int).T)
        node_base_features = node_DF[[
            'is_holiday', "PU_time_2AM", "PU_time_6AM", "PU_time_10AM",
            "PU_time_2PM", "PU_time_6PM", "PU_time_10PM"
        ]].values.astype(int)[0]
        node_surge_features = node_DF[node_DF.columns[9:]].values.astype(int)
        node_base_features = np.array(
            [node_base_features for i in range(node_surge_features.size)])
        node_features = th.from_numpy(
            np.vstack([node_surge_features, node_base_features.T]).T)

        g = dgl.from_scipy(sparse_adj)
        g.edata['feature'] = weights
        g.ndata['feature'] = node_features
        g.ndata['label'] = node_labels

        train_mask = np.random.randint(0, 10, size=len(node_labels))
        test_mask = np.where(train_mask == -1, 1, 0).astype(bool)
        val_mask = np.where(train_mask == -1, 1, 0).astype(bool)
        train_mask = np.where(train_mask > 0, 1, 0).astype(bool)
        g.ndata['train_mask'] = th.from_numpy(train_mask)
        g.ndata['test_mask'] = th.from_numpy(test_mask)
        g.ndata['val_mask'] = th.from_numpy(val_mask)

        g.add_edges(g.nodes(), g.nodes())
        dgl_graphs.append(g)
        i += 1
        if i > 100:
            break
    np.random.shuffle(dgl_graphs)
    return dgl_graphs
parser.add_argument('--savemodelpath', type=str, default='stgcnwavemodel.pt',
                    help='save model path')
parser.add_argument('--pred_len', type=int, default=5,
                    help='how many steps away we want to predict')
parser.add_argument('--control_str', type=str, default='TNTSTNTST',
                    help='model structure controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer')
parser.add_argument('--channels', type=int, nargs='+', default=[1, 16, 32, 64, 32, 128],
                    help='channel sizes of the model layers')
args = parser.parse_args()

device = torch.device("cuda") if torch.cuda.is_available() and not args.disablecuda \
    else torch.device("cpu")

with open(args.sensorsfilepath) as f:
    sensor_ids = f.read().strip().split(',')
distance_df = pd.read_csv(args.disfilepath, dtype={'from': 'str', 'to': 'str'})

adj_mx = get_adjacency_matrix(distance_df, sensor_ids)
sp_mx = sp.coo_matrix(adj_mx)
G = dgl.from_scipy(sp_mx)

df = pd.read_hdf(args.tsfilepath)
num_samples, num_nodes = df.shape
tsdata = df.to_numpy()

n_his = args.window
save_path = args.savemodelpath
n_pred = args.pred_len
import dgl
from mxnet import nd
import scipy.sparse as sp

spmat = sp.rand(4, 4, format='csr', density=0.5)  # 50% of the entries are nonzero
# dgl.from_scipy only accepts square matrices, although scipy can generate rectangular ones
print(dgl.from_scipy(spmat), '\n matrix of spmat: \n', spmat)

from scipy.sparse import rand
matrix = rand(3, 4, density=0.25, format="csr", random_state=42)  # a rectangular matrix
print(matrix.todense())  # todense gives the dense representation of a sparse matrix

import networkx as nx
nx_g = nx.path_graph(5)  # a chain 0-1-2-3-4
print(dgl.from_networkx(nx_g))  # has 8 edges, because the NetworkX graph is undirected
nxg = nx.DiGraph([(2, 1), (1, 2), (2, 3), (0, 0)])  # nx.DiGraph avoids the edge doubling above
print(dgl.from_networkx(nxg))
# many more example snippets are collected on GitHub
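# As the comment above notes, dgl.from_scipy requires a square matrix and
# rejects a rectangular one. A small sketch demonstrating that (the exact
# exception type and message depend on the DGL version):
import dgl
import scipy.sparse as sp

try:
    dgl.from_scipy(sp.rand(3, 4, density=0.25, format='csr'))
except Exception as e:
    print('rejected as expected:', e)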