def train(step, model, optimizer, lr_scheduler, mm_scheduler, transform_1, transform_2, data, args):
    model.train()

    # update learning rate
    lr = lr_scheduler.get(step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # update momentum
    mm = 1 - mm_scheduler.get(step)

    # forward
    optimizer.zero_grad()
    x1, x2 = transform_1(data), transform_2(data)
    if args.dataset != 'ppi':
        x1, x2 = dgl.add_self_loop(x1), dgl.add_self_loop(x2)
    q1, y2 = model(x1, x2)
    q2, y1 = model(x2, x1)
    loss = (2 - cosine_similarity(q1, y2.detach(), dim=-1).mean()
              - cosine_similarity(q2, y1.detach(), dim=-1).mean())
    loss.backward()

    # update online network
    optimizer.step()
    # update target network
    model.update_target_network(mm)

    return loss.item()
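# The scheduler objects passed to train() above only need to expose .get(step).
# Below is a minimal, hypothetical cosine-with-warmup sketch of that interface
# (class name and warmup behaviour are assumptions, not taken from the original):
import numpy as np

class CosineDecayScheduler:
    def __init__(self, max_val, warmup_steps, total_steps):
        self.max_val = max_val
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps

    def get(self, step):
        if step < self.warmup_steps:
            # linear warmup from 0 to max_val
            return self.max_val * step / self.warmup_steps
        # cosine decay from max_val down to 0
        progress = (step - self.warmup_steps) / (self.total_steps - self.warmup_steps)
        return self.max_val * (1 + np.cos(np.pi * progress)) / 2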
def data_split(g, args):
    index = data_sample(g, args)
    graphs = [node_subgraph(g, index[i]) for i in range(args.split)]
    for i in range(len(graphs)):
        # add self loop (after removing any existing ones)
        graphs[i] = remove_self_loop(graphs[i])
        graphs[i] = add_self_loop(graphs[i])
    return graphs
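# data_sample() is not shown here; a hypothetical implementation consistent
# with how data_split() uses it: partition the node ids of g into args.split
# random, roughly equal-sized index tensors.
import torch

def data_sample(g, args):
    perm = torch.randperm(g.number_of_nodes())
    shard = g.number_of_nodes() // args.split
    return [perm[i * shard:(i + 1) * shard] for i in range(args.split)]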
def forward(self, g, feats):
    """Update node representations.

    Parameters
    ----------
    g : DGLGraph
        DGLGraph for a batch of graphs
    feats : FloatTensor of shape (N, M1)
        * N is the total number of nodes in the batch of graphs
        * M1 is the input node feature size, which equals in_feats in initialization

    Returns
    -------
    feats : FloatTensor of shape (N, M2)
        * N is the total number of nodes in the batch of graphs
        * M2 is the output node representation size, which equals
          hidden_sizes[-1] in initialization.
    """
    deg = None
    max_deg = None
    deg_membership = None
    g_self = dgl.add_self_loop(g)
    for gnn in self.gnn_layers:
        feats, deg, max_deg, deg_membership = gnn(
            g, g_self, feats, deg, max_deg, deg_membership)
    return feats
def DGLDatasetReader(dataset_name, self_loops, device=None):
    data = load_data(dataset_name, self_loops)
    if dataset_name == 'reddit':
        g = data.graph.int().to(device)
    else:
        g = DGLGraph(data.graph).to(device)
    # normalization: store D^{-1/2} per node
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1).to(device)
    # add self loop
    if self_loops:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    return (g,
            torch.FloatTensor(data.features),
            torch.LongTensor(data.labels),
            data.num_labels,
            torch.ByteTensor(data.train_mask),
            torch.ByteTensor(data.test_mask),
            torch.ByteTensor(data.val_mask))
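# The per-node 'norm' stored above is D^{-1/2}. A GCN layer would typically
# apply it on both endpoints of each edge, giving the symmetric normalization
# D^{-1/2} A D^{-1/2}. A minimal sketch with DGL message passing (the helper
# name is illustrative, not from the original code):
import dgl.function as fn

def gcn_aggregate(g, h):
    with g.local_scope():
        g.ndata['h'] = h * g.ndata['norm']          # scale by D^{-1/2} at the source
        g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h_sum'))
        return g.ndata['h_sum'] * g.ndata['norm']   # scale by D^{-1/2} at the destination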
def train(args):
    data = load_citation_dataset(args.dataset)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    num_heads = [args.num_heads] * (args.num_layers - 1) + [args.num_out_heads]
    model = GAT(features.shape[1], args.num_hidden, data.num_classes,
                num_heads, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    for epoch in range(args.epochs):
        model.train()
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, g, features, labels, val_mask)
        print('Epoch {:04d} | Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f}'
              .format(epoch, loss.item(), train_acc, val_acc))

    print()
    acc = evaluate(model, g, features, labels, test_mask)
    print('Test Accuracy {:.4f}'.format(acc))
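# accuracy() and evaluate() are assumed helpers; minimal sketches consistent
# with how they are called in the training loops here (torch assumed imported):
def accuracy(logits, labels):
    preds = logits.argmax(dim=1)
    return (preds == labels).float().mean().item()

def evaluate(model, g, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    return accuracy(logits[mask], labels[mask])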
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, g, feats, labels, predict_ntype, relations, neighbor_sizes, \
        pos, pos_threshold, train_mask, val_mask, test_mask = load_data(args.dataset, device)
    bgs = [g[rel] for rel in relations]  # neighbor-to-target bipartite graphs
    mgs = [
        dgl.add_self_loop(
            dgl.remove_self_loop(dgl.metapath_reachable_graph(g, mp))).to(device)
        for mp in data.metapaths
    ]  # homogeneous graphs of metapath-based neighbors
    model = HeCo([feat.shape[1] for feat in feats], args.num_hidden,
                 args.feat_drop, args.attn_drop, neighbor_sizes,
                 len(data.metapaths), args.tau, args.lambda_).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    for epoch in range(args.epochs):
        model.train()
        loss = model(bgs, mgs, feats, pos)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('Epoch {:d} | Train Loss {:.4f}'.format(epoch, loss.item()))
    evaluate(model, mgs, feats[0], labels, data.num_classes,
             train_mask, test_mask, args.seed)
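# A small self-contained illustration of dgl.metapath_reachable_graph, which
# the list comprehension above relies on: for the metapath ('pa', 'ap') on a
# paper-author heterograph, it yields a homogeneous paper-paper graph
# connecting papers that share an author (toy data, for illustration only):
import dgl
import torch

hg = dgl.heterograph({
    ('paper', 'pa', 'author'): (torch.tensor([0, 1]), torch.tensor([0, 0])),
    ('author', 'ap', 'paper'): (torch.tensor([0, 0]), torch.tensor([0, 1])),
})
pp = dgl.metapath_reachable_graph(hg, ['pa', 'ap'])  # paper-paper coauthor graph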
def add_recipe(step):
    g, mp = step
    n = g.number_of_nodes()
    items = list(range(18, n, 1))
    size = np.random.randint(len(items))
    us = np.random.choice(items, size=size)
    us = torch.tensor(us, dtype=torch.int32).to(device)
    vs = np.zeros((size,)) + (n - 1)
    vs = torch.tensor(vs, dtype=torch.int32).to(device)
    norms = torch.ones(len(vs), dtype=torch.float32).to(device)
    types = torch.full((len(vs),), 4, dtype=torch.int32).to(device)
    # connect the sampled item nodes to the last node in both directions
    g.add_edges(us, vs, data={"rel_type": types, "norm": norms})
    g.add_edges(vs, us, data={"rel_type": types, "norm": norms})
    g = dgl.add_self_loop(g)
    return (g, mp)
def dgl_graph_from_vec(vec, graph_params):
    """Create a graph from a flattened vector, treated as a thresholded
    weighted adjacency matrix, with properties stored as torch tensors."""
    if graph_params.flatten:
        W = vec_to_sym(vec)
    else:
        W = vec
    # add signal on nodes
    u = getattr(feature_generation, graph_params.node_feat)(W)
    # threshold edge weights
    if graph_params.thr_type == 'pos':
        W[W < graph_params.threshold] = 0
    else:
        W[np.abs(W) < graph_params.threshold] = 0
    # convert to a sparse COO matrix, then build the DGL graph
    W = sparse.csr_matrix(W).tocoo()
    edge_weight = torch.tensor(W.data).float()
    u = torch.from_numpy(u.astype(np.float32))
    g = dgl.from_scipy(W)
    g.ndata['feat'] = u
    g.edata['weight'] = edge_weight
    if graph_params.add_self_loop:
        g = dgl.add_self_loop(g)
        # add_self_loop appends one self-loop edge per node at the end;
        # give those edges unit weight
        g.edata['weight'][-graph_params.n_nodes:] = 1
    return g
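# vec_to_sym() is assumed to invert the usual flattening of the off-diagonal
# upper triangle of a symmetric matrix; a hypothetical implementation:
import numpy as np

def vec_to_sym(vec):
    # solve n * (n - 1) / 2 == len(vec) for n
    n = int((1 + np.sqrt(1 + 8 * len(vec))) / 2)
    W = np.zeros((n, n))
    W[np.triu_indices(n, k=1)] = vec
    return W + W.T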
def __call__(self, split_type):
    if split_type == 'train':
        subsample_ratio = self.subsample_ratio
    else:
        subsample_ratio = 1

    pos_edges = self.split_edge[split_type]['edge']
    if split_type == 'train':
        # Adding a self loop in train avoids sampling the source node itself.
        g = add_self_loop(self.g)
        eids = g.edge_ids(pos_edges[:, 0], pos_edges[:, 1])
        neg_edges = torch.stack(self.neg_sampler(g, eids), dim=1)
    else:
        neg_edges = self.split_edge[split_type]['edge_neg']
    pos_edges = self.subsample(pos_edges, subsample_ratio).long()
    neg_edges = self.subsample(neg_edges, subsample_ratio).long()

    edges = torch.cat([pos_edges, neg_edges])
    labels = torch.cat([
        torch.ones(pos_edges.size(0), 1),
        torch.zeros(neg_edges.size(0), 1)
    ])
    if self.shuffle:
        perm = torch.randperm(edges.size(0))
        edges = edges[perm]
        labels = labels[perm]
    return edges, labels
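# self.subsample is assumed to keep a random fraction of the given edges; a
# hypothetical implementation matching the calls above (torch assumed imported):
def subsample(self, edges, subsample_ratio):
    num_keep = int(edges.size(0) * subsample_ratio)
    perm = torch.randperm(edges.size(0))[:num_keep]
    return edges[perm]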
def gat_data(g, n_classes, gpu):
    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    num_feats = g.ndata['feat'].shape[1]
    return n_classes, n_edges, g, num_feats
def to_dgl(self: GraphFeaturiser, mol: Mol) -> dgl.DGLGraph:
    """Generates a DGL graph from a molecule.

    Args:
        mol: The molecule to featurise.

    Returns:
        A DGL graph of the featurised molecule.
    """
    num_atoms = mol.GetNumAtoms()
    bonds = mol.GetBonds()
    bond_from = [bond.GetBeginAtomIdx() for bond in bonds]
    bond_to = [bond.GetEndAtomIdx() for bond in bonds]
    g = dgl.graph((torch.tensor(bond_from), torch.tensor(bond_to)),
                  num_nodes=num_atoms)
    for key, atom_featuriser in self.atom_featurisers.items():
        atom_features = atom_featuriser.process_molecule(mol)
        g.ndata[key] = torch.tensor(atom_features, dtype=torch.float)
    for key, bond_featuriser in self.bond_featurisers.items():
        bond_features = [bond_featuriser.process_bond(bond) for bond in bonds]
        g.edata[key] = torch.tensor(bond_features, dtype=torch.float)
    g = dgl.add_reverse_edges(g, copy_edata=True)
    if self.add_self_loops:
        g = dgl.add_self_loop(g)
    return g
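# Example usage of to_dgl(), assuming 'featuriser' is a configured
# GraphFeaturiser instance (the variable name is illustrative):
from rdkit import Chem

mol = Chem.MolFromSmiles('CCO')  # ethanol
g = featuriser.to_dgl(mol)
print(g.num_nodes(), g.num_edges())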
def get_graph(glass_tc_pos_path, control_tc_pos_path, threshold, weight,
              only_control: bool = True):
    adj_matrix, pos_glass, pos_control = get_adj_matrix(
        glass_tc_pos_path, control_tc_pos_path, threshold, weight, only_control)
    num_state_node = pos_glass.shape[0]
    total_node = adj_matrix.shape[0]
    u = torch.nonzero(adj_matrix)[:, 0]
    v = torch.nonzero(adj_matrix)[:, 1]

    # 0th node ~ 'state dim-1'th node : state node (glass TC)
    # Others : action node (control TC)
    g = dgl.graph((u, v), num_nodes=total_node)
    g = dgl.add_self_loop(g)

    # Add control node flags
    is_control = torch.zeros(total_node, 1)
    is_control[num_state_node:, 0] = 1
    g.ndata['is_control'] = is_control

    # Handle positional information
    scaler = MinMaxScaler()
    pos = np.concatenate([pos_glass, pos_control], axis=0)
    pos_std = scaler.fit_transform(pos)
    g.ndata['position'] = torch.from_numpy(pos_std).float()
    return g
def detect_body_point_and_save(img_filenames, imgs, save_name, label):
    # If the corresponding .npy file already exists, load it directly
    # instead of re-detecting the keypoints
    if os.path.exists(save_name):
        print('Body point data already exists')
        bodies_points = np.load(save_name)
    else:
        # Otherwise, run OpenPose-based body keypoint detection
        bodies_points = []
        for img in imgs:
            body_point = get_body_points(img_filenames + img)
            bodies_points.append(body_point)
        bodies_points_array = np.array(bodies_points)
        np.save(save_name, bodies_points_array)

    body_graph = []  # list for storing the graphs
    for body_points in bodies_points:  # iterate over the detection results of every image
        # Some images contain multiple people, so also iterate over every
        # body detected in the same image
        for body_point in body_points:
            # build the graph; src/dst are skeleton edge lists defined elsewhere
            g = dgl.graph((src, dst))
            g = dgl.add_self_loop(g)
            plt.figure()
            # Take the first two columns as coordinates (the third is the
            # confidence) to use as node positions when plotting
            pos = body_point[:, 0:2]
            # The default origin is the top-left corner, so flip the y axis
            # to match the usual viewing convention
            pos[:, 1] = -pos[:, 1]
            nx.draw(g.to_networkx(), pos=pos, with_labels=True)  # plot the graph
            node_feature = body_point
            g.ndata['coordinate'] = torch.tensor(node_feature)
            body_graph.append([g, torch.tensor([label]).float()])
    return body_graph
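# src and dst above are module-level skeleton edge lists not shown in this
# snippet. A plausible definition for the 18-keypoint OpenPose COCO skeleton
# (the exact pairs in the original code may differ):
edges = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7),
         (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13),
         (0, 14), (14, 16), (0, 15), (15, 17)]
src = [u for u, v in edges] + [v for u, v in edges]  # both directions
dst = [v for u, v in edges] + [u for u, v in edges]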
def compute_representations(net, dataset, device):
    r"""Pre-computes the representations for the entire dataset.

    Returns:
        [torch.Tensor, torch.Tensor]: Representations and labels.
    """
    net.eval()
    reps = []
    labels = []
    if len(dataset) == 1:
        g = dataset[0]
        g = dgl.add_self_loop(g)
        g = g.to(device)
        with torch.no_grad():
            reps.append(net(g))
            labels.append(g.ndata['label'])
    else:
        for g in dataset:
            # forward
            g = g.to(device)
            with torch.no_grad():
                reps.append(net(g))
                labels.append(g.ndata['label'])
    reps = torch.cat(reps, dim=0)
    labels = torch.cat(labels, dim=0)
    return [reps, labels]
def train_gcn(args):
    exp_init(args.seed, gpu_id=args.gpu)
    # ! config
    cf = GraphSageConfig(args)
    cf.device = th.device("cuda:0" if args.gpu >= 0 else "cpu")

    # ! Load Graph
    g, features, n_feat, cf.n_class, labels, train_x, val_x, test_x = \
        preprocess_data(cf.dataset, cf.train_percentage)
    features = features.to(cf.device)
    g = dgl.add_self_loop(g).to(cf.device)
    supervision = SimpleObject({'train_x': train_x, 'val_x': val_x,
                                'test_x': test_x, 'labels': labels})

    # ! Train Init
    print(f'{cf}\nStart training..')
    model = SAGE(n_feat, cf.n_hidden, cf.n_class, cf.n_layer,
                 F.relu, cf.dropout, cf.aggregator)
    model.to(cf.device)
    print(model)
    optimizer = th.optim.Adam(model.parameters(), lr=cf.lr,
                              weight_decay=cf.weight_decay)
    if cf.early_stop > 0:
        stopper = EarlyStopping(patience=cf.early_stop, path=cf.checkpoint_file)
    else:
        stopper = None

    # ! Train
    trainer = FullBatchTrainer(model=model, g=g, cf=cf, features=features,
                               sup=supervision, stopper=stopper,
                               optimizer=optimizer,
                               loss_func=th.nn.CrossEntropyLoss())
    trainer.run()
    trainer.eval_and_save()
    return cf
def train(args):
    data = load_citation_dataset(args.dataset)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    model = GCN(features.shape[1], args.num_hidden, data.num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    for epoch in range(args.epochs):
        model.train()
        logits = model(g, features)
        loss = criterion(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = accuracy(logits[val_mask], labels[val_mask])
        print('Epoch {:05d} | Loss {:.4f} | ValAcc {:.4f}'.format(
            epoch, loss.item(), acc))

    acc = accuracy(model(g, features)[test_mask], labels[test_mask])
    print('Test Accuracy {:.4f}'.format(acc))
def train(args):
    set_random_seed(args.seed)
    if args.hetero:
        data = HETERO_DATASET[args.dataset]()
        g = data[0]
        gs = [
            dgl.metapath_reachable_graph(g, metapath)
            for metapath in data.metapaths
        ]
        for i in range(len(gs)):
            gs[i] = dgl.add_self_loop(dgl.remove_self_loop(gs[i]))
        ntype = data.predict_ntype
        num_classes = data.num_classes
        features = g.nodes[ntype].data['feat']
        labels = g.nodes[ntype].data['label']
        train_mask = g.nodes[ntype].data['train_mask']
        val_mask = g.nodes[ntype].data['val_mask']
        test_mask = g.nodes[ntype].data['test_mask']
    else:
        data = DATASET[args.dataset]()
        gs = data[0]
        num_classes = data.num_classes
        features = gs[0].ndata['feat']
        labels = gs[0].ndata['label']
        train_mask = gs[0].ndata['train_mask']
        val_mask = gs[0].ndata['val_mask']
        test_mask = gs[0].ndata['test_mask']

    model = HAN(len(gs), features.shape[1], args.num_hidden, num_classes,
                args.num_heads, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    score = micro_macro_f1_score if args.task == 'clf' else nmi_ari_score
    if args.task == 'clf':
        metrics = 'Epoch {:d} | Train Loss {:.4f} | Train Micro-F1 {:.4f}' \
                  ' | Train Macro-F1 {:.4f} | Val Micro-F1 {:.4f} | Val Macro-F1 {:.4f}'
    else:
        metrics = 'Epoch {:d} | Train Loss {:.4f} | Train NMI {:.4f}' \
                  ' | Train ARI {:.4f} | Val NMI {:.4f} | Val ARI {:.4f}'
    for epoch in range(args.epochs):
        model.train()
        logits = model(gs, features)
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_metrics = score(logits[train_mask], labels[train_mask])
        val_metrics = score(logits[val_mask], labels[val_mask])
        print(metrics.format(epoch, loss.item(), *train_metrics, *val_metrics))

    test_metrics = evaluate(model, gs, features, labels, test_mask, score)
    if args.task == 'clf':
        print('Test Micro-F1 {:.4f} | Test Macro-F1 {:.4f}'.format(*test_metrics))
    else:
        print('Test NMI {:.4f} | Test ARI {:.4f}'.format(*test_metrics))
def create_elliptic_dataset_graph(df_features, df_edges, df_features_orig=None,
                                  encode_nodes=False, create_masks=False,
                                  graph_type='pytorch-geometric'):
    '''
    df_features_orig - the feature dataframe to use as node features in the
    graph (for the EvolveGCN case, where a feature matrix that does not
    change over the temporal dataset is required)
    '''
    assert graph_type in ['pytorch-geometric', 'dgl']
    df_features_orig = df_features if df_features_orig is None else df_features_orig
    df_edges = df_edges.copy()
    df_edges = df_edges[df_edges['txId1'].isin(df_features[0]) |
                        df_edges['txId2'].isin(df_features[0])]
    edge_index = df_edges.to_numpy().T
    edge_index = torch.LongTensor(edge_index).contiguous()
    weights = None
    node_features = df_features_orig.copy()
    y = torch.LongTensor(node_features['class'].values)
    if encode_nodes:
        node_encoder = OrderedEncoder()
        node_encoder.fit(node_features[0])
        edge_index = torch.LongTensor(node_encoder.transform(edge_index))
    node_features = node_features.drop([0, 'class', 1], axis=1)
    node_features = torch.FloatTensor(node_features.values)
    if graph_type == 'pytorch-geometric':
        data = torch_geometric_graph(x=node_features, edge_index=edge_index,
                                     edge_attr=weights, y=y)
        num_nodes = data.num_nodes
    else:
        U, V = edge_index[0, :], edge_index[1, :]
        data = dgl_graph((U, V))
        data.ndata['feat'] = node_features
        data.ndata['label'] = y
        data = dgl.add_self_loop(data)
        num_nodes = data.num_nodes()
    if create_masks:
        train_idx, test_idx = train_test_split(
            np.arange(num_nodes), test_size=0.15, random_state=42,
            stratify=data.get_node_labels())
        data.train_mask = torch.BoolTensor([(node in train_idx)
                                            for node in np.arange(num_nodes)])
        data.test_mask = torch.BoolTensor([(node in test_idx)
                                           for node in np.arange(num_nodes)])
    return data
def __init__(self, batch_size: int):
    super().__init__()
    dataset = DglNodePropPredDataset(name='ogbn-arxiv')
    self.split_idx = dataset.get_idx_split()
    self.g, labels = dataset[0]
    self.g.ndata["label"] = labels.squeeze()
    self.g = add_self_loop(self.g)
    self.batch_size = batch_size
def networkx_to_torch(self, networkx_graph):
    import dgl
    # graph = dgl.DGLGraph()
    graph = dgl.from_networkx(networkx_graph)
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_self_loop(graph)
    graph = graph.to(self.device)
    return graph
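# Example usage of the converter above, on an object exposing the method and
# a .device attribute (the variable name 'agent' is illustrative):
import networkx as nx

nx_g = nx.karate_club_graph()
g = agent.networkx_to_torch(nx_g)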
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # Add self loops. We add a self loop to each graph here since the function
    # "add_self_loop" does not support batched graphs.
    for i in range(len(dataset)):
        dataset.graph_lists[i] = dgl.add_self_loop(dataset.graph_lists[i])

    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - num_val - num_training
    train_set, val_set, test_set = random_split(
        dataset, [num_training, num_val, num_test])

    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True, num_workers=6)
    val_loader = GraphDataLoader(val_set, batch_size=args.batch_size, num_workers=2)
    test_loader = GraphDataLoader(test_set, batch_size=args.batch_size, num_workers=2)

    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    model_op = get_sag_network(args.architecture)
    model = model_op(in_dim=num_feature, hid_dim=args.hid_dim, out_dim=num_classes,
                     num_convs=args.conv_layers, pool_ratio=args.pool_ratio,
                     dropout=args.dropout).to(device)
    args.num_feature = int(num_feature)
    args.num_classes = int(num_classes)

    # Step 3: Create training components ===================================================== #
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Step 4: Training epochs ================================================================ #
    bad_count = 0
    best_val_loss = float("inf")
    final_test_acc = 0.
    best_epoch = 0
    train_times = []
    for e in range(args.epochs):
        s_time = time()
        train_loss = train(model, optimizer, train_loader, device)
        train_times.append(time() - s_time)
        val_acc, val_loss = test(model, val_loader, device)
        test_acc, _ = test(model, test_loader, device)
        if best_val_loss > val_loss:
            best_val_loss = val_loss
            final_test_acc = test_acc
            bad_count = 0
            best_epoch = e + 1
        else:
            bad_count += 1
        if bad_count >= args.patience:
            break
        if (e + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, val_acc={:.4f}, final_test_acc={:.4f}"
            print(log_format.format(e + 1, train_loss, val_acc, final_test_acc))
    print("Best Epoch {}, final test acc {:.4f}".format(best_epoch, final_test_acc))
    return final_test_acc, sum(train_times) / len(train_times)
def subsampled_batched_mesh_dgl(self, batchsize, faces_percentage=0.7):
    self.__subsample_faces(faces_percentage)
    mesh = self.__dgl_mesh()
    batch = [mesh for i in range(batchsize)]
    batched_mesh = dgl.batch(batch)
    batched_mesh = dgl.add_self_loop(batched_mesh)
    return batched_mesh
def giveGraphs(self, batch_size, voxel_pos):
    p2v = np.load("data/p2v_spec.npy", allow_pickle=True).tolist()
    p2v = [item for sublist in p2v for item in sublist]
    p2p = np.load("data/p2p.npy", allow_pickle=True).tolist()
    p2p = [item for sublist in p2p for item in sublist]
    v2v = np.load("data/v2v.npy", allow_pickle=True).tolist()
    v2v = [item for sublist in v2v for item in sublist]
    v2v_6 = np.load("data/v2v_6.npy", allow_pickle=True).tolist()
    v2v_6 = [item for sublist in v2v_6 for item in sublist]

    G_vox = dgl.graph(v2v)
    G_vox = dgl.add_self_loop(G_vox)

    graph_data = {('PMT', 'p2v', 'vox'): p2v, ('vox', 'v2v', 'vox'): v2v}
    g = dgl.heterograph(graph_data)
    g = dgl.to_homogeneous(g)
    g = dgl.add_self_loop(g)
    G = dgl.batch([g for i in range(batch_size)])
    return G, G_vox
def load_dataset():
    dataset = DglNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()
    # there is only one graph in Node Property Prediction datasets
    g, labels = dataset[0]
    g = dgl.add_self_loop(g)
    return g, labels, split_idx
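# Example usage of load_dataset(); DglNodePropPredDataset splits use the
# standard OGB keys 'train', 'valid' and 'test':
g, labels, split_idx = load_dataset()
train_idx = split_idx['train']
print(g.num_nodes(), labels.shape, train_idx.shape)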
def main(args):
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # Add self loops. We add a self loop to each graph here since the function
    # "add_self_loop" does not support batched graphs.
    for i in range(len(dataset)):
        dataset.graph_lists[i] = dgl.remove_self_loop(dataset.graph_lists[i])
        dataset.graph_lists[i] = dgl.add_self_loop(dataset.graph_lists[i])

    # preprocess: use node degree/label as node feature
    if args.degree_as_feature:
        dataset = degree_as_feature(dataset)
        mode = "concat"
    else:
        mode = "replace"
    dataset = node_label_as_feature(dataset, mode=mode)

    num_training = int(len(dataset) * 0.9)
    num_test = len(dataset) - num_training
    train_set, test_set = random_split(dataset, [num_training, num_test])

    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True, num_workers=1)
    test_loader = GraphDataLoader(test_set, batch_size=args.batch_size, num_workers=1)

    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    args.in_dim = int(num_feature)
    args.out_dim = int(num_classes)
    args.edge_feat_dim = 0  # No edge features in the datasets that we use.

    model = GraphClassifier(args).to(device)

    # Step 3: Create training components ===================================================== #
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True,
                                 weight_decay=args.weight_decay)

    # Step 4: Training epochs ================================================================ #
    best_test_acc = 0.0
    best_epoch = -1
    train_times = []
    for e in range(args.epochs):
        s_time = time()
        train_loss = train(model, optimizer, train_loader, device, e, args.epochs)
        train_times.append(time() - s_time)
        test_acc = test(model, test_loader, device)
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_epoch = e + 1
        if (e + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, test_acc={:.4f}, best_test_acc={:.4f}"
            print(log_format.format(e + 1, train_loss, test_acc, best_test_acc))
    print("Best Epoch {}, final test acc {:.4f}".format(best_epoch, best_test_acc))
    return best_test_acc, sum(train_times) / len(train_times)
def forward(self, graph, feat):
    """Add self-loops, project features per attention head, and propagate."""
    # add self_loop
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_self_loop(graph)
    # prepare: linear transform, then split into heads
    x = torch.mm(feat, self.weight).view(-1, self.heads, self.out_channels)
    out = self.propagate(graph, x)
    return out
def forward(self, graph: dgl.DGLGraph, *__args, **__kwargs) -> _typing.Sequence[torch.Tensor]:
    graph = dgl.remove_self_loop(graph)
    graph = dgl.add_self_loop(graph)
    h = graph.ndata['feat']
    hidden_rep = [h]
    for i in range(self.__num_layers):
        h = self.__gcn_layers[i](graph, h)
        h = self.__batch_normalizations[i](h)
        h = torch.nn.functional.relu(h)
        hidden_rep.append(h)
    return hidden_rep
def state(self):
    features = torch.stack([
        x for i, x in enumerate(self.dataset.ndata['feat'])
        if i in self.picked_nodes
    ])
    features = np.array(features)
    graph = dgl.add_self_loop(self.graph)
    graph = dgl.to_networkx(graph).to_undirected()
    self.embedding_model.fit(graph, features)
    embedding = self.embedding_model.get_embedding()
    embedding = np.sum(embedding, axis=0)
    return embedding
def pass_data_iteratively(model, graphs, minibatch_size=64):
    model.eval()
    output = []
    idx = np.arange(len(graphs))
    for i in range(0, len(graphs), minibatch_size):
        sampled_idx = idx[i:i + minibatch_size]
        if len(sampled_idx) == 0:
            continue
        output.append(
            model([dgl.add_self_loop(graphs[j][0]) for j in sampled_idx]).detach())
    return torch.cat(output, 0)
def test_sg_conv(out_dim):
    g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    g = dgl.add_self_loop(g)
    ctx = F.ctx()

    sgc = nn.SGConv(5, out_dim, 2)
    sgc.initialize(ctx=ctx)
    print(sgc)

    # test #1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    h1 = sgc(g, h0)
    assert h1.shape == (g.number_of_nodes(), out_dim)