def ConvGraphLoad(dir='data/'):
    graph_list = os.listdir(dir)
    graph_list_tuple = [tuple(graph_name.split('_', 2)) for graph_name in graph_list]
    graph_list_tuple.sort(key=lambda tup: int(tup[1]))
    graph_list_tuple.sort(key=lambda tup: int(tup[0]))
    print('Graph List sorted:', graph_list_tuple)
    glist = []
    labellist = []
    for i in range(len(graph_list_tuple) - 2):
        graph_step_name = '_'.join(graph_list_tuple[i])
        graph_next_step_name = '_'.join(graph_list_tuple[i + 1])
        g, _ = dgl.load_graphs(dir + graph_step_name)
        g = g[0]
        g_next, _ = dgl.load_graphs(dir + graph_next_step_name)
        g_next = g_next[0]
        if g.number_of_nodes() == g_next.number_of_nodes() and \
                250 < int(graph_list_tuple[i][1]) <= 491:
            glist.append(g)
            labellist.append(g_next.ndata['value'])
    return glist, labellist
def get_graph(name, format):
    g = None
    if name == 'cora':
        g = dgl.data.CoraGraphDataset()[0]
    elif name == 'livejournal':
        bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_livejournal().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "friendster":
        bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_friendster().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "reddit":
        bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = dgl.data.RedditDataset(self_loop=True)[0].formats([format])
            dgl.save_graphs(bin_path, [g])
    else:
        raise Exception("Unknown dataset")
    g = g.formats([format])
    return g
def load_building_block_data():
    with open(f"{PROCESSED_DATA_DIR}/building_block_smis.pt", "rb") as f:
        building_block_smis = torch.load(f)
    building_block_molgraphs, _ = dgl.load_graphs(
        f"{PROCESSED_DATA_DIR}/building_block_molgraphs.pt")
    return building_block_smis, building_block_molgraphs
def load_nowplaying_rs():
    import torchtext.legacy as torchtext
    # follow examples/pytorch/pinsage/README to create train_g.bin
    name = 'train_g.bin'
    dataset_dir = os.path.join(os.getcwd(), 'dataset')
    os.symlink('/tmp/dataset/', dataset_dir)
    dataset_path = os.path.join(dataset_dir, "nowplaying_rs", name)
    g_list, _ = dgl.load_graphs(dataset_path)
    g = g_list[0]
    user_ntype = 'user'
    item_ntype = 'track'

    # Assign user and movie IDs and use them as features (to learn an individual
    # trainable embedding for each entity)
    g.nodes[user_ntype].data['id'] = torch.arange(g.number_of_nodes(user_ntype))
    g.nodes[item_ntype].data['id'] = torch.arange(g.number_of_nodes(item_ntype))

    # Prepare torchtext dataset and vocabulary
    fields = {}
    examples = []
    for i in range(g.number_of_nodes(item_ntype)):
        example = torchtext.data.Example.fromlist([], [])
        examples.append(example)
    textset = torchtext.data.Dataset(examples, fields)
    return PinsageDataset(g, user_ntype, item_ntype, textset)
def test_serialize_heterograph():
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()
    g_list0 = create_heterographs2(F.int64) + create_heterographs2(F.int32)
    dgl.save_graphs(path, g_list0)

    g_list, _ = dgl.load_graphs(path)
    assert g_list[0].idtype == F.int64
    assert len(g_list[0].canonical_etypes) == 3
    for i in range(len(g_list0)):
        for j, etypes in enumerate(g_list0[i].canonical_etypes):
            assert g_list[i].canonical_etypes[j] == etypes
    # assert g_list[1].restrict_format() == 'any'
    # assert g_list[2].restrict_format() == 'csr'
    assert g_list[4].idtype == F.int32
    assert np.allclose(F.asnumpy(g_list[2].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    assert np.allclose(F.asnumpy(g_list[6].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    for i in range(len(g_list)):
        assert g_list[i].ntypes == g_list0[i].ntypes
        assert g_list[i].etypes == g_list0[i].etypes

    # test set feature after load_graph
    g_list[3].nodes['user'].data['test'] = F.tensor([0, 1, 2, 4])
    g_list[3].edata['test'] = F.tensor([0, 1, 2])
    os.unlink(path)
def PEMS_BAYGraphDataset():
    if not os.path.exists('data/graph_bay.bin'):
        if not os.path.exists('data'):
            os.mkdir('data')
        download_file('graph_bay.bin')
    g, _ = dgl.load_graphs('data/graph_bay.bin')
    return g[0]
def METR_LAGraphDataset():
    if not os.path.exists('data/graph_la.bin'):
        if not os.path.exists('data'):
            os.mkdir('data')
        download_file('graph_la.bin')
    g, _ = dgl.load_graphs('data/graph_la.bin')
    return g[0]
def test_graph_serialize_without_feature(is_hetero):
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    dgl.save_graphs(path, g_list)

    idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
    loadg_list, _ = dgl.load_graphs(path, idx_list)

    idx = idx_list[0]
    load_g = loadg_list[0]
    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges('uv', 'eid')
    g_edges = g_list[idx].all_edges('uv', 'eid')
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])

    os.unlink(path)
def __call__(self, split_type):
    if split_type == 'train':
        subsample_ratio = self.subsample_ratio
    else:
        subsample_ratio = 1

    path = osp.join(
        self.save_dir or '',
        '{}_{}_{}-hop_{}-subsample.bin'.format(self.prefix, split_type,
                                               self.hop, subsample_ratio))
    if osp.exists(path):
        self.print_fn("Loading existing processed {} files".format(split_type))
        graph_list, data = dgl.load_graphs(path)
        dataset = GraphDataSet(graph_list, data['labels'])
    else:
        self.print_fn("Processed {} files do not exist.".format(split_type))
        edges, labels = self.generator(split_type)
        self.print_fn("Generated {} edges in total.".format(edges.size(0)))
        graph_list, labels = self.sampler(edges, labels)
        dataset = GraphDataSet(graph_list, labels)
        dgl.save_graphs(path, graph_list, {'labels': labels})
        self.print_fn("Saved preprocessed subgraphs to {}".format(path))
    return dataset
def main():
    args = parse_args()
    print(args)
    device = get_device(args.device)
    data, g, _, labels, predict_ntype, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.dataset, device)
    add_node_feat(g, 'pretrained', args.node_embed_path, True)
    if args.dataset == 'oag-venue':
        labels[labels == -1] = 0
    (*mgs, pos_g), _ = dgl.load_graphs(args.pos_graph_path)
    pos_g = pos_g.to(device)

    model = RHCO(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, data.num_classes, args.num_rel_hidden, args.num_heads,
        g.ntypes, g.canonical_etypes, predict_ntype, args.num_layers, args.dropout,
        len(mgs), args.tau, args.lambda_
    ).to(device)
    model.load_state_dict(torch.load(args.model_path, map_location=device))
    model.eval()

    base_pred = model.get_embeds(g, mgs, args.neighbor_size, args.batch_size, device)
    mask = torch.cat([train_idx, val_idx])
    logits = smooth(base_pred, pos_g, labels, mask, args)
    _, _, test_acc, _, _, test_f1 = calc_metrics(
        logits, labels, train_idx, val_idx, test_idx, evaluator)
    print('After smoothing: Test Acc {:.4f} | Test Macro-F1 {:.4f}'.format(
        test_acc, test_f1))
def get_canoncial_etypes(self):
    # Load the first saved graph found under self.dir and return its canonical edge types.
    for _, _, file_names in sorted(os.walk(self.dir)):
        g_sample_name = file_names[0]
        g_sample_path = os.path.join(self.dir, g_sample_name)
        g_sample, _ = dgl.load_graphs(g_sample_path)
        g_sample = g_sample[0]
        return g_sample.canonical_etypes
def get_graph(name, format=None):
    # global GRAPH_CACHE
    # if name in GRAPH_CACHE:
    #     return GRAPH_CACHE[name].to(format)
    if isinstance(format, str):
        format = [format]
    # didn't specify format
    if format is None:
        format = ['csc', 'csr', 'coo']
    g = None
    if name == 'cora':
        g = dgl.data.CoraGraphDataset(verbose=False)[0]
    elif name == 'pubmed':
        g = dgl.data.PubmedGraphDataset(verbose=False)[0]
    elif name == 'livejournal':
        bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_livejournal().formats(format)
            dgl.save_graphs(bin_path, [g])
    elif name == "friendster":
        bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            # the original node IDs of friendster are not consecutive, so we compact it
            g = dgl.compact_graphs(get_friendster()).formats(format)
            dgl.save_graphs(bin_path, [g])
    elif name == "reddit":
        bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = dgl.data.RedditDataset(self_loop=True)[0].formats(format)
            dgl.save_graphs(bin_path, [g])
    elif name.startswith("ogb"):
        g = get_ogb_graph(name)
    else:
        raise Exception("Unknown dataset")
    # GRAPH_CACHE[name] = g
    g = g.formats(format)
    return g
def load_and_cache_examples(args, processor, retrievers, relation_list, input_dir,
                            evaluate=False, output_examples=False):
    """
    :param args: arguments. Here use "local_rank", "cache_dir", "model_type",
        "max_seq_length", "data_dir", "train_file", "tokenization_train_filepath",
        "predict_file", "tokenization_dev_filepath", "retrieved_nell_concept_filepath",
    :param tokenizer: the predefined tokenizer, corresponding to the type of model.
        Each model has its own tokenizer.
    :param evaluate: bool. An indicator for loading the train file or the dev file.
    :param output_examples: bool. Whether to output examples.
    :return:
    """
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training processes the
        # dataset; the others will use the cache.
        torch.distributed.barrier()

    # Load data features from cache or dataset file
    if args.test:
        temp_mark = "test"
    elif evaluate:
        temp_mark = "dev"
    else:
        temp_mark = "train"
    cached_features_file = os.path.join(
        input_dir,
        "cached_{}_{}_{}".format(temp_mark, args.model_type, str(args.cache_file_suffix)),
    )

    # Init features and dataset from cache if it exists
    if os.path.exists(cached_features_file) and not args.overwrite_cache:
        logger.info("Loading features from cached file %s", cached_features_file)
        features_and_dataset = torch.load(cached_features_file)
        features, dataset, examples_tokenized = (
            features_and_dataset["features"],
            features_and_dataset["dataset"],
            features_and_dataset["examples"],
        )
        if args.model_type == "kelm":
            all_kgs_graphs, all_kgs_graphs_label_dict = load_graphs(
                cached_features_file + "_all_kgs_graphs.bin")
        else:
            all_kgs_graphs, all_kgs_graphs_label_dict = [], []
    else:
        logger.error("dataset does not exist and program exits")
        exit()

    if args.local_rank == 0:
        # Make sure only the first process in distributed training processes the
        # dataset; the others will use the cache.
        torch.distributed.barrier()
    logger.info("{} load data is done".format(args.local_rank))

    if output_examples:
        return dataset, examples_tokenized, features, all_kgs_graphs, all_kgs_graphs_label_dict
    # exit()
    return dataset, all_kgs_graphs, all_kgs_graphs_label_dict
def load(cls, path):
    import json

    import dgl

    homograph = dgl.load_graphs(path + "/homograph.bin")[0][0]
    heterograph = dgl.load_graphs(path + "/heterograph.bin")[0][0]

    with open(path + "/mol.json", "r") as f_handle:
        mol = json.load(f_handle)

    from openff.toolkit.topology import Molecule

    try:
        mol = Molecule.from_json(mol)
    except:
        mol = Molecule.from_dict(mol)

    g = cls(mol=mol, homograph=homograph, heterograph=heterograph)
    return g
def main(args):
    # load an existing model or ask for training a model first
    model_path = os.path.join('./', 'dummy_model_{}.pth'.format(args.dataset))
    if os.path.exists(model_path):
        model_stat_dict = th.load(model_path)
    else:
        raise FileNotFoundError('No saved model file. Please train a GNN model first...')

    # load graph, feat, and label
    g_list, label_dict = load_graphs('./' + args.dataset + '.bin')
    graph = g_list[0]
    labels = graph.ndata['label']
    feats = graph.ndata['feat']
    num_classes = max(labels).item() + 1
    feat_dim = feats.shape[1]
    hid_dim = label_dict['hid_dim'].item()

    # create a model and load from state_dict
    dummy_model = dummy_gnn_model(feat_dim, hid_dim, num_classes)
    dummy_model.load_state_dict(model_stat_dict)

    # Choose a node of the target class to be explained and extract its subgraph.
    # Here just pick the first one of the target class.
    target_list = [i for i, e in enumerate(labels) if e == args.target_class]
    n_idx = th.tensor([target_list[0]])

    # Extract the computation graph within k hops of the target node and use it for explanation
    sub_graph, ori_n_idxes, new_n_idx = extract_subgraph(graph, n_idx, hops=args.hop)

    # Sub-graph features
    sub_feats = feats[ori_n_idxes, :]

    # create an explainer
    explainer = NodeExplainerModule(model=dummy_model,
                                    num_edges=sub_graph.number_of_edges(),
                                    node_feat_dim=feat_dim)

    # define optimizer
    optim = th.optim.Adam([explainer.edge_mask, explainer.node_feat_mask],
                          lr=args.lr, weight_decay=args.wd)

    # train the explainer for the given node
    dummy_model.eval()
    model_logits = dummy_model(sub_graph, sub_feats)
    model_predict = F.one_hot(th.argmax(model_logits, dim=-1), num_classes)

    for epoch in range(args.epochs):
        explainer.train()
        exp_logits = explainer(sub_graph, sub_feats)
        loss = explainer._loss(exp_logits[new_n_idx], model_predict[new_n_idx])
        optim.zero_grad()
        loss.backward()
        optim.step()

    # visualize the importance of edges
    edge_weights = explainer.edge_mask.sigmoid().detach()
    visualize_sub_graph(sub_graph, edge_weights.numpy(), ori_n_idxes, n_idx)
def load_synthetic_route_data():
    synthetic_route_graphs, _ = dgl.load_graphs(
        f"{PROCESSED_DATA_DIR}/synthetic_route_graphs.pt")

    with open(f"{PROCESSED_DATA_DIR}/synthetic_route_node2smis.pt", "rb") as f:
        synthetic_route_node2smis = torch.load(f)

    synthetic_route_molgraphs, _ = dgl.load_graphs(
        f"{PROCESSED_DATA_DIR}/synthetic_route_molgraphs.pt")

    synthetic_route_node2molgraphs = []
    offset = 0
    for node2smi in synthetic_route_node2smis:
        nodes = list(node2smi.keys())
        molgraphs = synthetic_route_molgraphs[offset:offset + len(nodes)]
        synthetic_route_node2molgraphs.append(
            {node: molgraph for node, molgraph in zip(nodes, molgraphs)})
        offset += len(nodes)  # advance past this route's molgraphs

    return (synthetic_route_graphs, synthetic_route_node2smis,
            synthetic_route_node2molgraphs)
def load(self):
    # Generate paths
    graphs_path, info_path = tuple((path_saves + x) for x in self.get_dataset_name())

    # Load graphs
    self.graphs, label_dict = load_graphs(graphs_path)
    self.labels = label_dict['labels']

    # Load info
    self.data['typemaps'] = load_info(info_path)['typemaps']
    self.data['coordinates'] = load_info(info_path)['coordinates']
def get_graph(name, format):
    # global GRAPH_CACHE
    # if name in GRAPH_CACHE:
    #     return GRAPH_CACHE[name].to(format)
    g = None
    if name == 'cora':
        g = dgl.data.CoraGraphDataset(verbose=False)[0]
    elif name == 'pubmed':
        g = dgl.data.PubmedGraphDataset(verbose=False)[0]
    elif name == 'livejournal':
        bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_livejournal().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "friendster":
        bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_friendster().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "reddit":
        bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = dgl.data.RedditDataset(self_loop=True)[0].formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name.startswith("ogb"):
        g = get_ogb_graph(name)
    else:
        raise Exception("Unknown dataset")
    # GRAPH_CACHE[name] = g
    g = g.formats([format])
    return g
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, g, features, labels, predict_ntype, train_idx, val_idx, test_idx, _ = \
        load_data(args.dataset, device)
    add_node_feat(g, 'one-hot')

    (*mgs, pos_g), _ = dgl.load_graphs(args.pos_graph_path)
    mgs = [mg.to(device) for mg in mgs]
    if args.use_data_pos:
        pos_v, pos_u = data.pos
        pos_g = dgl.graph((pos_u, pos_v), device=device)
    pos = torch.zeros((g.num_nodes(predict_ntype), g.num_nodes(predict_ntype)),
                      dtype=torch.int, device=device)
    pos[data.pos] = 1

    model = RHCOFull(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, data.num_classes, args.num_rel_hidden, args.num_heads,
        g.ntypes, g.canonical_etypes, predict_ntype, args.num_layers, args.dropout,
        len(mgs), args.tau, args.lambda_
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.epochs, eta_min=args.lr / 100)
    alpha = args.contrast_weight
    warnings.filterwarnings(
        'ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        contrast_loss, logits = model(g, g.ndata['feat'], mgs, features, pos)
        clf_loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss = alpha * contrast_loss + (1 - alpha) * clf_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        torch.cuda.empty_cache()
        print(('Epoch {:d} | Loss {:.4f} | ' + METRICS_STR).format(
            epoch, loss.item(),
            *evaluate(model, g, labels, train_idx, val_idx, test_idx)))

    model.eval()
    _, base_pred = model(g, g.ndata['feat'], mgs, features, pos)
    mask = torch.cat([train_idx, val_idx])
    logits = smooth(base_pred, pos_g, labels, mask, args)
    _, _, test_acc, _, _, test_f1 = calc_metrics(
        logits, labels, train_idx, val_idx, test_idx)
    print('After smoothing: Test Acc {:.4f} | Test Macro-F1 {:.4f}'.format(
        test_acc, test_f1))
def ConvLabeldGraphLoad(dir='../u_label_plus_10/'):
    graph_list = os.listdir(dir)
    # kick DS_Store files -> find cleaner way
    graph_list = [name for name in graph_list if not name.startswith('.')]
    glist = []
    for graph_name in graph_list:
        g, _ = dgl.load_graphs(dir + graph_name)
        g = g[0]
        glist.append(g)
    return glist
def test_deserialize_old_heterograph_file():
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list, label_dict = dgl.load_graphs(path)
    assert g_list[0].idtype == F.int64
    assert g_list[3].idtype == F.int32
    assert np.allclose(F.asnumpy(g_list[2].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    assert np.allclose(F.asnumpy(g_list[5].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    assert F.allclose(label_dict['graph_label'], F.ones(54))
def load(self):
    self.graph_list = []
    self.label_list = []
    path = "dataset/homograph/test"
    scenarios = os.listdir(path)
    for scenario in scenarios:
        file_path = path + '/' + scenario
        graphs = os.listdir(file_path)
        for graph in graphs:
            g_list, label_dict = dgl.load_graphs(file_path + '/' + graph)
            self.graph_list.append(g_list[0])
            if scenario == 'normal':
                self.label_list.append(1)
            else:
                self.label_list.append(0)
def load_dgl(graph_path, info_path=None):
    """
    Loads saved DGL graphs, labels and other info.

    :param graph_path: path to the saved graph binary file
    :param info_path: optional path to the saved info file
    :return: graphs and labels (plus info if info_path is given)
    """
    # load processed data from directory graph_path
    logger.info(f'Loading graph data from: {graph_path}')
    graphs, label_dict = load_graphs(graph_path)
    labels = label_dict['labels']
    if info_path is not None:
        info = load_info(info_path)['info']
        return graphs, labels, info
    return graphs, labels
def test_serialize_heterograph_s3():
    path = "s3://dglci-data-test/graph2.bin"
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    dgl.save_graphs(path, g_list0)

    g_list, _ = dgl.load_graphs(path, [0, 2, 5])
    assert g_list[0].idtype == F.int64
    # assert g_list[1].restrict_format() == 'csr'
    assert np.allclose(F.asnumpy(g_list[1].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    assert np.allclose(F.asnumpy(g_list[2].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
def test_load_old_files1():
    loadg_list, _ = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/1.bin"))
    idx, num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = np.load(
        os.path.join(os.path.dirname(__file__), "data/1.npy"), allow_pickle=True)

    load_g = loadg_list[idx]
    load_edges = load_g.all_edges('uv', 'eid')

    assert np.allclose(F.asnumpy(load_edges[0]), edge0)
    assert np.allclose(F.asnumpy(load_edges[1]), edge1)
    assert np.allclose(F.asnumpy(load_g.edata['e1']), edata_e1)
    assert np.allclose(F.asnumpy(load_g.edata['e2']), edata_e2)
    assert np.allclose(F.asnumpy(load_g.ndata['n1']), ndata_n1)
def test_load_old_files2():
    loadg_list, labels0 = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/2.bin"))
    labels1 = load_labels(os.path.join(os.path.dirname(__file__), "data/2.bin"))
    idx, edges0, edges1, np_labels = np.load(
        os.path.join(os.path.dirname(__file__), "data/2.npy"), allow_pickle=True)

    assert np.allclose(F.asnumpy(labels0['label']), np_labels)
    assert np.allclose(F.asnumpy(labels1['label']), np_labels)

    load_g = loadg_list[idx]
    print(load_g)
    load_edges = load_g.all_edges('uv', 'eid')
    assert np.allclose(F.asnumpy(load_edges[0]), edges0)
    assert np.allclose(F.asnumpy(load_edges[1]), edges1)
def __getitem__(self, index):
    path = self.paths[index]
    graph_list, label_dict = dgl.load_graphs(path)
    graph = graph_list[0]
    assert torch.sum(torch.isnan(graph.ndata['geometric_feat']['edge'])) == 0, print(path)

    graph.apply_nodes(
        lambda nodes: {'geometric_feat':
                       (nodes.data['geometric_feat'] - self.mean_node_feat) / self.std_node_feat},
        ntype='node')
    graph.apply_nodes(
        lambda nodes: {'geometric_feat':
                       (nodes.data['geometric_feat'] - self.mean_edge_feat) / self.std_edge_feat},
        ntype='edge')
    graph.apply_nodes(
        lambda nodes: {'geometric_feat':
                       (nodes.data['geometric_feat'] - self.mean_face_feat) / self.std_face_feat},
        ntype='face')

    label = graph.ndata['label']
    if self.opt.save_prediction_for_test_files:
        return path, graph, label
    return graph, label
def load(self):
    self.graph_list = []
    self.label_list = []
    for child_dir in ['normal', 'attack']:
        homograph = "dataset/homograph/" + child_dir + '/'
        scenarios = os.listdir(homograph)
        for scenario in scenarios:
            file_path = homograph + scenario
            graphs = os.listdir(file_path)
            for graph in graphs:
                g_list, label_dict = dgl.load_graphs(file_path + '/' + graph)
                self.graph_list.append(g_list[0])
                for key, value in label_dict.items():
                    if key != 'Drive-by-download':
                        self.label_list.append(0)
                    else:
                        self.label_list.append(1)
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, _, feat, labels, _, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.dataset, device)
    feat = (feat - feat.mean(dim=0)) / feat.std(dim=0)

    # label propagation graph
    if args.dataset in ('acm', 'dblp'):
        pos_v, pos_u = data.pos
        pg = dgl.graph((pos_u, pos_v), device=device)
    else:
        pg = dgl.load_graphs(args.prop_graph)[0][-1].to(device)
    if args.dataset == 'oag-venue':
        labels[labels == -1] = 0

    base_model = nn.Linear(feat.shape[1], data.num_classes).to(device)
    train_base_model(base_model, feat, labels, train_idx, val_idx, test_idx,
                     evaluator, args)
    correct_and_smooth(base_model, pg, feat, labels, train_idx, val_idx, test_idx,
                       evaluator, args)
def test_graph_serialize_with_feature(is_hetero):
    num_graphs = 100

    t0 = time.time()
    g_list = construct_graph(num_graphs, is_hetero)
    t1 = time.time()

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    dgl.save_graphs(path, g_list)
    t2 = time.time()

    idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
    loadg_list, _ = dgl.load_graphs(path, idx_list)
    t3 = time.time()

    idx = idx_list[0]
    load_g = loadg_list[0]
    print("Save time: {} s".format(t2 - t1))
    print("Load time: {} s".format(t3 - t2))
    print("Graph Construction time: {} s".format(t1 - t0))

    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges('uv', 'eid')
    g_edges = g_list[idx].all_edges('uv', 'eid')
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])
    assert F.allclose(load_g.edata['e1'], g_list[idx].edata['e1'])
    assert F.allclose(load_g.edata['e2'], g_list[idx].edata['e2'])
    assert F.allclose(load_g.ndata['n1'], g_list[idx].ndata['n1'])

    os.unlink(path)