def __init__(self, params):
    """Set up a link-prediction run: load the dataset, build graph and
    model, and create the optimizer.

    :param params: argparse-style namespace; fields read here include
        ``dataset``, ``gpu``, ``embed_dim``, ``k_w``, ``k_h``, ``lr``, ``l2``.
    """
    self.p = params
    self.prj_path = Path(__file__).parent.resolve()
    self.data = load_data(self.p.dataset)
    # Unpack dataset: entity count, the three triple splits, relation count.
    self.num_ent, self.train_data, self.valid_data, self.test_data, self.num_rels = self.data.num_nodes, self.data.train, self.data.valid, self.data.test, self.data.num_rels
    self.triplets = process(
        {
            'train': self.train_data,
            'valid': self.valid_data,
            'test': self.test_data
        }, self.num_rels)
    # gpu == -1 is the CPU sentinel; also fall back when CUDA is absent.
    if self.p.gpu != -1 and torch.cuda.is_available():
        self.device = torch.device(f'cuda:{self.p.gpu}')
        # -------------------------------
        # torch.cuda.set_rng_state(torch.cuda.get_rng_state())
        # torch.backends.cudnn.deterministic = True
        # -------------------------------
    else:
        self.device = torch.device('cpu')
    # When no embedding dim is given, derive it from the k_w x k_h reshape
    # size (presumably a ConvE-style 2D reshape — TODO confirm).
    self.p.embed_dim = self.p.k_w * self.p.k_h if self.p.embed_dim is None else self.p.embed_dim  # output dim of gnn
    self.data_iter = self.get_data_iter()
    self.g = self.build_graph()
    self.edge_type, self.edge_norm = self.get_edge_dir_and_norm()
    self.model = self.get_model()
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=self.p.lr,
                                      weight_decay=self.p.l2)
    # Best-so-far validation bookkeeping, updated during training.
    self.best_val_mrr, self.best_epoch, self.best_val_results = 0., 0., {}
    pprint(vars(self.p))
def main(args):
    """Dispatch on CLI flags: run t-SNE / closest-neighbour / UMAP
    visualisations, or train on either a local ``data/`` dump or the
    bundled FB15k-237 dataset.

    Improvements over the original: the chain of no-op self-assignments
    (``num_nodes = num_nodes`` etc.) and redundant aliases before
    ``training_process`` are removed; behavior is otherwise unchanged.
    """
    # load graph data
    writers, readers = get_all_writers_readers()
    if args.tsne:
        # 40 selects the 40-perplexity variant; otherwise use the default.
        if args.tsne == 40:
            tsne(writers, readers, 40)
        else:
            tsne(writers, readers)
    elif args.closest in ('writers', 'relations'):
        closest(writers, readers, args.data)
    elif args.umap:
        if args.umap == 40:
            umap(writers, readers, 40)
        else:
            umap(writers, readers)
    elif args.data:
        # Train on a local dump laid out under data/.
        import os
        entity_path = os.path.join('data', 'entities.dict')
        relation_path = os.path.join('data', 'relations.dict')
        train_path = os.path.join('data', 'train.txt')
        valid_path = os.path.join('data', 'valid.txt')
        test_path = os.path.join('data', 'test.txt')
        entity_dict = _read_dictionary(entity_path)
        relation_dict = _read_dictionary(relation_path)
        train_data = np.array(
            _read_triplets_as_list(train_path, entity_dict, relation_dict))
        valid_data = np.array(
            _read_triplets_as_list(valid_path, entity_dict, relation_dict))
        test_data = np.array(
            _read_triplets_as_list(test_path, entity_dict, relation_dict))
        num_nodes = len(entity_dict)
        print("# entities: {}".format(num_nodes))
        num_rels = len(relation_dict)
        print("# relations: {}".format(num_rels))
        print("# edges: {}".format(len(train_data)))
        training_process(num_nodes, train_data, valid_data, test_data,
                         num_rels)
    else:
        # Default: the bundled FB15k-237 dataset.
        dataset = 'FB15k-237'
        data = load_data(dataset)
        training_process(data.num_nodes, data.train, data.valid, data.test,
                         data.num_rels)
def main(dataset):
    """Export a DGL RGCN dataset's arrays into a directory named after the
    dataset (num.txt, labels.npy, trainIdx.npy, testIdx.npy, edge*.npy).

    :param dataset: dataset name understood by ``load_data``

    Improvements: the working directory is now restored in a ``finally``
    block so a failed extraction does not leave the process stranded
    inside the dataset directory; the unused ``feats`` local is removed.
    """
    # load graph data
    if not os.path.exists(dataset):
        os.mkdir(dataset)
    os.chdir(dataset)
    try:
        data = load_data(dataset, bfs_level=3, relabel=False)
        num_nodes = data.num_nodes
        num_rels = data.num_rels
        num_classes = data.num_classes
        labels = data.labels
        train_idx = data.train_idx
        test_idx = data.test_idx
        print(type(num_nodes))
        print(type(num_rels))
        print(type(num_classes))
        # Scalar metadata goes into a small '#'-separated text file.
        with open('num.txt', 'w') as f:
            f.write('{}#{}#{}'.format(num_nodes, num_rels, num_classes))
        np.save('labels.npy', labels)
        print(type(train_idx))
        np.save('trainIdx.npy', train_idx)
        print(type(test_idx))
        np.save('testIdx.npy', test_idx)
        # edge type and normalization factor
        print('edge_src type = ', type(data.edge_src))
        print('shape = ', data.edge_src.shape)
        print(data.edge_src)
        np.save('edgeSrc.npy', data.edge_src)
        np.save('edgeDst.npy', data.edge_dst)
        print('***')
        print('edge_type type =', type(data.edge_type))
        print('edge_type shape =', data.edge_type.shape)
        print(data.edge_type)
        np.save('edgeType.npy', data.edge_type)
        print('***')
        print('edge_norm type =', type(data.edge_norm))
        print('edge_norm shape = ', data.edge_norm.shape)
        print(data.edge_norm)
        np.save('edgeNorm.npy', data.edge_norm)
        print('***')
        print('Finish extracting dataset : {}'.format(dataset))
    finally:
        # Always return to the parent directory, even on failure.
        os.chdir('..')
def __init__(self, params):
    """Set up a link-prediction run: load the dataset, build the train
    graph with normalisation, and create model and optimizer.

    :param params: argparse-style namespace; fields read here include
        ``dataset``, ``gpu``, ``lr``.
    """
    self.p = params
    self.prj_path = Path(__file__).parent.resolve()
    # Timestamp used to tag this run, e.g. 2020_01_01_12:00:00.
    self.time_stamp = time.strftime('%Y_%m_%d') + '_' + time.strftime(
        '%H:%M:%S')
    self.data = load_data(self.p.dataset)
    # Unpack dataset: node count, the three triple splits, relation count.
    self.num_nodes, self.train_data, self.valid_data, self.test_data, self.num_rels = self.data.num_nodes, self.data.train, self.data.valid, self.data.test, self.data.num_rels
    if torch.cuda.is_available() and params.gpu >= 0:
        self.device = torch.device(f'cuda:{params.gpu}')
    else:
        self.device = torch.device('cpu')
    self.val_test_data = preprocess({
        'train': self.train_data,
        'valid': self.valid_data,
        'test': self.test_data
    })
    self.data_iter = self.get_data_iter()
    # self.rel: relations in train set
    self.graph, self.rel, node_norm = build_graph(num_nodes=self.num_nodes,
                                                  num_rels=self.num_rels,
                                                  edges=self.train_data)
    self.rel = torch.from_numpy(self.rel).to(self.device)
    # used to sample sub-graph
    self.in_deg = self.graph.in_degrees(range(
        self.graph.number_of_nodes())).float().view(-1, 1)
    # One column vector holding every node id, used at evaluation time.
    self.test_node_id = torch.arange(
        0, self.num_nodes, dtype=torch.long).view(-1, 1).to(self.device)
    # Convert the per-node norm into a per-edge norm for the full graph.
    self.test_edge_norm = node_norm_2_edge_norm(
        self.graph,
        torch.from_numpy(node_norm).view(-1, 1)).to(self.device)
    self.adj_list = get_adj(self.num_nodes, self.train_data)
    self.model = self.get_model()
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=params.lr)
def main(args):
    """End-to-end KG link-prediction driver.

    Loads one of several datasets, builds the evaluation graph, then
    either evaluates a saved checkpoint (``--eval_only``) or trains the
    LinkPredict model, periodically validating and checkpointing the
    best-MRR weights, and finally tests with the best checkpoint.

    Fixes over the original:
    - ``epoch + 1 % 100 == 0`` parsed as ``epoch + (1 % 100)`` and never
      triggered the periodic save; now ``(epoch + 1) % 100 == 0``.
    - metrics.json was written to the hard-coded ``/output/`` even though
      ``args.output_dir`` was just created; now written to
      ``args.output_dir``.
    - unused batch-level ``e1``/``rel`` lists removed.
    """
    # ---- load graph data ------------------------------------------------
    if args.dataset == "FB15K-237":
        data = load_data(args.dataset)
        num_nodes = data.num_nodes
        train_data = data.train
        valid_data = data.valid
        test_data = data.test
        num_rels = data.num_rels
        train_network = None
        # Store entity-wise dicts for filtered metrics
        all_tuples = train_data.tolist() + valid_data.tolist() + test_data.tolist()
    elif args.dataset == "atomic":
        num_nodes, train_data, valid_data, test_data, num_rels, valid_labels, test_labels, train_network = load_atomic_data(
            args.dataset, args.sim_relations)
        all_tuples = train_data.tolist() + valid_data.tolist() + test_data.tolist()
    elif args.dataset == "conceptnet":
        num_nodes, train_data, valid_data, test_data, num_rels, valid_labels, test_labels, train_network = load_cn_data(
            args.dataset, args.sim_relations, args.eval_accuracy)
        all_tuples = train_data.tolist() + valid_data.tolist() + test_data.tolist()
    elif args.dataset == "conceptnet-5.6":
        num_nodes, train_data, valid_data, test_data, num_rels, valid_labels, test_labels, train_network = load_cn_full_data(
            args.dataset, args.sim_relations)
        all_tuples = train_data.tolist() + valid_data.tolist() + test_data.tolist()
    elif args.dataset == "FB15k-237":
        num_nodes, train_data, valid_data, test_data, num_rels, valid_labels, test_labels, train_network = load_fb15k_data(
            args.dataset, args.sim_relations)
        all_tuples = train_data.tolist() + valid_data.tolist() + test_data.tolist()
    else:
        raise ValueError("Invalid Option for Dataset")

    # for filtered ranking
    all_e1_to_multi_e2, all_e2_to_multi_e1 = reader_utils.create_entity_dicts(
        all_tuples, num_rels, args.sim_relations)
    # for training
    train_e1_to_multi_e2, train_e2_to_multi_e1 = reader_utils.create_entity_dicts(
        train_data.tolist(), num_rels, args.sim_relations)

    # check cuda
    use_cuda = torch.cuda.is_available()
    if use_cuda and not args.no_cuda:
        torch.cuda.set_device(args.gpu)

    # create model
    model = LinkPredict(train_network, num_nodes, num_rels, args, use_cuda=use_cuda)

    # validation and testing triplets
    valid_data = torch.LongTensor(valid_data)
    test_data = torch.LongTensor(test_data)
    if use_cuda and not args.no_cuda:
        valid_data = valid_data.cuda()
        test_data = test_data.cuda()

    # ---- build test graph -----------------------------------------------
    if args.sim_sim and args.sim_relations:
        graph_train_data = utils.sim_sim_connect(train_data, train_data, num_rels)
    else:
        graph_train_data = train_data
    test_graph, test_rel, test_norm = utils.build_test_graph(
        num_nodes, num_rels, graph_train_data)
    test_deg = test_graph.in_degrees(range(
        test_graph.number_of_nodes())).float().view(-1, 1)
    test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
    test_rel = torch.from_numpy(test_rel).view(-1, 1)
    test_norm = torch.from_numpy(test_norm).view(-1, 1)
    if use_cuda and not args.no_cuda:
        test_node_id = test_node_id.cuda()
        test_norm = test_norm.cuda()
        test_rel = test_rel.cuda()
    test_graph.ndata.update({'id': test_node_id, 'norm': test_norm})
    test_graph.edata['type'] = test_rel

    if use_cuda and not args.no_cuda:
        model.cuda()

    # The checkpoint file name encodes the model configuration.
    name = '_standard_model_state.pth'
    name = "_" + args.model + "_" + args.decoder + name
    if args.sim_relations:
        name = "_sim_relations" + name
    if args.sim_sim:
        name = "_sim-sim" + name
    if args.bert_trainable:
        name = '_bert_trainable_model_state.pth'
    if args.bert:
        name = '_bert_model_state.pth'
    if args.input_layer == "bert":
        name = "_inp-bert" + name
    model_state_file = args.dataset + name
    writer = SummaryWriter("runs/" + model_state_file.replace(".pth", ".log"))

    # ---- evaluation-only path -------------------------------------------
    if args.eval_only:
        if args.model_name:
            model_state_file = args.model_name
        checkpoint = torch.load(model_state_file)
        model.eval()
        model.load_state_dict(checkpoint['state_dict'])
        print(model)
        # Only atomic/conceptnet loaders produce labels; others have none.
        if args.dataset != "atomic" and args.dataset != "conceptnet":
            valid_labels = None
            test_labels = None
        else:
            valid_labels = torch.LongTensor(valid_labels)
            test_labels = torch.LongTensor(test_labels)
        if args.eval_accuracy:
            # Threshold tuned on validation, then applied to test.
            threshold = utils.evaluate_accuracy(
                test_graph, model, valid_data, num_nodes,
                labels=valid_labels, network=train_network,
                eval_bz=args.eval_batch_size)
            utils.evaluate_accuracy(
                test_graph, model, test_data, num_nodes,
                labels=test_labels, network=train_network,
                threshold=threshold, eval_bz=args.eval_batch_size)
        else:
            mrr = utils.ranking_and_hits(
                test_graph, model, valid_data, all_e1_to_multi_e2,
                valid_labels, train_network, comb="graph",
                sim_relations=args.sim_relations)
            mrr = utils.ranking_and_hits(
                test_graph, model, test_data, all_e1_to_multi_e2,
                test_labels, train_network, comb="graph",
                sim_relations=args.sim_relations)
        sys.exit(0)

    # build adj list and calculate degrees for sampling
    adj_list, degrees, sparse_adj_matrix, rel = utils.get_adj_and_degrees(
        num_nodes, num_rels, train_data)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    if os.path.isfile(model_state_file):
        print(model_state_file)
        overwrite = input('Model already exists. Overwrite? Y = yes, N = no\n')
        if overwrite.lower() == 'n':
            print("Quitting")
            sys.exit(0)
        elif overwrite.lower() != 'y':
            raise ValueError("Invalid Option")

    forward_time = []
    backward_time = []

    # ---- training loop --------------------------------------------------
    print("Starting training...")
    epoch = 0
    best_mrr = 0
    while True:
        model.train()
        epoch += 1
        g = test_graph
        data = torch.from_numpy(graph_train_data)
        labels = None
        if use_cuda and not args.no_cuda:
            data = data.cuda()
        batch_size = 128
        e1_keys = list(train_e1_to_multi_e2.keys())
        random.shuffle(e1_keys)
        cum_loss = 0.0
        for i in range(0, len(e1_keys), batch_size):
            graph_embeddings = model.get_graph_embeddings(
                g, data, labels, train_network)
            optimizer.zero_grad()
            batch = e1_keys[i:i + batch_size]
            e2 = [train_e1_to_multi_e2[elem] for elem in batch]
            # Multi-hot target over all entities, with label smoothing.
            target = torch.zeros((len(batch), num_nodes))
            for j, inst in enumerate(e2):
                target[j, inst] = 1.0
            target = ((1.0 - args.label_smoothing_epsilon) *
                      target) + (1.0 / target.size(1))
            if use_cuda and not args.no_cuda:
                target = target.cuda()
            t0 = time.time()
            loss = model.get_score(batch, target, graph_embeddings,
                                   train_network)
            cum_loss += loss.cpu().item()
            t1 = time.time()
            loss.backward(retain_graph=True)
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.grad_norm)  # clip gradients
            optimizer.step()
            t2 = time.time()
            forward_time.append(t1 - t0)
            backward_time.append(t2 - t1)
        print(
            "Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s"
            .format(epoch, cum_loss, best_mrr, forward_time[-1],
                    backward_time[-1]))
        writer.add_scalar('data/loss', cum_loss, epoch)

        # Save model every 100 epochs.
        # BUGFIX: was `epoch + 1 % 100 == 0`, i.e. `epoch + (1 % 100)`,
        # which never equals 0 for positive epochs.
        if (epoch + 1) % 100 == 0:
            print("saving current model..")
            torch.save({
                'state_dict': model.state_dict(),
                'epoch': epoch
            }, model_state_file)

        # validation
        if epoch % args.evaluate_every == 0:
            model.eval()
            print("start eval")
            labels = torch.LongTensor(len(valid_data) * [1])
            if use_cuda and not args.no_cuda:
                labels = labels.cuda()
            mrr = utils.ranking_and_hits(
                test_graph, model, valid_data, all_e1_to_multi_e2, labels,
                train_network, comb="graph",
                sim_relations=args.sim_relations)
            writer.add_scalar('data/mrr', mrr, epoch)
            metrics = {"best_mrr": best_mrr, "cum_loss": cum_loss}
            os.makedirs(args.output_dir, exist_ok=True)
            # BUGFIX: metrics were written to the hard-coded "/output/";
            # write them into the directory we just created instead.
            with open(os.path.join(args.output_dir, 'metrics.json'), 'w') as f:
                f.write(json.dumps(metrics))
            # Keep the best-MRR checkpoint; stop once MRR degrades after
            # at least n_epochs epochs have run.
            if mrr < best_mrr:
                if epoch >= args.n_epochs:
                    break
            else:
                best_mrr = mrr
                torch.save({
                    'state_dict': model.state_dict(),
                    'epoch': epoch
                }, model_state_file)
            if use_cuda and not args.no_cuda:
                model.cuda()

    print("training done")
    print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
    print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()

    # ---- final test with the best checkpoint ----------------------------
    print("\nstart testing")
    checkpoint = torch.load(model_state_file)
    model.eval()
    model.load_state_dict(checkpoint['state_dict'])
    print("Using best epoch: {}".format(checkpoint['epoch']))
    labels = len(test_data) * [1]
    mrr = utils.ranking_and_hits(
        test_graph, model, test_data, all_e1_to_multi_e2, labels,
        train_network, comb="graph", sim_relations=args.sim_relations)
def main(args):
    """Train and benchmark an EGL RGCN entity classifier, reporting
    per-epoch forward/backward times, accuracy, and peak GPU memory.

    Fixes over the original: ``torch.cuda.synchronize()`` and the CUDA
    memory queries were called unconditionally and crash on CPU-only
    machines; they are now guarded by ``use_cuda``. Unused timing locals
    and a large commented-out analysis block were removed.
    """
    # load graph data
    data = load_data(args.dataset,
                     bfs_level=args.bfs_level,
                     relabel=args.relabel)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    test_idx = data.test_idx
    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        val_idx = train_idx
    # since the nodes are featureless, the input feature is then the node id.
    feats = torch.arange(num_nodes)
    # edge type and normalization factor
    edge_type = torch.from_numpy(data.edge_type).long()
    edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1).float()
    labels = torch.from_numpy(labels).view(-1).long()
    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)
        feats = feats.cuda()
        edge_type = edge_type.cuda()
        edge_norm = edge_norm.cuda()
        labels = labels.cuda()
    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges_with_type(data.edge_src, data.edge_dst, data.edge_type)
    model = EGLRGCNModel(num_nodes,
                         args.hidden_size,
                         num_classes,
                         num_rels,
                         edge_type.size(0),
                         num_bases=args.num_bases,
                         activation=F.relu,
                         dropout=args.dropout)
    if use_cuda:
        model.cuda()
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.l2norm)
    # training loop
    print("start training...")
    forward_time = []
    backward_time = []
    model.train()
    train_labels = labels[train_idx]
    train_idx = list(train_idx)
    for epoch in range(args.num_epochs):
        optimizer.zero_grad()
        t0 = time.time()
        logits = model(g, feats, edge_type, edge_norm)
        train_logits = logits[train_idx]
        loss = F.cross_entropy(train_logits, train_labels)
        t1 = time.time()
        loss.backward()
        optimizer.step()
        if use_cuda:
            # Wait for queued kernels so the backward timing is accurate.
            torch.cuda.synchronize()
        t2 = time.time()
        # Skip the first epochs so warm-up does not skew the averages.
        if epoch >= 3:
            forward_time.append(t1 - t0)
            backward_time.append(t2 - t1)
            print(
                "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}"
                .format(epoch, forward_time[-1], backward_time[-1]))
        train_acc = torch.sum(logits[train_idx].argmax(
            dim=1) == labels[train_idx]).item() / len(train_idx)
        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
        val_acc = torch.sum(logits[val_idx].argmax(
            dim=1) == labels[val_idx]).item() / len(val_idx)
        print(
            "Train Accuracy: {:.4f} | Train Loss: {:.4f} | Validation Accuracy: {:.4f} | Validation loss: {:.4f}"
            .format(train_acc, loss.item(), val_acc, val_loss.item()))
    if use_cuda:
        print('max memory allocated', torch.cuda.max_memory_allocated())

    model.eval()
    logits = model.forward(g, feats, edge_type, edge_norm)
    test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
    test_acc = torch.sum(logits[test_idx].argmax(
        dim=1) == labels[test_idx]).item() / len(test_idx)
    print("Test Accuracy: {:.4f} | Test loss: {:.4f}".format(
        test_acc, test_loss.item()))
    print()
    # Drop the first quarter of samples when averaging (warm-up).
    print("Mean forward time: {:4f}".format(
        np.mean(forward_time[len(forward_time) // 4:])))
    print("Mean backward time: {:4f}".format(
        np.mean(backward_time[len(backward_time) // 4:])))

    Used_memory = torch.cuda.max_memory_allocated(0) / (1024**3) if use_cuda else 0.0
    avg_run_time = np.mean(forward_time[len(forward_time) // 4:]) + np.mean(
        backward_time[len(backward_time) // 4:])
    #output we need
    print('^^^{:6f}^^^{:6f}'.format(Used_memory, avg_run_time))
def preprocess_dglrgcn(*, dataset, out_folder, bfs_level=3, relabel=False, reverse_edges=False):
    """Load a DGL RGCN node-classification dataset, build its graph with
    edge labels and normalisation factors, and persist the graph, label
    tensor and train/val/test masks into *out_folder*.

    :param dataset: dataset name understood by ``load_data``
    :param out_folder: destination directory for the saved artifacts
    :param bfs_level: forwarded to ``load_data`` (cast from str if given)
    :param relabel: forwarded to ``load_data`` (cast from str if given)
    :param reverse_edges: backwards edges are added to the graph, if True
        2x more edges + 2x more num_rels
    """
    # CLI-style callers may hand us strings; normalise them first.
    if isinstance(bfs_level, str):
        bfs_level = int(bfs_level)
    if isinstance(relabel, str):
        relabel = strtobool(relabel)
    if isinstance(reverse_edges, str):
        reverse_edges = strtobool(reverse_edges)

    data = load_data(dataset=dataset, bfs_level=bfs_level, relabel=relabel)
    labels = torch.squeeze(torch.LongTensor(data.labels))
    n_nodes = labels.shape[0]

    def _mask_from_indices(indices):
        # ByteTensor mask with ones at the given node indices.
        flags = np.zeros(n_nodes, dtype=int)
        flags[indices] = 1
        return torch.ByteTensor(flags)

    # The first fifth of the training indices becomes the validation split.
    split_at = len(data.train_idx) // 5
    val_mask = _mask_from_indices(data.train_idx[:split_at])
    train_mask = _mask_from_indices(data.train_idx[split_at:])
    test_mask = _mask_from_indices(data.test_idx)

    n_rels = data.num_rels

    # graph preprocess and calculate normalization factor
    g = DGLGraph()
    g.add_nodes(data.num_nodes)
    edge_type = torch.LongTensor(data.edge_type)
    g.add_edges(data.edge_src, data.edge_dst)
    if reverse_edges:
        # Mirror every edge; mirrored copies get labels shifted by n_rels.
        g.add_edges(data.edge_dst, data.edge_src)
        edge_type = torch.cat((edge_type, edge_type + n_rels), 0)
    g.edata[GNN_EDGE_LABELS_KEY] = edge_type
    g.edata[GNN_EDGE_NORM] = torch.from_numpy(data.edge_norm).unsqueeze(1)

    save_pickle(g, complete_path(out_folder, GRAPH))
    save_pickle(2 * n_rels if reverse_edges else n_rels,
                complete_path(out_folder, N_RELS))
    save_pickle(data.num_classes, complete_path(out_folder, N_CLASSES))
    torch.save(labels, complete_path(out_folder, LABELS))
    torch.save(train_mask, complete_path(out_folder, TRAIN_MASK))
    torch.save(test_mask, complete_path(out_folder, TEST_MASK))
    torch.save(val_mask, complete_path(out_folder, VAL_MASK))
def main(args):
    """Train and evaluate an RGCN entity classifier with MXNet/Gluon on a
    DGL dataset, printing per-epoch timings and accuracies."""
    # load graph data
    data = load_data(args.dataset,
                     bfs_level=args.bfs_level,
                     relabel=args.relabel)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    test_idx = data.test_idx
    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        # No held-out split: validate on the training indices.
        val_idx = train_idx
    train_idx = mx.nd.array(train_idx)
    # edge type and normalization factor
    edge_type = mx.nd.array(data.edge_type)
    edge_norm = mx.nd.array(data.edge_norm).expand_dims(1)
    labels = mx.nd.array(labels).reshape((-1))
    # check cuda
    use_cuda = args.gpu >= 0
    if use_cuda:
        ctx = mx.gpu(args.gpu)
        edge_type = edge_type.as_in_context(ctx)
        edge_norm = edge_norm.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        train_idx = train_idx.as_in_context(ctx)
    else:
        ctx = mx.cpu(0)
    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(data.edge_src, data.edge_dst)
    g.edata.update({'type': edge_type, 'norm': edge_norm})
    # create model
    model = EntityClassify(len(g),
                           args.n_hidden,
                           num_classes,
                           num_rels,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           gpu_id=args.gpu)
    model.initialize(ctx=ctx)
    # optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.l2norm})
    loss_fcn = gluon.loss.SoftmaxCELoss(from_logits=False)
    # training loop
    print("start training...")
    forward_time = []
    backward_time = []
    for epoch in range(args.n_epochs):
        t0 = time.time()
        # Record the forward pass so gradients can be computed.
        with mx.autograd.record():
            pred = model(g)
            loss = loss_fcn(pred[train_idx], labels[train_idx])
        t1 = time.time()
        loss.backward()
        trainer.step(len(train_idx))
        t2 = time.time()
        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print("Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}".
              format(epoch, forward_time[-1], backward_time[-1]))
        train_acc = F.sum(pred[train_idx].argmax(
            axis=1) == labels[train_idx]).asscalar() / train_idx.shape[0]
        val_acc = F.sum(pred[val_idx].argmax(
            axis=1) == labels[val_idx]).asscalar() / len(val_idx)
        print("Train Accuracy: {:.4f} | Validation Accuracy: {:.4f}".format(
            train_acc, val_acc))
    print()

    logits = model(g)
    test_acc = F.sum(logits[test_idx].argmax(
        axis=1) == labels[test_idx]).asscalar() / len(test_idx)
    print("Test Accuracy: {:.4f}".format(test_acc))
    print()
    # Drop the first quarter of samples when averaging (warm-up epochs).
    print("Mean forward time: {:4f}".format(
        np.mean(forward_time[len(forward_time) // 4:])))
    print("Mean backward time: {:4f}".format(
        np.mean(backward_time[len(backward_time) // 4:])))
# NOTE(review): the `return` below is the tail of a method whose `def` lies
# outside this chunk; it builds the final, softmax-activated RGCN layer.
        return RGCNLayer(self.h_dim, self.out_dim, self.num_rels, self.num_bases,
                         activation = partial(F.softmax, dim = 1))

    def forward(self, g):
        """Run every RGCN layer over graph *g* in sequence and return the
        resulting node features stored under ``g.ndata['h']``."""
        if self.features is not None:
            # Seed the node 'id' feature from the stored features.
            g.ndata["id"] = self.features
        for layer in self.layers:
            layer(g)
        return g.ndata.pop("h")


# ////////////////// Inspecting the dataset ////////////////////
# from dgl.contrib.data import load_data
data = load_data(dataset = "aifb")
num_nodes = data.num_nodes
print(num_nodes)
# // returns integer -> 8285 this is the number of nodes in this dataset
num_rels = data.num_rels
print(num_rels)
# // returns integer -> 91 this is the number of relations but not sure if this is similar to features or what exactly this is
num_classes = data.num_classes
print(num_classes)
# // returns integer -> 4 classes that the algorithm assigns data points to
labels = data.labels
print(labels)
# // returns a vector: [[0][0][0]..]
# Peek at the first ten labels only.
for i in range(len(labels)):
    if i == 10:
        break
    print(labels[i])
# -> returns either [0], [1], [2], [3]
def main(args):
    """Train an RGCN entity classifier on a DGL dataset and report the
    per-epoch training loss and total wall-clock time."""
    # load graph data
    data = load_data(args.dataset,
                     bfs_level=args.bfs_level,
                     relabel=args.relabel)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    test_idx = data.test_idx
    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        # No held-out split: validate on the training indices.
        val_idx = train_idx
    # edge type and normalization factor
    edge_type = torch.from_numpy(data.edge_type)
    edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)
    labels = torch.from_numpy(labels).view(-1)
    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)
        edge_type = edge_type.cuda()
        edge_norm = edge_norm.cuda()
        labels = labels.cuda()
    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(data.edge_src, data.edge_dst)
    g.edata.update({'type': edge_type, 'norm': edge_norm})
    print(g.number_of_nodes(), g.number_of_edges(), num_classes)
    # create model
    model = EntityClassify(len(g),
                           args.n_hidden,
                           num_classes,
                           num_rels,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.l2norm)
    # training loop
    print("start training...")
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        optimizer.zero_grad()
        logits = model.forward(g)
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss.backward()
        optimizer.step()
        #model.eval()
        #logits = model.forward(g)
        #val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx)
        #print("Train Loss: {:.4f}, val acc {:.4f}, time {:.4f}".format(loss.item(), val_acc, time.time() - start))
        print("Train Loss: {:.4f}".format(loss.item()))
    end = time.time()
    # Total training wall-clock time in seconds.
    print(end - start)
def domodel190420(dataset = 'mutag'):
    """Grid-search RNN/RGCN hyperparameters on a DGL dataset and write
    one CSV row per configuration with its final validation accuracy.

    :param dataset: DGL dataset name; also selects the num_bases grid.
    """
    data = load_data(dataset=dataset)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    # split training and validation set
    val_idx = train_idx[:len(train_idx) // 5]
    train_idx = train_idx[len(train_idx) // 5:]
    # edge type and normalization factor
    edge_type = torch.from_numpy(data.edge_type)
    edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)
    labels = torch.from_numpy(labels).view(-1)
    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(data.edge_src, data.edge_dst)
    g.edata.update({'type': edge_type.long(), 'norm': edge_norm})
    # Node-id inputs, one column vector.
    inputs = torch.arange(num_nodes).reshape(-1,1)
    # Parallel lists recording each tried configuration and its result.
    RNN_Hidden_Size = []
    RGCN_Input_Size = []
    RGCN_Hidden_Size = []
    DROUPOUT = []
    Num_Bases = []
    Val_Acc = []
    # num_bases grid depends on the dataset.
    Numbase = [40]
    if dataset == 'mutag':
        Numbase = [0,30]
    elif dataset == 'aifb':
        Numbase = [0]
    for RNN_hidden_size in [20,30,40,50]:
        for RGCN_input_size in [10,20,30,40]:
            for RGCN_hidden_size in [10,20,30,40]:
                for dropout in [0,0.1,0.2,0.3,0.4,0.5]:
                    for Num_bases in Numbase:
                        RNN_Hidden_Size.append(RNN_hidden_size)
                        RGCN_Input_Size.append(RGCN_input_size)
                        RGCN_Hidden_Size.append(RGCN_hidden_size)
                        RNN_input_size = num_nodes
                        DROUPOUT.append(dropout)
                        Num_Bases.append(Num_bases)
                        # RNN_hidden_size = 50
                        # RGCN_input_size = 40
                        # RGCN_hidden_size = 20
                        Num_classes = num_classes
                        Num_rels = num_rels
                        #dropout = 0.5
                        activation = F.relu
                        sequence_length = 1
                        #Num_bases=30
                        lr = 0.01 # learning rate
                        l2norm = 5e-4 # L2 norm coefficient
                        n_epochs = 50 # epochs to train
                        # Fresh model and optimizer for each configuration.
                        model = Model(RNN_input_size, RNN_hidden_size,
                                      RGCN_input_size, RGCN_hidden_size,
                                      Num_classes, Num_rels,
                                      Num_bases=Num_bases,
                                      Num_hidden_layers=0,
                                      dropout=dropout)
                        optimizer = torch.optim.Adam(model.parameters(),
                                                     lr=lr,
                                                     weight_decay=l2norm)
                        criterion = nn.CrossEntropyLoss()
                        print("start training...")
                        model.train()
                        for epoch in range(n_epochs):
                            optimizer.zero_grad()
                            logits = model.forward(g,inputs,sequence_length)
                            loss = criterion(logits[train_idx], labels[train_idx].long())
                            loss.backward()
                            optimizer.step()
                            train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx].long())
                            train_acc = train_acc.item() / len(train_idx)
                            # Early-stop this config once it fits train perfectly.
                            # NOTE(review): if this fires on the very first epoch of
                            # the first configuration, val_acc below is unbound —
                            # verify against the caller's grids.
                            if train_acc == 1:
                                break
                            val_loss = F.cross_entropy(logits[val_idx], labels[val_idx].long())
                            val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx].long())
                            val_acc = val_acc.item() / len(val_idx)
                            print("Epoch {:05d} | ".format(epoch) +
                                  "Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format(
                                      train_acc, loss.item()) +
                                  "Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format(
                                      val_acc, val_loss.item()))
                        # Record the last validation accuracy for this config.
                        Val_Acc.append(val_acc)
    # One CSV row per configuration: sizes, dropout, bases, val accuracy.
    c=[]
    for i in range(len(RNN_Hidden_Size)):
        c.append([RNN_Hidden_Size[i],RGCN_Input_Size[i],RGCN_Hidden_Size[i],DROUPOUT[i],Num_Bases[i],Val_Acc[i]])
    with open('result.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for row in c:
            writer.writerow(row)
def main(args):
    """Train and evaluate an RGCN entity classifier with TensorFlow on a
    DGL dataset, printing per-epoch timings, losses and accuracies."""
    # load graph data
    data = load_data(args.dataset,
                     bfs_level=args.bfs_level,
                     relabel=args.relabel)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    test_idx = data.test_idx
    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        # No held-out split: validate on the training indices.
        val_idx = train_idx
    # since the nodes are featureless, the input feature is then the node id.
    feats = tf.range(num_nodes, dtype=tf.int64)
    # edge type and normalization factor
    edge_type = tf.convert_to_tensor(data.edge_type)
    edge_norm = tf.expand_dims(tf.convert_to_tensor(data.edge_norm), 1)
    labels = tf.reshape(tf.convert_to_tensor(labels), (-1, ))
    # check cuda
    if args.gpu < 0:
        device = "/cpu:0"
        use_cuda = False
    else:
        device = "/gpu:{}".format(args.gpu)
        use_cuda = True
    with tf.device(device):
        # create graph
        g = DGLGraph()
        g.add_nodes(num_nodes)
        g.add_edges(data.edge_src, data.edge_dst)
        # create model
        model = EntityClassify(len(g),
                               args.n_hidden,
                               num_classes,
                               num_rels,
                               num_bases=args.n_bases,
                               num_hidden_layers=args.n_layers - 2,
                               dropout=args.dropout,
                               use_self_loop=args.use_self_loop,
                               use_cuda=use_cuda)
        # optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
        # training loop
        print("start training...")
        forward_time = []
        backward_time = []
        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False)
        for epoch in range(args.n_epochs):
            t0 = time.time()
            with tf.GradientTape() as tape:
                logits = model(g, feats, edge_type, edge_norm)
                loss = loss_fcn(tf.gather(labels, train_idx),
                                tf.gather(logits, train_idx))
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss = loss + \
                        args.l2norm * tf.nn.l2_loss(weight)
            t1 = time.time()
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            t2 = time.time()
            forward_time.append(t1 - t0)
            backward_time.append(t2 - t1)
            print(
                "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}"
                .format(epoch, forward_time[-1], backward_time[-1]))
            train_acc = acc(logits, labels, train_idx)
            val_loss = loss_fcn(tf.gather(labels, val_idx),
                                tf.gather(logits, val_idx))
            val_acc = acc(logits, labels, val_idx)
            print(
                "Train Accuracy: {:.4f} | Train Loss: {:.4f} | Validation Accuracy: {:.4f} | Validation loss: {:.4f}"
                .format(train_acc, loss.numpy().item(), val_acc,
                        val_loss.numpy().item()))
        print()

        logits = model(g, feats, edge_type, edge_norm)
        test_loss = loss_fcn(tf.gather(labels, test_idx),
                             tf.gather(logits, test_idx))
        test_acc = acc(logits, labels, test_idx)
        print("Test Accuracy: {:.4f} | Test loss: {:.4f}".format(
            test_acc, test_loss.numpy().item()))
        print()
        # Drop the first quarter of samples when averaging (warm-up epochs).
        print("Mean forward time: {:4f}".format(
            np.mean(forward_time[len(forward_time) // 4:])))
        print("Mean backward time: {:4f}".format(
            np.mean(backward_time[len(backward_time) // 4:])))
def main(args):
    """Train and evaluate an R-GCN entity classifier (PyTorch backend).

    ``args`` carries dataset/model hyperparameters: dataset, bfs_level,
    relabel, validation, gpu, n_hidden, n_bases, n_layers, dropout,
    use_self_loop, lr, l2norm, n_epochs.
    """
    # load graph data
    data = load_data(args.dataset,
                     bfs_level=args.bfs_level,
                     relabel=args.relabel)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    test_idx = data.test_idx

    # split dataset into train, validate, test
    if args.validation:
        # first 20% of the training indices become the validation split
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        # no held-out split: validation reuses the training indices
        val_idx = train_idx

    # since the nodes are featureless, the input feature is then the node id.
    feats = torch.arange(num_nodes)

    # edge type and normalization factor
    edge_type = torch.from_numpy(data.edge_type)
    edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)
    labels = torch.from_numpy(labels).view(-1)

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)
        feats = feats.cuda()
        edge_type = edge_type.cuda()
        edge_norm = edge_norm.cuda()
        labels = labels.cuda()

    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(data.edge_src, data.edge_dst)

    # create model
    model = EntityClassify(len(g),
                           args.n_hidden,
                           num_classes,
                           num_rels,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           use_self_loop=args.use_self_loop,
                           use_cuda=use_cuda)
    if use_cuda:
        model.cuda()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.l2norm)

    # training loop
    print("start training...")
    forward_time = []
    backward_time = []
    model.train()
    for epoch in range(args.n_epochs):
        optimizer.zero_grad()
        t0 = time.time()
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        t1 = time.time()
        loss.backward()
        optimizer.step()
        t2 = time.time()

        # t0→t1 forward pass; t1→t2 backward pass plus optimizer step
        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}"
            .format(epoch, forward_time[-1], backward_time[-1]))
        train_acc = torch.sum(logits[train_idx].argmax(
            dim=1) == labels[train_idx]).item() / len(train_idx)
        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
        val_acc = torch.sum(logits[val_idx].argmax(
            dim=1) == labels[val_idx]).item() / len(val_idx)
        print(
            "Train Accuracy: {:.4f} | Train Loss: {:.4f} | Validation Accuracy: {:.4f} | Validation loss: {:.4f}"
            .format(train_acc, loss.item(), val_acc, val_loss.item()))
    print()

    # final evaluation on the test split (fresh forward pass in eval mode)
    model.eval()
    logits = model.forward(g, feats, edge_type, edge_norm)
    test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
    test_acc = torch.sum(logits[test_idx].argmax(
        dim=1) == labels[test_idx]).item() / len(test_idx)
    print("Test Accuracy: {:.4f} | Test loss: {:.4f}".format(
        test_acc, test_loss.item()))
    print()

    # drop the first quarter of epochs as warm-up when averaging times
    print("Mean forward time: {:4f}".format(
        np.mean(forward_time[len(forward_time) // 4:])))
    print("Mean backward time: {:4f}".format(
        np.mean(backward_time[len(backward_time) // 4:])))
if self.features is not None: g.ndata['id'] = self.features for layer in self.layers: layer(g) return g.ndata.pop('h') ############################################################################### # Handle dataset # ~~~~~~~~~~~~~~~~ # In this tutorial, we use AIFB dataset from R-GCN paper: # load graph data from dgl.contrib.data import load_data import numpy as np data = load_data(dataset='aifb') num_nodes = data.num_nodes num_rels = data.num_rels num_classes = data.num_classes labels = data.labels train_idx = data.train_idx # split training and validation set val_idx = train_idx[:len(train_idx) // 5] train_idx = train_idx[len(train_idx) // 5:] # edge type and normalization factor edge_type = torch.from_numpy(data.edge_type) edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1) labels = torch.from_numpy(labels).view(-1)
from dgl.data import citation_graph from dgl.contrib.data import load_data from dgl import DGLGraph from runtime.dgl.gcn import GCN, GCNSPMV from runtime.dgl.gat import GAT, GATSPMV from runtime.dgl.rgcn import RGCN, RGCNSPMV from runtime.dgl.train import train_runtime from runtime.dgl.hidden import HiddenPrint device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') with HiddenPrint(): Cora = citation_graph.load_cora() CiteSeer = citation_graph.load_citeseer() PubMed = citation_graph.load_pubmed() MUTAG = load_data('mutag') # fair comparison # One training run before we start tracking duration to warm up GPU. g = DGLGraph(Cora.graph) g.set_n_initializer(dgl.init.zero_initializer) g.add_edges(g.nodes(), g.nodes()) norm = torch.pow(g.in_degrees().float(), -0.5) norm[torch.isinf(norm)] = 0 g.ndata['norm'] = norm.unsqueeze(1).to(device) model = GCNSPMV(g, Cora.features.shape[1], Cora.num_labels).to(device) train_runtime(model, Cora, epochs=200, device=device) for d, Net in product([Cora, CiteSeer, PubMed], [GCN, GCNSPMV, GAT, GATSPMV]): g = DGLGraph(d.graph) g.set_n_initializer(dgl.init.zero_initializer) g.add_edges(g.nodes(), g.nodes())
def main(args):
    """Train an R-GCN link-prediction model with checkpoint resume, periodic
    validation, best-model saving and a final test evaluation.

    Relies on module-level helpers: ``load_data``, ``load_model``,
    ``load_best_model``, ``save_model``, ``save_best_model``, ``utils`` and
    the ``use_shuriken`` / ``ShurikenMonitor`` monitoring hooks.

    Fix over the previous revision: the recurrent relation-weight tensor was
    created with an unconditional ``.cuda()`` call, crashing on CPU-only
    machines even though every other transfer is guarded by ``use_cuda``.
    """
    # load graph data
    data = load_data(args.dataset)
    num_nodes = data.num_nodes
    train_data = data.train
    valid_data = data.valid
    test_data = data.test
    num_rels = data.num_rels
    all_data = np.concatenate((train_data, valid_data, test_data), axis=0)

    if use_shuriken:
        monitor = ShurikenMonitor()
    use_cuda = torch.cuda.is_available()

    # create model
    model = LinkPredict(num_nodes,
                        args.n_hidden,
                        num_rels,
                        args.model,
                        num_bases=args.n_bases,
                        num_hidden_layers=args.n_layers,
                        dropout=args.dropout,
                        use_cuda=use_cuda,
                        reg_param=args.regularization,
                        skip_connection=args.skip_connection,
                        rel_activation=args.rel_activation,
                        rel_dropout=args.rel_dropout)

    # check if there is a model with the same hyperparameters saved
    new_model, res = load_model(args, model)
    epoch = 0
    if res != 0:
        # resume: res is the epoch the checkpoint was saved at
        model = new_model
        epoch = res
    best_model, best_mrr = load_best_model(args, model)

    # validation and testing triplets
    valid_data = torch.LongTensor(valid_data)
    test_data = torch.LongTensor(test_data)
    # all_data = torch.LongTensor(all_data.astype(set))

    # build test graph (training triplets only, so evaluation has no leakage)
    test_graph, test_rel, test_norm, incidence_in_test, incidence_out_test = \
        utils.build_test_graph(num_nodes, num_rels, train_data)
    test_deg = test_graph.in_degrees(range(
        test_graph.number_of_nodes())).float().view(-1, 1)
    test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
    test_rel = torch.from_numpy(test_rel)
    test_norm = torch.from_numpy(test_norm).view(-1, 1)
    test_graph.ndata.update({'id': test_node_id, 'norm': test_norm})
    test_graph.edata['type'] = test_rel

    if use_cuda:
        model.cuda()

    # build adj list and calculate degrees for sampling
    adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    forward_time = []
    backward_time = []
    print("start training...")

    # BUG FIX: was `torch.randn(...).cuda()` unconditionally; only move to
    # the GPU when CUDA is actually available.
    weight = torch.randn(num_rels * 2, args.n_hidden)
    if use_cuda:
        weight = weight.cuda()

    while True:
        model.train()
        epoch += 1
        # perform edge neighborhood sampling to generate training graph and data
        g, node_id, edge_type, node_norm, data, labels, incidence_in, incidence_out = \
            utils.generate_sampled_graph_and_labels(
                train_data, args.graph_batch_size, args.graph_split_size,
                num_rels, adj_list, degrees, args.negative_sample)

        # set node/edge feature
        node_id = torch.from_numpy(node_id).view(-1, 1)
        edge_type = torch.from_numpy(edge_type)
        node_norm = torch.from_numpy(node_norm).view(-1, 1)
        data, labels = torch.from_numpy(data), torch.from_numpy(labels)
        deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
        if use_cuda:
            node_id, deg = node_id.cuda(), deg.cuda()
            edge_type, node_norm = edge_type.cuda(), node_norm.cuda()
            data, labels = data.cuda(), labels.cuda()
        g.ndata.update({'id': node_id, 'norm': node_norm})
        g.edata['type'] = edge_type

        t0 = time.time()
        loss, weight = model.get_loss(g, data, labels, weight, incidence_in,
                                      incidence_out)
        # detach so the carried-over relation weights do not keep the
        # previous iteration's autograd graph alive
        weight = weight.detach()
        t1 = time.time()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.grad_norm)  # clip gradients
        optimizer.step()
        t2 = time.time()

        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s"
            .format(epoch, loss.item(), best_mrr, forward_time[-1],
                    backward_time[-1]))
        # grads are zeroed after the step so the next backward starts clean
        optimizer.zero_grad()

        # validation
        if epoch % args.evaluate_every == 0:
            save_model(args, model, epoch, optimizer)
            # perform validation on CPU because full graph is too large
            if use_cuda:
                model.cpu()
            model.eval()
            print("start eval")
            # mrr_f = utils.evaluate_filtered(test_graph, model, valid_data, all_data, num_nodes, weight, incidence_in_test, incidence_out_test, hits=[1, 3, 10], eval_bz=args.eval_batch_size)
            # keep the relation weights on the same (CPU) device as the
            # model during evaluation; .cpu() is a no-op when already there
            mrr = utils.evaluate(test_graph,
                                 model,
                                 valid_data,
                                 num_nodes,
                                 weight.cpu(),
                                 incidence_in_test,
                                 incidence_out_test,
                                 hits=[1, 3, 10],
                                 eval_bz=args.eval_batch_size)
            if use_shuriken:
                monitor.send_info(epoch, {"mrr": mrr})

            # save best model; stop once n_epochs is reached without improvement
            if mrr < best_mrr:
                if epoch >= args.n_epochs:
                    break
            else:
                best_mrr = mrr
                best_model = model
                try:
                    os.makedirs(args.model)
                except FileExistsError:
                    pass
                save_best_model(args, model, best_mrr)
            if use_cuda:
                model.cuda()

    print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
    print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))

    print("\nstart testing:")
    # use best model checkpoint
    # checkpoint = torch.load(args.model + "/" + model_state_file)
    if use_cuda:
        best_model.cpu()  # test on CPU
    best_model.eval()
    test_mrr = utils.evaluate(test_graph,
                              best_model,
                              test_data,
                              num_nodes,
                              weight.cpu(),
                              incidence_in_test,
                              incidence_out_test,
                              hits=[1, 3, 10],
                              eval_bz=args.eval_batch_size)
    if use_shuriken:
        monitor.send_info(epoch, {"test mrr": test_mrr})
g.ndata['id'] = self.features for layer in self.layers: layer(g) return g.ndata.pop('h') ############################################################################### # Handle dataset # ~~~~~~~~~~~~~~~~ # This tutorial uses Institute for Applied Informatics and Formal Description Methods (AIFB) dataset from R-GCN paper. # load graph data from dgl.contrib.data import load_data import numpy as np data = load_data(dataset='mutag') #if dataset in ['aifb', 'mutag', 'bgs', 'am']: num_nodes = data.num_nodes num_rels = data.num_rels num_classes = data.num_classes labels = data.labels train_idx = data.train_idx # split training and validation set val_idx = train_idx[:len(train_idx) // 5] train_idx = train_idx[len(train_idx) // 5:] # edge type and normalization factor edge_type = torch.from_numpy(data.edge_type) edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1) labels = torch.from_numpy(labels).view(-1)
def main(args): # load graph data data = load_data( args.dataset, bfs_level=args.bfs_level, relabel=args.relabel) num_nodes = data.num_nodes num_rels = data.num_rels num_classes = data.num_classes labels = data.labels train_idx = data.train_idx # edge type and normalization factor edge_type = torch.from_numpy(data.edge_type) edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1) labels = torch.from_numpy(labels).view(-1) # check cuda use_cuda = args.gpu >= 0 and torch.cuda.is_available() if use_cuda: torch.cuda.set_device(args.gpu) edge_type = edge_type.cuda() edge_norm = edge_norm.cuda() labels = labels.cuda() # create graph g = DGLGraph() g.add_nodes(num_nodes) g.add_edges(data.edge_src, data.edge_dst) g.edata.update({'type': edge_type, 'norm': edge_norm}) # create model model = EntityClassify( len(g), args.n_hidden, num_classes, num_rels, num_bases=args.n_bases, num_hidden_layers=args.n_layers - 2, dropout=args.dropout, use_cuda=use_cuda) if use_cuda: model.cuda() # optimizer optimizer = torch.optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.l2norm) # training loop print("start training...") forward_time = [] backward_time = [] model.train() for epoch in range(args.n_epochs): optimizer.zero_grad() t0 = time.time() logits = model.forward(g) loss = F.cross_entropy(logits[train_idx], labels[train_idx]) t1 = time.time() loss.backward() optimizer.step() t2 = time.time() forward_time.append(t1 - t0) backward_time.append(t2 - t1)
def main(args):
    """Train an R-GCN entity classifier with optional attention / Tucker /
    embedding-lookup variants, early stopping on validation accuracy, and a
    final test evaluation (plus optional norm-history dumps for Tucker runs).

    Fix over the previous revision: the orthogonal regularizer accumulated
    into an uninitialized ``torch.empty`` tensor and was added to the loss
    via ``orth_loss.item()`` — a detached Python float — so it never
    contributed gradients. It now starts from zeros and stays in the
    autograd graph.
    """
    # load graph data
    data = load_data(args.dataset,
                     bfs_level=args.bfs_level,
                     relabel=args.relabel)
    num_nodes = data.num_nodes
    num_rels = data.num_rels
    num_classes = data.num_classes
    labels = data.labels
    train_idx = data.train_idx
    test_idx = data.test_idx

    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        val_idx = train_idx

    # edge type and normalization factor
    edge_type = torch.from_numpy(data.edge_type)
    edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1)
    labels = torch.from_numpy(labels).view(-1)

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)
        edge_type = edge_type.cuda()
        edge_norm = edge_norm.cuda()
        labels = labels.cuda()
        device = 'cuda'
    else:
        device = 'cpu'

    # create graph
    g = DGLGraph()
    g.add_nodes(num_nodes)
    g.add_edges(data.edge_src, data.edge_dst)
    g.edata.update({'type': edge_type, 'norm': edge_norm})

    # create model — variant selected by mutually exclusive CLI flags
    if args.attention:
        print("Using Attention")
        model = EntityClassifyAttention(len(g),
                                        args.n_hidden,
                                        num_classes,
                                        num_rels,
                                        num_bases=args.n_bases,
                                        num_hidden_layers=args.n_layers - 2,
                                        dropout=args.dropout,
                                        use_cuda=use_cuda)
    elif args.tucker:
        print("Using Tucker decomposition")
        model = EntityClassifyTucker(len(g),
                                     args.n_hidden,
                                     num_classes,
                                     num_rels,
                                     num_bases=args.n_bases,
                                     core_t=args.core_t,
                                     num_hidden_layers=args.n_layers - 2,
                                     dropout=args.dropout,
                                     use_cuda=use_cuda)
    elif args.embedding:
        print("Using Node Embedding Lookup")
        model = EntityClassifyEmbedding(len(g),
                                        args.n_hidden,
                                        num_classes,
                                        num_rels,
                                        num_bases=args.n_bases,
                                        num_hidden_layers=args.n_layers - 2,
                                        dropout=args.dropout,
                                        use_cuda=use_cuda)
    else:
        model = EntityClassify(len(g),
                               args.n_hidden,
                               num_classes,
                               num_rels,
                               num_bases=args.n_bases,
                               num_hidden_layers=args.n_layers - 2,
                               dropout=args.dropout,
                               use_cuda=use_cuda)
    if use_cuda:
        model.cuda()

    # print number of params
    def count_parameters(model):
        # total count of trainable parameters
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    def print_parameters(model):
        # per-parameter element counts, for debugging
        for n, p in model.named_parameters():
            print(n, p.numel())

    gparams = count_parameters(model)
    print("Params : ", gparams)

    for name, par in model.named_parameters():
        print(name, par.shape)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.l2norm)

    # training loop
    print("start training...")
    forward_time = []
    backward_time = []
    if args.tucker:
        norms = {}  # history of core / factor-matrix norms per parameter
    model.train()
    last_val_acc = 0
    patience = 0
    for epoch in range(args.n_epochs):
        optimizer.zero_grad()
        t0 = time.time()
        logits = model.forward(g)
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        if args.tucker:
            # track norms of the Tucker core and factor matrices
            for name, param in model.named_parameters():
                if 'core' in name or 'Us' in name:
                    norms.setdefault(name, []).append(torch.norm(param).item())
            if args.orthogonal_reg:
                # apply orthogonal regularization to the factor matrices
                reg = 1e-6
                # FIX: zeros instead of uninitialized torch.empty, and the
                # term stays a tensor so gradients flow into the factors.
                orth_loss = torch.zeros(1, device=device, dtype=torch.float32)
                for name, param in model.named_parameters():
                    if 'Us' in name:
                        param_flat = param.view(param.shape[0], -1)
                        sym = torch.mm(param_flat, torch.t(param_flat))
                        # deviation of U·Uᵀ from the identity.
                        # NOTE(review): sym.sum() can be negative; a squared
                        # Frobenius norm would be the standard penalty —
                        # formulation kept as originally written.
                        sym = sym - torch.eye(param_flat.shape[0],
                                              device=device)
                        orth_loss = orth_loss + (reg * sym.sum())
                loss = loss + orth_loss.sum()
        t1 = time.time()
        loss.backward()
        optimizer.step()
        t2 = time.time()

        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}"
            .format(epoch, forward_time[-1], backward_time[-1]))
        train_acc = torch.sum(logits[train_idx].argmax(
            dim=1) == labels[train_idx]).item() / len(train_idx)
        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
        val_acc = torch.sum(logits[val_idx].argmax(
            dim=1) == labels[val_idx]).item() / len(val_idx)

        # early-stopping bookkeeping on validation accuracy
        if last_val_acc < val_acc:
            last_val_acc = val_acc
            patience = 0
        else:
            patience += 1
        print(
            "Train Accuracy: {:.4f} | Train Loss: {:.4f} | Validation Accuracy: {:.4f} | Validation loss: {:.4f}"
            .format(train_acc, loss.item(), val_acc, val_loss.item()))
        if patience > args.patience:
            print("Exceeded patience, breaking...")  # typo fixed
            break
    print()

    # final test evaluation
    model.eval()
    logits = model.forward(g)
    test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
    test_acc = torch.sum(logits[test_idx].argmax(
        dim=1) == labels[test_idx]).item() / len(test_idx)
    print(
        "[togrep] | Run: {} | Test Accuracy: {:.4f} | Test loss: {:.4f} | Params: {}"
        .format(args.run, test_acc, test_loss.item(), gparams))
    print()
    print("Mean forward time: {:4f}".format(
        np.mean(forward_time[len(forward_time) // 4:])))
    print("Mean backward time: {:4f}".format(
        np.mean(backward_time[len(backward_time) // 4:])))

    predicted_labels = logits[test_idx].argmax(dim=1).cpu().numpy()
    gold_labels = labels[test_idx].cpu().numpy()
    indexes = test_idx
    predictions = {
        'index': test_idx,
        'gold_labels': gold_labels,
        'predicted_labels': predicted_labels,
        'indexes': indexes
    }
    #pkl.dump(predictions, open('rgcn_{}_run_{}_predictions.pkl'.format(args.dataset, args.run),'wb'))

    # dump norm histories; context manager so the file handle is closed
    if args.tucker:
        with open('tucker_{}_run_{}_norms.pkl'.format(args.dataset, args.run),
                  'wb') as fh:
            pkl.dump(norms, fh)
def main(args):
    """Train and evaluate an R-GCN link predictor (optionally with a Tucker
    decomposition of the relation weights)."""
    # load graph data
    data = load_data(args.dataset)
    num_nodes = data.num_nodes
    train_data = data.train
    valid_data = data.valid
    test_data = data.test
    num_rels = data.num_rels

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)

    # create model
    model = LinkPredict(num_nodes,
                        args.n_hidden,
                        num_rels,
                        num_bases=args.n_bases,
                        num_hidden_layers=args.n_layers,
                        dropout=args.dropout,
                        use_cuda=use_cuda,
                        reg_param=args.regularization,
                        tucker=args.tucker,
                        tucker_core=args.tucker_core)

    # validation and testing triplets
    valid_data = torch.LongTensor(valid_data)
    test_data = torch.LongTensor(test_data)

    # build test graph (from training triplets only, to avoid leakage)
    test_graph, test_rel, test_norm = utils.build_test_graph(
        num_nodes, num_rels, train_data)
    test_deg = test_graph.in_degrees(range(
        test_graph.number_of_nodes())).float().view(-1, 1)
    test_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
    test_rel = torch.from_numpy(test_rel)
    test_norm = torch.from_numpy(test_norm).view(-1, 1)
    test_graph.ndata.update({'id': test_node_id, 'norm': test_norm})
    test_graph.edata['type'] = test_rel

    if use_cuda:
        model.cuda()

    # build adj list and calculate degrees for sampling
    adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    model_state_file = 'model_state.pth'
    forward_time = []
    backward_time = []

    # training loop
    print("start training...")
    epoch = 0
    best_mrr = 0
    while True:
        model.train()
        epoch += 1
        # perform edge neighborhood sampling to generate training graph and data
        g, node_id, edge_type, node_norm, data, labels = \
            utils.generate_sampled_graph_and_labels(
                train_data, args.graph_batch_size, args.graph_split_size,
                num_rels, adj_list, degrees, args.negative_sample)
        print("Done edge sampling")

        # set node/edge feature
        node_id = torch.from_numpy(node_id).view(-1, 1)
        edge_type = torch.from_numpy(edge_type)
        node_norm = torch.from_numpy(node_norm).view(-1, 1)
        data, labels = torch.from_numpy(data), torch.from_numpy(labels)
        deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
        if use_cuda:
            node_id, deg = node_id.cuda(), deg.cuda()
            edge_type, node_norm = edge_type.cuda(), node_norm.cuda()
            data, labels = data.cuda(), labels.cuda()
        g.ndata.update({'id': node_id, 'norm': node_norm})
        g.edata['type'] = edge_type

        t0 = time.time()
        loss = model.get_loss(g, data, labels)
        t1 = time.time()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.grad_norm)  # clip gradients
        optimizer.step()
        t2 = time.time()

        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | Forward {:.4f}s | Backward {:.4f}s"
            .format(epoch, loss.item(), best_mrr, forward_time[-1],
                    backward_time[-1]))
        # grads are zeroed after the step so the next backward starts clean
        optimizer.zero_grad()

        # validation
        if epoch % args.evaluate_every == 0:
            # perform validation on CPU because full graph is too large
            if use_cuda:
                model.cpu()
            model.eval()
            print("start eval")
            mrr = utils.evaluate(test_graph,
                                 model,
                                 valid_data,
                                 num_nodes,
                                 hits=[1, 3, 10],
                                 eval_bz=args.eval_batch_size)
            # save best model; stop once n_epochs reached without improvement
            if mrr < best_mrr:
                if epoch >= args.n_epochs:
                    break
            else:
                best_mrr = mrr
                torch.save({
                    'state_dict': model.state_dict(),
                    'epoch': epoch
                }, model_state_file)
            if use_cuda:
                model.cuda()

    print("training done")
    print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
    print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))

    print("\nstart testing:")
    # use best model checkpoint
    checkpoint = torch.load(model_state_file)
    if use_cuda:
        model.cpu()  # test on CPU
    model.eval()
    model.load_state_dict(checkpoint['state_dict'])
    print("Using best epoch: {}".format(checkpoint['epoch']))
    utils.evaluate(test_graph,
                   model,
                   test_data,
                   num_nodes,
                   hits=[1, 3, 10],
                   eval_bz=args.eval_batch_size)
'n_bases': 100, 'n_layers': 2, 'n_epochs': 10, 'dataset': 'FB15k-237', 'eval_batch_size': 500, 'regularization': 0.01, 'grad_norm': 1.0, 'graph_batch_size': 3000, 'graph_split_size': 0.5, 'negative_sample': 10, 'evaluate_every': 10 } """## Scripts for running""" # load graph data data = load_data(args['dataset']) num_nodes = data.num_nodes train_data = data.train valid_data = data.valid test_data = data.test num_rels = data.num_rels # check cuda use_cuda = args['gpu'] >= 0 and torch.cuda.is_available() if use_cuda: torch.device('cuda') # create model model = LinkPredict(num_nodes, args['n_hidden'], num_rels,
def main(args):
    """Train a (variational) R-GCN link predictor, logging per-epoch losses
    to ``train_records.txt`` and validation metrics to ``val_records.txt``.

    Fixes over the previous revision:
      * validation labels were assigned to an unused ``val_neg_samples``
        name and left on the CPU, so ``val_data[val_labels == 1]`` mixed
        devices on CUDA runs;
      * the model was constructed with a hard-coded ``use_cuda=True``
        instead of the computed flag;
      * both record-file handles were never closed.
    """
    # load graph data
    data = load_data(args.dataset)
    num_nodes = data.num_nodes
    train_data = data.train
    valid_data = data.valid
    test_data = data.test
    num_rels = data.num_rels

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)

    # create model
    model = LinkPredict(
        model_class=RGCN,  #KGVAE, #RGCN
        in_dim=num_nodes,
        h_dim=args.n_hidden,
        num_rels=num_rels,
        num_bases=args.n_bases,
        num_hidden_layers=args.n_layers,
        dropout=args.dropout,
        use_cuda=use_cuda,  # FIX: was hard-coded True
        reg_param=args.regularization,
        kl_param=args.kl_param,
    )

    # validation and testing triplets
    valid_data = torch.LongTensor(valid_data)
    test_data = torch.LongTensor(test_data)

    # build val graph
    val_adj_list, val_degrees = utils.get_adj_and_degrees(
        num_nodes, valid_data)

    if use_cuda:
        model.cuda()

    # build adj list and calculate degrees for sampling
    adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    model_state_file = 'model_state.pth'
    forward_time = []
    backward_time = []

    # training loop
    print("start training...")
    epoch = 0
    best_mrr = 0
    train_records = open("train_records.txt", "w")
    val_records = open("val_records.txt", "w")
    while True:
        model.train()
        epoch += 1
        # perform edge neighborhood sampling to generate training graph and data
        g, node_id, edge_type, node_norm, data, labels = \
            utils.generate_sampled_graph_and_labels(
                train_data, args.graph_batch_size, args.graph_split_size,
                num_rels, adj_list, degrees, args.negative_sample,
                args.edge_sampler)
        # print("Done edge sampling")

        # set node/edge feature
        node_id = torch.from_numpy(node_id).view(-1, 1).long()
        edge_type = torch.from_numpy(edge_type)
        edge_norm = node_norm_to_edge_norm(
            g,
            torch.from_numpy(node_norm).view(-1, 1))
        data, labels = torch.from_numpy(data), torch.from_numpy(labels)
        deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
        if use_cuda:
            node_id, deg = node_id.cuda(), deg.cuda()
            edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
            data, labels = data.cuda(), labels.cuda()

        t0 = time.time()
        embed = model(g, node_id, edge_type, edge_norm)
        loss, pred_loss, kl = model.get_loss(g, embed, data, labels)
        # append one semicolon-separated record per epoch
        train_records.write("{:d};{:.4f};{:.4f};{:.4f}\n".format(
            epoch, loss.item(), pred_loss.item(), kl.item()))
        train_records.flush()
        t1 = time.time()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.grad_norm)  # clip gradients
        optimizer.step()
        t2 = time.time()

        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | pred {:.4f} | reg {:.4f} | kl {:.4f} "
            .format(epoch, loss.item(), best_mrr, pred_loss,
                    loss - pred_loss - kl, kl))
        # grads are zeroed after the step so the next backward starts clean
        optimizer.zero_grad()

        # validation (on a sampled validation sub-graph)
        # NOTE(review): model stays in train() mode here, so dropout is
        # active during validation — confirm whether that is intended.
        if epoch % args.evaluate_every == 1:
            val_g, val_node_id, val_edge_type, val_node_norm, val_data, val_labels = \
                utils.generate_sampled_graph_and_labels(
                    valid_data, args.graph_batch_size, args.graph_split_size,
                    num_rels, val_adj_list, val_degrees,
                    args.negative_sample, args.edge_sampler)
            # print("Done edge sampling for validation")

            val_node_id = torch.from_numpy(val_node_id).view(-1, 1).long()
            val_edge_type = torch.from_numpy(val_edge_type)
            val_edge_norm = node_norm_to_edge_norm(
                val_g,
                torch.from_numpy(val_node_norm).view(-1, 1))
            val_data, val_labels = torch.from_numpy(
                val_data), torch.from_numpy(val_labels)
            if use_cuda:
                val_node_id = val_node_id.cuda()
                val_edge_type, val_edge_norm = val_edge_type.cuda(
                ), val_edge_norm.cuda()
                # FIX: was `val_data, val_neg_samples = ...`, which left
                # val_labels on the CPU and made the mask below mix devices
                val_data, val_labels = val_data.cuda(), val_labels.cuda()
            embed = model(val_g, val_node_id, val_edge_type, val_edge_norm)
            # rank only the positive triplets (labels == 1)
            mr, mrr, hits = utils.calc_mrr(embed,
                                           model.w_relation,
                                           val_data[val_labels == 1],
                                           hits=[1, 3, 10],
                                           eval_bz=args.eval_batch_size)
            # NOTE(review): the newline before the hits列 splits each record
            # over two lines; kept as-is to preserve the log format.
            val_records.write("{:d};{:.4f};{:.4f};\n".format(epoch, mr, mrr) +
                              ';'.join([str(i) for i in hits]) + "\n")
            val_records.flush()
            if mrr < best_mrr:
                if epoch >= args.n_epochs:
                    break
            else:
                best_mrr = mrr
                print("Best mrr", mrr)
                torch.save({
                    'state_dict': model.state_dict(),
                    'epoch': epoch
                }, model_state_file)

    # FIX: close the record files (previously leaked)
    train_records.close()
    val_records.close()

    print("training done")
    print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
    print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))
def main(args):
    """Train or evaluate a (variational) R-GCN link predictor.

    Supports a test-only mode (``args.test_mode``), resuming from a saved
    checkpoint (``args.load``), and periodic validation with best-model /
    latest-model checkpointing.
    """
    # load graph data
    data = load_data(args.dataset)
    num_nodes = data.num_nodes
    train_data = data.train
    valid_data = data.valid
    test_data = data.test
    num_rels = data.num_rels

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)

    # create model — encoder class is selected by a CLI flag
    if args.model_class == "KGVAE":
        model_class = KGVAE
    else:
        model_class = RGCN
    model = LinkPredict(model_class=model_class,
                        in_dim=num_nodes,
                        h_dim=args.n_hidden,
                        num_rels=num_rels,
                        num_bases=args.n_bases,
                        num_hidden_layers=args.n_layers,
                        dropout=args.dropout,
                        use_cuda=use_cuda,
                        reg_param=args.regularization,
                        kl_param=args.kl_param,
                        mmd_param=args.mmd_param,
                        k=args.mog_k,
                        n_flows=args.n_flows)

    # validation and testing triplets
    valid_data = torch.LongTensor(valid_data)

    # build test graph
    val_graph, val_rel, val_norm = utils.build_test_graph(
        num_nodes, num_rels, valid_data)
    # val_deg = val_graph.in_degrees(
    #     range(val_graph.number_of_nodes())).float().view(-1, 1)
    val_node_id = torch.arange(0, num_nodes, dtype=torch.long).view(-1, 1)
    val_rel = torch.from_numpy(val_rel)
    val_norm = node_norm_to_edge_norm(val_graph,
                                      torch.from_numpy(val_norm).view(-1, 1))

    if use_cuda:
        model.cuda()

    # build adj list and calculate degrees for sampling
    adj_list, degrees = utils.get_adj_and_degrees(num_nodes, train_data)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    forward_time = []
    backward_time = []

    if args.test_mode is True:
        # test-only path: load the checkpoint, score the test split, exit
        print("\nstart testing:")
        # use best model checkpoint
        checkpoint = torch.load(args.model_state_file)
        test_data = torch.LongTensor(test_data)
        # build test graph
        test_graph, test_rel, test_norm = utils.build_test_graph(
            num_nodes, num_rels, test_data)
        # test_deg = test_graph.in_degrees(
        #     range(test_graph.number_of_nodes())).float().view(-1, 1)
        test_node_id = torch.arange(0, num_nodes,
                                    dtype=torch.long).view(-1, 1)
        test_rel = torch.from_numpy(test_rel)
        test_norm = node_norm_to_edge_norm(
            test_graph,
            torch.from_numpy(test_norm).view(-1, 1))
        if use_cuda:
            model.cpu()  # test on CPU
        model.eval()
        model.load_state_dict(checkpoint['state_dict'])
        print("Using best epoch: {}".format(checkpoint['epoch']))
        embed = model(test_graph, test_node_id, test_rel, test_norm)
        if args.generate:
            print("\nstart generating obama neighborhood:")
            # use best model checkpoint
            utils.generate(embed, model.w_relation, test_data)
        utils.calc_mrr(embed,
                       model.w_relation,
                       test_data,
                       hits=[1, 3, 10],
                       eval_bz=args.eval_batch_size,
                       all_batches=True,
                       flow_log_prob=model.encoder.get_flow_log_prob())
        exit(0)

    # training loop
    print("start training...")
    epoch = 0
    best_mrr = 0
    if args.load is True:
        # resume training from an existing checkpoint
        print(f"Loading checkpoint file {args.model_state_file} for training")
        checkpoint = torch.load(args.model_state_file)
        model.load_state_dict(checkpoint['state_dict'])
        epoch = checkpoint['epoch']
    while True:
        model.train()
        epoch += 1
        # perform edge neighborhood sampling to generate training graph and data
        g, node_id, edge_type, node_norm, data, labels = \
            utils.generate_sampled_graph_and_labels(
                train_data, args.graph_batch_size, args.graph_split_size,
                num_rels, adj_list, degrees, args.negative_sample,
                args.edge_sampler)

        # set node/edge feature
        node_id = torch.from_numpy(node_id).view(-1, 1).long()
        edge_type = torch.from_numpy(edge_type)
        edge_norm = node_norm_to_edge_norm(
            g,
            torch.from_numpy(node_norm).view(-1, 1))
        data, labels = torch.from_numpy(data), torch.from_numpy(labels)
        deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
        if use_cuda:
            node_id, deg = node_id.cuda(), deg.cuda()
            edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
            data, labels = data.cuda(), labels.cuda()

        t0 = time.time()
        embed = model(g, node_id, edge_type, edge_norm)
        # total loss plus its components (prediction, KL, MMD) for logging
        loss, pred_loss, kl, mmd = model.get_loss(g, embed, data, labels)
        t1 = time.time()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.grad_norm)  # clip gradients
        optimizer.step()
        t2 = time.time()

        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f} | pred_loss {:.4f} | kl {:.4f} | mmd {:.4f}"
            .format(epoch, loss.item(), best_mrr, pred_loss.item(), kl.item(),
                    mmd.item()))
        # grads are zeroed after the step so the next backward starts clean
        optimizer.zero_grad()

        # validation
        if epoch % args.evaluate_every == 0:
            # perform validation on CPU because full graph is too large
            if use_cuda:
                model.cpu()
            model.eval()
            print("start eval")
            # snapshot current weights before evaluating
            torch.save({
                'state_dict': model.state_dict(),
                'epoch': epoch
            }, args.model_state_file)
            embed = model(val_graph, val_node_id, val_rel, val_norm)
            mrr = utils.calc_mrr(embed,
                                 model.w_relation,
                                 valid_data,
                                 hits=[1, 3, 10],
                                 eval_bz=args.eval_batch_size,
                                 all_batches=False)
            # save best model
            if mrr < best_mrr:
                # no improvement: keep a "_latest" snapshot alongside best
                torch.save({
                    'state_dict': model.state_dict(),
                    'epoch': epoch
                }, args.model_state_file + "_latest")
            else:
                best_mrr = mrr
                torch.save({
                    'state_dict': model.state_dict(),
                    'epoch': epoch
                }, args.model_state_file)
            if use_cuda:
                model.cuda()
            if epoch >= args.n_epochs:
                break

    print("training done")
    print("Mean forward time: {:4f}s".format(np.mean(forward_time)))
    print("Mean Backward time: {:4f}s".format(np.mean(backward_time)))