class CitationGNNManager(object):
    """Train/evaluate sampled GNN architectures on the citation datasets
    (cora, citeseer, pubmed).

    NOTE(review): this file defines `CitationGNNManager` more than once;
    later definitions shadow this one at import time.
    """

    def __init__(self, args):
        self.args = args

        if hasattr(args, 'dataset') and args.dataset in ["cora", "citeseer", "pubmed"]:
            self.data = load(args)
            # Mirror dataset-derived sizes onto args so downstream model
            # builders can read them from either place.
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        print('the experiment config:', '\n', args)

        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10
        self.param_file = args.param_file
        self.shared_params = None
        # NLL loss pairs with the log_softmax applied in run_model.
        # (The original assigned torch.nn.BCELoss() first and immediately
        # overwrote it; that dead assignment — and a duplicate
        # `self.args = args` — have been removed.)
        self.loss_fn = torch.nn.functional.nll_loss

    def load_param(self):
        # don't share param
        pass

    def save_param(self, model, update_all=False):
        # don't share param
        pass

    # train from scratch
    def evaluate(self, actions=None, format="two"):
        """Train a sampled architecture and return (val_acc, best_test_acc)."""
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model
        model = self.build_gnn(actions)

        if self.args.cuda:
            model.cuda()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr,
                                     weight_decay=self.args.weight_decay)
        try:
            model, val_acc, test_acc = self.run_model(
                model, optimizer, self.loss_fn, self.data, self.epochs,
                cuda=self.args.cuda, return_best=True,
                half_stop_score=max(self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            # Treat CUDA failures (usually OOM) as a zero-score trial instead
            # of aborting the whole search.
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
                test_acc = 0
            else:
                raise e
        return val_acc, test_acc

    # train from scratch
    def train(self, actions=None, format="two"):
        """Train a sampled architecture and return (reward, val_acc)."""
        origin_action = actions
        actions = process_action(actions, format, self.args)
        print("train gnn structures:", actions)

        # create model
        model = self.build_gnn(actions)

        try:
            if self.args.cuda:
                model.cuda()
            # use optimizer
            optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr,
                                         weight_decay=self.args.weight_decay)
            model, val_acc = self.run_model(
                model, optimizer, self.loss_fn, self.data, self.epochs,
                cuda=self.args.cuda,
                half_stop_score=max(self.reward_manager.get_top_average() * 0.7, 0.4)
                # , show_info=True
            )
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
            else:
                raise e

        # Reward is relative to the running top-k average of val_acc.
        reward = self.reward_manager.get_reward(val_acc)
        # self.record_action_info(origin_action, reward, val_acc)
        return reward, val_acc

    def record_action_info(self, origin_action, reward, val_acc):
        """Append `action;reward;val_acc` to the per-dataset search log.

        (The `reward` parameter was previously accepted but never written,
        unlike the other manager versions in this file — now logged too.)
        """
        with open(self.args.dataset + "_" + self.args.search_mode +
                  self.args.submanager_log_file, "a") as file:
            file.write(str(origin_action))
            file.write(";")
            file.write(str(reward))
            file.write(";")
            file.write(str(val_acc))
            file.write("\n")

    def build_gnn(self, actions):
        """Instantiate a GraphNet for the sampled architecture description."""
        model = GraphNet(actions, self.in_feats, self.n_classes,
                         drop_out=self.args.in_drop, multi_label=False,
                         batch_normal=False)
        return model

    def retrain(self, actions, format="two"):
        return self.train(actions, format)

    def test_with_param(self, actions=None, format="two", with_retrain=False):
        return self.train(actions, format)

    @staticmethod
    def run_model(model, optimizer, loss_fn, data, epochs, early_stop=5,
                  tmp_model_file="geo_citation.pkl", half_stop_score=0,
                  return_best=False, cuda=True, need_early_stop=False,
                  show_info=False):
        """Train `model` for `epochs` epochs, tracking the val/test accuracy
        at the epoch with the lowest validation loss.

        Returns (model, val_acc) or, when `return_best`, also the best test
        accuracy seen at a val-loss minimum.  `early_stop`, `tmp_model_file`,
        `half_stop_score` and `need_early_stop` are currently unused here and
        kept only for interface compatibility with callers.
        """
        dur = []
        begin_time = time.time()
        best_performance = 0
        min_val_loss = float("inf")
        min_train_loss = float("inf")
        model_val_acc = 0
        features, g, labels, mask, val_mask, test_mask, n_edges = \
            CitationGNNManager.prepare_data(data, cuda)

        for epoch in range(1, epochs + 1):
            model.train()
            t0 = time.time()
            # forward
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            loss = loss_fn(logits[mask], labels[mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # evaluate
            model.eval()
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            train_acc = evaluate(logits, labels, mask)
            dur.append(time.time() - t0)

            val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
            val_acc = evaluate(logits, labels, val_mask)
            test_acc = evaluate(logits, labels, test_mask)

            # Only adopt this epoch's scores when validation loss improves.
            if val_loss < min_val_loss:  # and train_loss < min_train_loss
                min_val_loss = val_loss
                min_train_loss = train_loss
                model_val_acc = val_acc
                if test_acc > best_performance:
                    best_performance = test_acc
            if show_info:
                print(
                    "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}".format(
                        epoch, loss.item(), np.mean(dur), train_acc, val_acc, test_acc))

        end_time = time.time()
        print("Each Epoch Cost Time: %f " % ((end_time - begin_time) / epoch))
        print(f"val_score:{model_val_acc},test_score:{best_performance}")
        if return_best:
            return model, model_val_acc, best_performance
        else:
            return model, model_val_acc

    @staticmethod
    def prepare_data(data, cuda=True):
        """Build tensors and a self-looped DGL graph with symmetric-norm
        coefficients stored in `g.ndata['norm']`.

        (Leftover debug prints of whole tensors were removed.)
        """
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        # NOTE(review): ByteTensor masks are deprecated for indexing in newer
        # torch; BoolTensor is preferred — kept as-is for compatibility.
        mask = torch.ByteTensor(data.train_mask)
        test_mask = torch.ByteTensor(data.test_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        n_edges = data.graph.number_of_edges()

        # create DGL graph
        g = DGLGraph(data.graph)
        # add self loop
        g.add_edges(g.nodes(), g.nodes())
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)  # D^{-1/2} normalization
        norm[torch.isinf(norm)] = 0
        if cuda:
            features = features.cuda()
            labels = labels.cuda()
            norm = norm.cuda()
        g.ndata['norm'] = norm.unsqueeze(1)
        return features, g, labels, mask, val_mask, test_mask, n_edges
class CitationGNNManager(object):
    """Train/evaluate sampled GNN architectures on the citation datasets
    (cora, citeseer, pubmed) — second definition; shadows the earlier one.

    (Large no-op string literals pasting the `TopAverage` source into method
    bodies, and a fully commented-out legacy `run_model`, were removed.)
    """

    def __init__(self, args):
        self.args = args

        if hasattr(args, 'dataset') and args.dataset in [
                "cora", "citeseer", "pubmed"
        ]:
            self.data = load(args)
            # Mirror dataset-derived sizes onto args for downstream builders.
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10
        self.param_file = args.param_file
        self.shared_params = None
        # NLL loss pairs with the log_softmax applied in run_model.
        # (A dead `torch.nn.BCELoss()` assignment that was immediately
        # overwritten, and a duplicate `self.args = args`, were removed.)
        self.loss_fn = torch.nn.functional.nll_loss

    def load_param(self):
        # don't share param
        pass

    def save_param(self, model, update_all=False):
        # don't share param
        pass

    # train from scratch
    def evaluate(self, actions=None, format="two"):
        """Train a sampled architecture and return (val_acc, best_test_acc)."""
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model
        model = self.build_gnn(actions)

        if self.args.cuda:
            model.cuda()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=self.args.lr,
                                     weight_decay=self.args.weight_decay)
        try:
            model, val_acc, test_acc = self.run_model(
                model, optimizer, self.loss_fn, self.data, self.epochs,
                cuda=self.args.cuda, return_best=True,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            # Treat CUDA failures (usually OOM) as a zero-score trial.
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
                test_acc = 0
            else:
                raise e
        return val_acc, test_acc

    # train from scratch
    def train(self, actions=None, format="two"):
        """Train a sampled architecture and return (reward, val_acc).

        e.g. actions = ['gat', 'sum', 'relu', 2, 8, 'linear', 'mlp', 'tanh', 2, 4]
        """
        origin_action = actions
        # process_action rewrites the last layer's output dimension to the
        # number of task classes (e.g. 6 for Citeseer).
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # build the GNN from the sampled architecture description
        model = self.build_gnn(actions)

        try:
            if self.args.cuda:
                model.cuda()
            # use optimizer
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=self.args.lr,
                                         weight_decay=self.args.weight_decay)
            model, val_acc = self.run_model(
                model, optimizer, self.loss_fn, self.data, self.epochs,
                cuda=self.args.cuda,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
            else:
                raise e

        # Reward is positive when val_acc beats the mean of the historical
        # top-10 val_acc values; it is clipped to [-0.5, 0.5] by TopAverage.
        reward = self.reward_manager.get_reward(val_acc)
        self.save_param(model, update_all=(reward > 0))  # no shared params here

        # Log the structure, reward and val_acc.
        self.record_action_info(origin_action, reward, val_acc)

        return reward, val_acc

    def record_action_info(self, origin_action, reward, val_acc):
        """Append `action;reward;val_acc` to the per-dataset search log."""
        with open(
                self.args.dataset + "_" + self.args.search_mode +
                self.args.submanager_log_file, "a") as file:
            # with open(f'{self.args.dataset}_{self.args.search_mode}_{self.args.format}_manager_result.txt', "a") as file:
            file.write(str(origin_action))
            file.write(";")
            file.write(str(reward))
            file.write(";")
            file.write(str(val_acc))
            file.write("\n")

    def build_gnn(self, actions):
        """Instantiate a GraphNet for the sampled architecture description."""
        model = GraphNet(actions, self.in_feats, self.n_classes,
                         drop_out=self.args.in_drop, multi_label=False,
                         batch_normal=False)
        return model

    # retrain for validation
    def retrain(self, actions, format="two"):
        return self.train(actions, format)

    def test_with_param(self, actions=None, format="two", with_retrain=False):
        return self.train(actions, format)

    @staticmethod
    def run_model(model, optimizer, loss_fn, data, epochs, early_stop=5,
                  tmp_model_file="geo_citation.pkl", half_stop_score=0,
                  return_best=False, cuda=True, need_early_stop=False,
                  show_info=False):
        """Train `model` for `epochs` epochs, tracking the val/test accuracy
        at the epoch with the lowest validation loss.

        Returns (model, val_acc) or, when `return_best`, also the best test
        accuracy seen at a val-loss minimum.  `early_stop`, `tmp_model_file`,
        `half_stop_score` and `need_early_stop` are unused here and kept only
        for interface compatibility.
        """
        dur = []
        begin_time = time.time()
        best_performance = 0
        min_val_loss = float("inf")
        min_train_loss = float("inf")
        model_val_acc = 0
        features, g, labels, mask, val_mask, test_mask, n_edges = \
            CitationGNNManager.prepare_data(data, cuda)

        for epoch in range(1, epochs + 1):
            model.train()
            t0 = time.time()
            # forward
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            loss = loss_fn(logits[mask], labels[mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # evaluate
            model.eval()
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            train_acc = evaluate(logits, labels, mask)
            dur.append(time.time() - t0)

            val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
            val_acc = evaluate(logits, labels, val_mask)
            test_acc = evaluate(logits, labels, test_mask)

            # Only update the tracked scores when val_loss hits a new minimum.
            if val_loss < min_val_loss:  # and train_loss < min_train_loss
                min_val_loss = val_loss
                min_train_loss = train_loss
                model_val_acc = val_acc
                if test_acc > best_performance:
                    best_performance = test_acc
            if show_info:
                print(
                    "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}"
                    .format(epoch, loss.item(), np.mean(dur), train_acc,
                            val_acc, test_acc))

        end_time = time.time()
        print("Each Epoch Cost Time: %f " % ((end_time - begin_time) / epoch))
        print(f"val_score:{model_val_acc},test_score:{best_performance}")
        if return_best:
            return model, model_val_acc, best_performance
        else:
            return model, model_val_acc

    @staticmethod
    def prepare_data(data, cuda=True):
        """Build tensors and a self-looped DGL graph with symmetric-norm
        coefficients stored in `g.ndata['norm']`."""
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        # NOTE(review): ByteTensor masks are deprecated for indexing in newer
        # torch; BoolTensor is preferred — kept as-is for compatibility.
        mask = torch.ByteTensor(data.train_mask)
        test_mask = torch.ByteTensor(data.test_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        n_edges = data.graph.number_of_edges()

        # create DGL graph
        g = DGLGraph(data.graph)
        # add self loop
        g.add_edges(g.nodes(), g.nodes())
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)  # D^{-1/2} normalization
        norm[torch.isinf(norm)] = 0
        if cuda:
            features = features.cuda()
            labels = labels.cuda()
            norm = norm.cuda()
        g.ndata['norm'] = norm.unsqueeze(1)
        return features, g, labels, mask, val_mask, test_mask, n_edges
class RL_Selector(ModelSelector):
    """Manage the RL-based architecture search training process."""

    def __init__(self, args, search_space, action_list, submodel_manager):
        """
        Constructor for training algorithm.
        Build sub-model manager and controller.
        Build optimizer and cross entropy loss for controller.

        Args:
            args: From command line, picked up by `argparse`.
        """
        self.args = args
        self.controller_step = 0  # counter for controller
        self.cuda = args.cuda
        self.epoch = 0
        self.start_epoch = 0
        self.submodel_manager = None
        self.controller = None
        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        super(RL_Selector, self).__init__(args, search_space, action_list,
                                          submodel_manager)
        self.build_model()  # build controller

        self.max_length = self.args.shared_rnn_max_length

        controller_optimizer = _get_optimizer(self.args.controller_optim)
        self.controller_optim = controller_optimizer(
            self.controller.parameters(), lr=self.args.controller_lr)

    def build_model(self):
        """Build the RNN controller that samples architectures."""
        self.args.share_param = False
        self.args.shared_initial_step = 0
        self.controller = SimpleNASController(self.args,
                                              action_list=self.action_list,
                                              search_space=self.search_space,
                                              cuda=self.args.cuda)
        if self.cuda:
            self.controller.cuda()

    def train(self):
        """
        Each epoch consists of two phases:
        - In the first phase, shared parameters are trained to exploration.
        - In the second phase, the controller's parameters are trained.
        """
        for self.epoch in range(self.start_epoch, self.args.max_epoch):
            start_epoch_time = time.time()
            # 1. Training the shared parameters of the child graphnas
            self.train_shared(max_step=self.args.shared_initial_step)
            # 2. Training the controller parameters theta
            self.train_controller()

            if self.epoch % self.args.save_epoch == 0:
                self.save_model()
            end_epoch_time = time.time()
            print("epoch ", str(self.epoch), " took: ",
                  str(end_epoch_time - start_epoch_time))
        self.save_model()

    def train_shared(self, max_step=50, gnn_list=None):
        """
        Args:
            max_step: Used to run extra training steps as a warm-up.
            gnn_list: If not None, is used instead of calling sample().
        """
        if max_step == 0:  # no train shared
            return

        print("*" * 35, "training model", "*" * 35)
        gnn_list = gnn_list if gnn_list else self.controller.sample(max_step)

        for gnn in gnn_list:
            gnn = self.form_gnn_info(gnn)
            try:
                _, val_score = self.submodel_manager.train(
                    gnn, format=self.args.format)
                logger.info(str(gnn) + ", val_score:" + str(val_score))
            except RuntimeError as e:
                if 'CUDA' in str(e):  # usually CUDA Out of Memory
                    print(e)
                else:
                    raise e
        print("*" * 35, "training over", "*" * 35)

    def get_reward(self, gnn_list, entropies, hidden):
        """
        Computes the reward of a single sampled model on validation data.
        """
        if not isinstance(entropies, np.ndarray):
            entropies = entropies.data.cpu().numpy()
        if isinstance(gnn_list, dict):
            gnn_list = [gnn_list]
        if isinstance(gnn_list[0], list) or isinstance(gnn_list[0], dict):
            pass
        else:
            gnn_list = [gnn_list]  # when structure_list is one structure

        reward_list = []
        for gnn in gnn_list:
            gnn = self.form_gnn_info(gnn)
            # NOTE(review): this unpacking expects the manager's train() to
            # return (val_acc, metrics-dict); the CitationGNNManager defined
            # in this file returns (reward, val_acc) — confirm which manager
            # is actually injected here.
            val_acc, metrics = self.submodel_manager.train(
                gnn, format=self.args.format)

            # Manage Hall of Fame
            if self.args.opt_metric not in metrics:
                print("Could not find optimization metric",
                      self.args.opt_metric, "in metrics dict.")
                reward = self.reward_manager.get_reward(0)
            else:
                # NOTE(review): self.hof is not initialized in this class —
                # presumably set up by ModelSelector; verify.
                self.hof.add(gnn, metrics[self.args.opt_metric])
                # Calculate reward in terms of the selected optimization metric
                reward = self.reward_manager.get_reward(
                    metrics[self.args.opt_metric])
            reward_list.append(reward)

        if self.args.entropy_mode == 'reward':
            rewards = reward_list + self.args.entropy_coeff * entropies
        elif self.args.entropy_mode == 'regularizer':
            rewards = reward_list * np.ones_like(entropies)
        else:
            raise NotImplementedError('Unknown entropy mode:' +
                                      str(self.args.entropy_mode))

        return rewards, hidden

    def train_controller(self):
        """
        Train controller (REINFORCE with a moving-average baseline) to find
        better structures.
        """
        print("*" * 35, "training controller", "*" * 35)
        model = self.controller
        model.train()

        baseline = None
        adv_history = []
        entropy_history = []
        reward_history = []

        hidden = self.controller.init_hidden(self.args.batch_size)
        total_loss = 0
        for step in range(self.args.controller_max_step):
            # sample graphnas
            structure_list, log_probs, entropies = self.controller.sample(
                with_details=True)

            # calculate reward
            np_entropies = entropies.data.cpu().numpy()
            results = self.get_reward(structure_list, np_entropies, hidden)
            torch.cuda.empty_cache()

            if results:  # has reward
                rewards, hidden = results
            else:
                # CUDA Error happened, drop structure and step into
                # next iteration
                continue

            # discount
            if 1 > self.args.discount > 0:
                rewards = discount(rewards, self.args.discount)

            reward_history.extend(rewards)
            entropy_history.extend(np_entropies)

            # moving average baseline
            if baseline is None:
                baseline = rewards
            else:
                decay = self.args.ema_baseline_decay
                baseline = decay * baseline + (1 - decay) * rewards

            adv = rewards - baseline
            # (Removed `history.append(adv)`: `history` was never defined and
            # raised a NameError on the first controller step.)
            adv = scale(adv, scale_value=0.5)
            adv_history.extend(adv)

            adv = utils.get_variable(adv, self.cuda, requires_grad=False)
            # policy loss
            loss = -log_probs * adv
            if self.args.entropy_mode == 'regularizer':
                loss -= self.args.entropy_coeff * entropies

            loss = loss.sum()  # or loss.mean()

            # update
            self.controller_optim.zero_grad()
            loss.backward()

            if self.args.controller_grad_clip > 0:
                # clip_grad_norm was deprecated and later removed from torch;
                # use the in-place variant clip_grad_norm_.
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               self.args.controller_grad_clip)
            self.controller_optim.step()

            total_loss += utils.to_item(loss.data)

            self.controller_step += 1
            torch.cuda.empty_cache()

        print("*" * 35, "training controller over", "*" * 35)

    def evaluate(self, gnn):
        """
        Evaluate a structure on the validation set.
        """
        self.controller.eval()
        gnn = self.form_gnn_info(gnn)
        # NOTE(review): expects a 3-tuple from train(); the
        # CitationGNNManager in this file returns 2 values — confirm the
        # injected manager type.
        reward, scores, metrics = self.submodel_manager.train(
            gnn, format=self.args.format)
        logger.info("".join([
            'eval | ',
            str(gnn), ' | reward: {:8.2f}'.format(reward),
            ' | scores: {:8.2f}'.format(scores)
        ]))

    @property
    def controller_path(self):
        return "".join([
            str(self.args.dataset), "/controller_epoch",
            str(self.epoch), "_step",
            str(self.controller_step), ".pth"
        ])

    @property
    def controller_optimizer_path(self):
        return "".join([
            str(self.args.dataset), "/controller_epoch",
            str(self.epoch), "_step",
            str(self.controller_step), "_optimizer.pth"
        ])

    def get_saved_models_info(self):
        """Scan the dataset directory for saved checkpoints and return the
        sorted epoch / shared-step / controller-step numbers found."""
        paths = glob.glob(os.path.join(self.args.dataset, '*.pth'))
        paths.sort()

        def get_numbers(items, delimiter, idx, replace_word, must_contain=''):
            # Extract the integer embedded at position `idx` of each filename.
            return list(
                set([
                    int(name.split(delimiter)[idx].replace(replace_word, ''))
                    for name in items if must_contain in name
                ]))

        basenames = [
            os.path.basename(path.rsplit('.', 1)[0]) for path in paths
        ]
        epochs = get_numbers(basenames, '_', 1, 'epoch')
        shared_steps = get_numbers(basenames, '_', 2, 'step', 'shared')
        controller_steps = get_numbers(basenames, '_', 2, 'step', 'controller')

        epochs.sort()
        shared_steps.sort()
        controller_steps.sort()

        return epochs, shared_steps, controller_steps

    def save_model(self):
        """Save controller + optimizer state and prune old checkpoints."""
        torch.save(self.controller.state_dict(), self.controller_path)
        torch.save(self.controller_optim.state_dict(),
                   self.controller_optimizer_path)
        logger.info('[*] SAVED: ' + str(self.controller_path))

        epochs, shared_steps, controller_steps = self.get_saved_models_info()
        # Keep only the newest `max_save_num` epochs' checkpoints.
        for epoch in epochs[:-self.args.max_save_num]:
            paths = glob.glob(
                os.path.join(self.args.dataset,
                             '*_epoch' + str(epoch) + '_*.pth'))
            for path in paths:
                utils.remove_file(path)

    def load_model(self):
        """Resume controller + optimizer from the newest checkpoint, if any."""
        epochs, shared_steps, controller_steps = self.get_saved_models_info()

        if len(epochs) == 0:
            logger.info('[!] No checkpoint found in ' +
                        str(self.args.dataset) + '...')
            return

        self.epoch = self.start_epoch = max(epochs)
        self.controller_step = max(controller_steps)

        self.controller.load_state_dict(torch.load(self.controller_path))
        self.controller_optim.load_state_dict(
            torch.load(self.controller_optimizer_path))
        logger.info('[*] LOADED: ' + str(self.controller_path))
class CitationGNNManager(object):
    """Train/evaluate sampled GNN architectures on the citation datasets
    (cora, citeseer, pubmed) — third definition; this is the one visible at
    runtime since it shadows the earlier duplicates.

    (A fully commented-out legacy `run_model` variant was removed.)
    """

    def __init__(self, args):
        self.args = args

        if hasattr(args, 'dataset') and args.dataset in [
                "cora", "citeseer", "pubmed"
        ]:
            self.data = load(args)
            # Mirror dataset-derived sizes onto args for downstream builders.
            self.args.in_feats = self.in_feats = self.data.features.shape[1]
            self.args.num_class = self.n_classes = self.data.num_labels

        self.early_stop_manager = EarlyStop(10)
        self.reward_manager = TopAverage(10)

        self.drop_out = args.in_drop
        self.multi_label = args.multi_label
        self.lr = args.lr
        self.weight_decay = args.weight_decay
        self.retrain_epochs = args.retrain_epochs
        self.epochs = args.epochs
        self.train_graph_index = 0
        self.train_set_length = 10
        self.param_file = args.param_file
        self.shared_params = None
        # NLL loss pairs with the log_softmax applied in run_model.
        # (A dead `torch.nn.BCELoss()` assignment that was immediately
        # overwritten, and a duplicate `self.args = args`, were removed.)
        self.loss_fn = torch.nn.functional.nll_loss

    def load_param(self):
        # don't share param
        pass

    def save_param(self, model, update_all=False):
        # don't share param
        pass

    # train from scratch
    def evaluate(self, actions=None, format="two"):
        """Train a sampled architecture and return (val_acc, best_test_acc)."""
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model
        model = self.build_gnn(actions)

        if self.args.cuda:
            model.cuda()

        # use optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=self.args.lr,
                                     weight_decay=self.args.weight_decay)
        try:
            model, val_acc, test_acc = self.run_model(
                model, optimizer, self.loss_fn, self.data, self.epochs,
                cuda=self.args.cuda, return_best=True,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            # Treat CUDA failures (usually OOM) as a zero-score trial.
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
                test_acc = 0
            else:
                raise e
        return val_acc, test_acc

    # train from scratch
    def train(self, actions=None, format="two"):
        """Train a sampled architecture and return (reward, val_acc).

        e.g. actions = ['gat', 'max', 'tanh', 1, 128, 'cos', 'sum', 'tanh', 4, 16];
        process_action rewrites the final output dim to the number of classes.
        """
        origin_action = actions
        actions = process_action(actions, format, self.args)
        print("train action:", actions)

        # create model from the sampled architecture description
        model = self.build_gnn(actions)

        try:
            if self.args.cuda:
                model.cuda()
            # use optimizer
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=self.args.lr,
                                         weight_decay=self.args.weight_decay)
            model, val_acc = self.run_model(
                model, optimizer, self.loss_fn, self.data, self.epochs,
                cuda=self.args.cuda,
                half_stop_score=max(
                    self.reward_manager.get_top_average() * 0.7, 0.4))
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):
                print(e)
                val_acc = 0
            else:
                raise e

        # Reward is relative to the running top-k average (clipped by
        # TopAverage); positive reward marks an above-average architecture.
        reward = self.reward_manager.get_reward(val_acc)
        self.save_param(model, update_all=(reward > 0))

        self.record_action_info(origin_action, reward, val_acc)

        return reward, val_acc

    def record_action_info(self, origin_action, reward, val_acc):
        """Append `action;reward;val_acc` to the per-dataset search log."""
        with open(
                self.args.dataset + "_" + self.args.search_mode +
                self.args.submanager_log_file, "a") as file:
            # with open(f'{self.args.dataset}_{self.args.search_mode}_{self.args.format}_manager_result.txt', "a") as file:
            file.write(str(origin_action))
            file.write(";")
            file.write(str(reward))
            file.write(";")
            file.write(str(val_acc))
            file.write("\n")

    def build_gnn(self, actions):
        """Instantiate a GraphNet for the sampled architecture description."""
        model = GraphNet(actions, self.in_feats, self.n_classes,
                         drop_out=self.args.in_drop, multi_label=False,
                         batch_normal=False)
        return model

    def retrain(self, actions, format="two"):
        return self.train(actions, format)

    def test_with_param(self, actions=None, format="two", with_retrain=False):
        return self.train(actions, format)

    @staticmethod
    def run_model(model, optimizer, loss_fn, data, epochs, early_stop=5,
                  tmp_model_file="geo_citation.pkl", half_stop_score=0,
                  return_best=False, cuda=True, need_early_stop=False,
                  show_info=False):
        """Train `model` for `epochs` epochs, tracking the val/test accuracy
        at the epoch with the lowest validation loss.

        Returns (model, val_acc) or, when `return_best`, also the best test
        accuracy seen at a val-loss minimum.  `early_stop`, `tmp_model_file`,
        `half_stop_score` and `need_early_stop` are unused here and kept only
        for interface compatibility.
        """
        dur = []
        begin_time = time.time()
        best_performance = 0
        min_val_loss = float("inf")
        min_train_loss = float("inf")
        model_val_acc = 0
        features, g, labels, mask, val_mask, test_mask, n_edges = \
            CitationGNNManager.prepare_data(data, cuda)

        for epoch in range(1, epochs + 1):
            model.train()
            t0 = time.time()
            # forward
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            loss = loss_fn(logits[mask], labels[mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss = loss.item()

            # evaluate
            model.eval()
            logits = model(features, g)
            logits = F.log_softmax(logits, 1)
            train_acc = evaluate(logits, labels, mask)
            dur.append(time.time() - t0)

            val_loss = float(loss_fn(logits[val_mask], labels[val_mask]))
            val_acc = evaluate(logits, labels, val_mask)
            test_acc = evaluate(logits, labels, test_mask)

            # Only update the tracked scores when val_loss hits a new minimum.
            if val_loss < min_val_loss:  # and train_loss < min_train_loss
                min_val_loss = val_loss
                min_train_loss = train_loss
                model_val_acc = val_acc
                if test_acc > best_performance:
                    best_performance = test_acc
            if show_info:
                print(
                    "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | acc {:.4f} | val_acc {:.4f} | test_acc {:.4f}"
                    .format(epoch, loss.item(), np.mean(dur), train_acc,
                            val_acc, test_acc))

        end_time = time.time()
        print("Each Epoch Cost Time: %f " % ((end_time - begin_time) / epoch))
        print(f"val_score:{model_val_acc},test_score:{best_performance}")
        if return_best:
            return model, model_val_acc, best_performance
        else:
            return model, model_val_acc

    @staticmethod
    def prepare_data(data, cuda=True):
        """Build tensors and a self-looped DGL graph with symmetric-norm
        coefficients stored in `g.ndata['norm']`."""
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        # NOTE(review): ByteTensor masks are deprecated for indexing in newer
        # torch; BoolTensor is preferred — kept as-is for compatibility.
        mask = torch.ByteTensor(data.train_mask)
        test_mask = torch.ByteTensor(data.test_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        n_edges = data.graph.number_of_edges()

        # create DGL graph
        g = DGLGraph(data.graph)
        # add self loop
        g.add_edges(g.nodes(), g.nodes())
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)  # D^{-1/2} normalization
        norm[torch.isinf(norm)] = 0
        if cuda:
            features = features.cuda()
            labels = labels.cuda()
            norm = norm.cuda()
        g.ndata['norm'] = norm.unsqueeze(1)
        return features, g, labels, mask, val_mask, test_mask, n_edges