import os
from collections import namedtuple

import numpy as np
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader

# Project-local imports (decay rules, environment, model, dataset, replay
# memory) are assumed to be in scope; their module paths are repo-specific.


def __init__(self, cfg, args, global_count, global_writer_loss_count,
             global_writer_quality_count, global_win_event_count,
             action_stats_count, save_dir):
    super(AgentSacTrainer_sg_lg, self).__init__()
    self.cfg = cfg
    self.args = args
    self.global_count = global_count
    self.global_writer_loss_count = global_writer_loss_count
    self.global_writer_quality_count = global_writer_quality_count
    self.global_win_event_count = global_win_event_count
    self.action_stats_count = action_stats_count
    # self.eps = self.args.init_epsilon
    self.save_dir = save_dir

    # Select the schedule for the stopping-quality threshold.
    if args.stop_qual_rule == 'naive':
        self.stop_qual_rule = NaiveDecay(initial_eps=args.init_stop_qual,
                                         episode_shrinkage=1,
                                         change_after_n_episodes=5)
    elif args.stop_qual_rule == 'gaussian':
        self.stop_qual_rule = GaussianDecay(args.stop_qual_final, args.stop_qual_scaling,
                                            args.stop_qual_offset, args.T_max)
    elif args.stop_qual_rule == 'running_average':
        self.stop_qual_rule = RunningAverage(args.stop_qual_ra_bw,
                                             args.stop_qual_scaling + args.stop_qual_offset,
                                             args.stop_qual_ra_off)
    else:
        self.stop_qual_rule = Constant(args.stop_qual_final)

    # Select the schedule for the SAC temperature. Both branches assign the
    # same attribute so downstream code can rely on a single rule object.
    if self.cfg.temperature_regulation == 'follow_quality':
        self.beta_rule = FollowLeadAvg(1, 80, 1)
    elif self.cfg.temperature_regulation == 'constant':
        self.beta_rule = Constant(cfg.init_temperature)
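
# The schedule objects above are assumed to map a step/episode index to a
# scalar. A minimal sketch of what a Gaussian-style decay could look like;
# the class name, the method name `apply`, and the exact functional form are
# assumptions for illustration only, not the repo's real GaussianDecay.
class _GaussianDecaySketch:
    """Decays from roughly (final + scaling + offset) toward (final + offset)
    over T_max steps."""

    def __init__(self, final, scaling, offset, T_max):
        self.final = final
        self.scaling = scaling
        self.offset = offset
        self.T_max = T_max

    def apply(self, step):
        # Gaussian falloff in the normalized step position.
        return self.final + self.scaling * np.exp(-5 * (step / self.T_max) ** 2) + self.offset
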
def __init__(self, args, shared_average_model, global_count, global_writer_loss_count,
             global_writer_quality_count, global_win_event_count, save_dir):
    super(AgentAcerContinuousTrainer, self).__init__()
    self.args = args
    self.shared_average_model = shared_average_model
    self.global_count = global_count
    self.global_writer_loss_count = global_writer_loss_count
    self.global_writer_quality_count = global_writer_quality_count
    self.global_win_event_count = global_win_event_count
    self.writer_idx_warmup_loss = 0
    # self.eps = self.args.init_epsilon
    self.save_dir = save_dir

    # Select the schedule for the stopping-quality threshold.
    if args.stop_qual_rule == 'naive':
        self.stop_qual_rule = NaiveDecay(initial_eps=args.init_stop_qual,
                                         episode_shrinkage=1,
                                         change_after_n_episodes=5)
    elif args.stop_qual_rule == 'gaussian':
        self.stop_qual_rule = GaussianDecay(args.stop_qual_final, args.stop_qual_scaling,
                                            args.stop_qual_offset, args.T_max)
    elif args.stop_qual_rule == 'running_average':
        self.stop_qual_rule = RunningAverage(args.stop_qual_ra_bw,
                                             args.stop_qual_scaling + args.stop_qual_offset,
                                             args.stop_qual_ra_off)
    else:
        self.stop_qual_rule = NaiveDecay(args.init_stop_qual)

    # Select the schedule for the behavior policy's sigma. The self-regulating
    # rules disable the step limit and follow the observed quality instead.
    if self.args.eps_rule == "treesearch":
        self.b_sigma_rule = ActionPathTreeNodes()
    elif self.args.eps_rule == "sawtooth":
        self.b_sigma_rule = ExpSawtoothEpsDecay()
    elif self.args.eps_rule == 'gaussian':
        self.b_sigma_rule = GaussianDecay(args.b_sigma_final, args.b_sigma_scaling,
                                          args.p_sigma, args.T_max)
    elif self.args.eps_rule == "self_reg_min":
        self.args.T_max = np.inf
        self.b_sigma_rule = FollowLeadMin(args.stop_qual_scaling + args.stop_qual_offset, 1)
    elif self.args.eps_rule == "self_reg_avg":
        self.args.T_max = np.inf
        self.b_sigma_rule = FollowLeadAvg((args.stop_qual_scaling + args.stop_qual_offset) / 4, 2, 1)
    elif self.args.eps_rule == "self_reg_exp_avg":
        self.args.T_max = np.inf
        self.b_sigma_rule = ExponentialAverage((args.stop_qual_scaling + args.stop_qual_offset) / 4, 0.9, 1)
    else:
        # self.eps is never set in this class (the assignment above is
        # commented out), so fall back to args.init_epsilon directly.
        self.b_sigma_rule = NaiveDecay(self.args.init_epsilon, 0.00005, 1000, 1)
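
# The *_count arguments passed to both constructors are assumed to be shared,
# process-safe counters exposing value() and increment(). A minimal sketch of
# such a counter in the style of A3C/ACER trainers follows; the class name is
# hypothetical and the repo's real implementation may differ.
import torch.multiprocessing as mp


class _CounterSketch:
    """Process-safe integer counter shared across worker processes."""

    def __init__(self):
        # 'i' = signed int; the Value carries its own lock.
        self._val = mp.Value('i', 0)

    def increment(self):
        with self._val.get_lock():
            self._val.value += 1

    def value(self):
        with self._val.get_lock():
            return self._val.value
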
def train_step(self, rank, writer):
    device = torch.device("cuda:" + str(rank // self.cfg.gen.n_processes_per_gpu))
    print('Running on device: ', device)
    torch.cuda.set_device(device)
    torch.set_default_tensor_type(torch.FloatTensor)
    self.setup(rank, self.cfg.gen.n_processes_per_gpu * self.cfg.gen.n_gpu)

    env = SpGcnEnv(self.cfg, device, writer=writer,
                   writer_counter=self.global_writer_quality_count)

    # Create the shared network and wrap it for distributed training.
    model = GcnEdgeAC(self.cfg, device, writer=writer)
    model.cuda(device)
    shared_model = DDP(model, device_ids=[device], find_unused_parameters=True)

    # Optimizers: with an 'extra' feature-extractor optimizer, the containers
    # carry an additional embeddings slot.
    if 'extra' in self.cfg.fe.optim:
        MovSumLosses = namedtuple('mov_avg_losses',
                                  ('actor', 'embeddings', 'critic', 'temperature'))
        OptimizerContainer = namedtuple('OptimizerContainer',
                                        ('actor', 'embeddings', 'critic', 'temperature',
                                         'actor_shed', 'embed_shed', 'critic_shed', 'temp_shed'))
    else:
        MovSumLosses = namedtuple('mov_avg_losses', ('actor', 'critic', 'temperature'))
        OptimizerContainer = namedtuple('OptimizerContainer',
                                        ('actor', 'critic', 'temperature',
                                         'actor_shed', 'critic_shed', 'temp_shed'))

    if self.cfg.fe.optim == "rl_loss":
        # The feature extractor is trained by the RL loss through the actor.
        actor_optimizer = torch.optim.Adam(
            list(shared_model.module.actor.parameters())
            + list(shared_model.module.fe_ext.parameters()),
            lr=self.cfg.sac.actor_lr, betas=self.cfg.sac.actor_betas)
    else:
        actor_optimizer = torch.optim.Adam(shared_model.module.actor.parameters(),
                                           lr=self.cfg.sac.actor_lr,
                                           betas=self.cfg.sac.actor_betas)
    if "extra" in self.cfg.fe.optim:
        embeddings_optimizer = torch.optim.Adam(shared_model.module.fe_ext.parameters(),
                                                lr=self.cfg.fe.lr, betas=self.cfg.fe.betas)
    critic_optimizer = torch.optim.Adam(shared_model.module.critic.parameters(),
                                        lr=self.cfg.sac.critic_lr,
                                        betas=self.cfg.sac.critic_betas)
    temp_optimizer = torch.optim.Adam([shared_model.module.log_alpha],
                                      lr=self.cfg.sac.alpha_lr,
                                      betas=self.cfg.sac.alpha_betas)

    if "extra" in self.cfg.fe.optim:
        mov_sum_losses = MovSumLosses(RunningAverage(), RunningAverage(),
                                      RunningAverage(), RunningAverage())
        optimizers = OptimizerContainer(actor_optimizer, embeddings_optimizer,
                                        critic_optimizer, temp_optimizer,
                                        ReduceLROnPlateau(actor_optimizer),
                                        ReduceLROnPlateau(embeddings_optimizer),
                                        ReduceLROnPlateau(critic_optimizer),
                                        ReduceLROnPlateau(temp_optimizer))
    else:
        mov_sum_losses = MovSumLosses(RunningAverage(), RunningAverage(), RunningAverage())
        optimizers = OptimizerContainer(actor_optimizer, critic_optimizer, temp_optimizer,
                                        ReduceLROnPlateau(actor_optimizer),
                                        ReduceLROnPlateau(critic_optimizer),
                                        ReduceLROnPlateau(temp_optimizer))

    dist.barrier()

    # Resume a full model, load a pretrained feature extractor, or warm the
    # feature extractor up on rank 0.
    if self.cfg.gen.resume:
        shared_model.module.load_state_dict(
            torch.load(os.path.join(self.log_dir, self.cfg.gen.model_name)))
    elif self.cfg.fe.load_pretrained:
        shared_model.module.fe_ext.load_state_dict(
            torch.load(os.path.join(self.save_dir, self.cfg.fe.model_name)))
    elif 'warmup' in self.cfg.fe and rank == 0:
        print('pretrain fe extractor')
        self.pretrain_embeddings_gt(shared_model.module.fe_ext, device, writer)
        torch.save(shared_model.module.fe_ext.state_dict(),
                   os.path.join(self.save_dir, self.cfg.fe.model_name))
    dist.barrier()

    if self.cfg.fe.optim == "none":
        # Freeze the feature extractor entirely.
        for param in shared_model.module.fe_ext.parameters():
            param.requires_grad = False

    dset = SpgDset(self.cfg.gen.data_dir)
    step = 0

    while self.global_count.value() <= self.cfg.trainer.T_max:
        dloader = DataLoader(dset, batch_size=self.cfg.trainer.batch_size,
                             shuffle=True, pin_memory=True, num_workers=0)
        for iteration in range(len(dset) * self.cfg.trainer.data_update_frequency):
            # if self.global_count.value() > self.args.T_max:
            #     a = 1
            if iteration % self.cfg.trainer.data_update_frequency == 0:
                self.update_env_data(env, dloader, device)
            # waff_dis = torch.softmax(env.edge_features[:, 0].squeeze() + 1e-30, dim=0)
            # waff_dis = torch.softmax(env.gt_edge_weights + 0.5, dim=0)
            # waff_dis = torch.softmax(torch.ones_like(env.b_gt_edge_weights), dim=0)
            # loss_weight = torch.softmax(env.b_gt_edge_weights + 1, dim=0)
            env.reset()
            self.update_rt_vars(critic_optimizer, actor_optimizer)
            if rank == 0 and self.cfg.rt_vars.safe_model:
                if self.cfg.gen.model_name != "":
                    torch.save(shared_model.module.state_dict(),
                               os.path.join(self.log_dir, self.cfg.gen.model_name))
                else:
                    torch.save(shared_model.module.state_dict(),
                               os.path.join(self.log_dir, 'agent_model'))

            state = env.get_state()
            while not env.done:
                # Post statistics/model summaries at the configured
                # frequencies, but only once the replay memory is full.
                post_stats = (self.global_writer_count.value() + 1) \
                    % self.cfg.trainer.post_stats_frequency == 0
                post_model = (self.global_writer_count.value() + 1) \
                    % self.cfg.trainer.post_model_frequency == 0
                post_stats &= self.memory.is_full()
                post_model &= self.memory.is_full()

                # Explore with uniform random actions until the replay memory
                # is full, then sample from the current policy.
                distr = None
                if not self.memory.is_full():
                    action = torch.rand_like(env.current_edge_weights)
                else:
                    distr, _, _, action, _, _ = self.agent_forward(
                        env, shared_model, state=state, grad=False,
                        post_input=post_stats, post_model=post_model)

                logg_dict = {}
                if post_stats:
                    for i in range(len(self.cfg.sac.s_subgraph)):
                        logg_dict['alpha_' + str(i)] = shared_model.module.alpha[i].item()
                    if distr is not None:
                        logg_dict['mean_loc'] = distr.loc.mean().item()
                        logg_dict['mean_scale'] = distr.scale.mean().item()

                if self.memory.is_full():
                    for i in range(self.cfg.trainer.n_updates_per_step):
                        self._step(self.memory, optimizers, mov_sum_losses, env,
                                   shared_model, step, writer=writer)
                    self.global_writer_loss_count.increment()

                next_state, reward = env.execute_action(action, logg_dict, post_stats=post_stats)
                # next_state, reward, quality = env.execute_action(torch.sigmoid(distr.loc), logg_dict, post_stats=post_stats)

                if self.cfg.rt_vars.add_noise:
                    # Optionally perturb the reward with Gaussian noise.
                    noise = torch.randn_like(reward) * 0.2
                    reward = reward + noise

                self.memory.push(self.state_to_cpu(state), action, reward,
                                 self.state_to_cpu(next_state), env.done)
                state = next_state

                self.global_count.increment()
                step += 1
                if rank == 0:
                    self.global_writer_count.increment()

            if step > self.cfg.trainer.T_max:
                break

    dist.barrier()
    if rank == 0:
        self.memory.clear()
        if not self.cfg.gen.cross_validate_hp and not self.cfg.gen.test_score_only \
                and not self.cfg.gen.no_save:
            if self.cfg.gen.model_name != "":
                torch.save(shared_model.state_dict(),
                           os.path.join(self.log_dir, self.cfg.gen.model_name))
                print('saved')
            else:
                torch.save(shared_model.state_dict(),
                           os.path.join(self.log_dir, 'agent_model'))

    self.cleanup()
    return sum(env.acc_reward) / len(env.acc_reward)
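
# A minimal launch sketch for train_step, assuming a fully constructed trainer
# instance and per-rank TensorBoard writers. The names `cfg` and `trainer` and
# this wiring are hypothetical; the repo's real entry point may differ.
#
#     import torch.multiprocessing as mp
#     from torch.utils.tensorboard import SummaryWriter
#
#     def _worker(rank, trainer):
#         writer = SummaryWriter(log_dir=f"runs/rank_{rank}")
#         trainer.train_step(rank, writer)
#
#     if __name__ == "__main__":
#         world_size = cfg.gen.n_processes_per_gpu * cfg.gen.n_gpu
#         mp.spawn(_worker, args=(trainer,), nprocs=world_size)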