Example #1
    def __init__(self, cfg, args, global_count, global_writer_loss_count,
                 global_writer_quality_count, global_win_event_count,
                 action_stats_count, save_dir):
        super(AgentSacTrainer_sg_lg, self).__init__()

        self.cfg = cfg
        self.args = args
        self.global_count = global_count
        self.global_writer_loss_count = global_writer_loss_count
        self.global_writer_quality_count = global_writer_quality_count
        self.global_win_event_count = global_win_event_count
        self.action_stats_count = action_stats_count
        # self.eps = self.args.init_epsilon
        self.save_dir = save_dir
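        # Choose the schedule that controls the stopping-quality threshold
        # over the course of training.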
        if args.stop_qual_rule == 'naive':
            self.stop_qual_rule = NaiveDecay(initial_eps=args.init_stop_qual,
                                             episode_shrinkage=1,
                                             change_after_n_episodes=5)
        elif args.stop_qual_rule == 'gaussian':
            self.stop_qual_rule = GaussianDecay(args.stop_qual_final,
                                                args.stop_qual_scaling,
                                                args.stop_qual_offset,
                                                args.T_max)
        elif args.stop_qual_rule == 'running_average':
            self.stop_qual_rule = RunningAverage(
                args.stop_qual_ra_bw,
                args.stop_qual_scaling + args.stop_qual_offset,
                args.stop_qual_ra_off)
        else:
            self.stop_qual_rule = Constant(args.stop_qual_final)

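        # Choose the rule that regulates the temperature parameter during
        # training.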
        if self.cfg.temperature_regulation == 'follow_quality':
            self.beta_rule = FollowLeadAvg(1, 80, 1)
        elif self.cfg.temperature_regulation == 'constant':
            self.beta_rule = Constant(cfg.init_temperature)
Example #2
    def __init__(self, args, shared_average_model, global_count,
                 global_writer_loss_count, global_writer_quality_count,
                 global_win_event_count, save_dir):
        super(AgentAcerContinuousTrainer, self).__init__()

        self.args = args
        self.shared_average_model = shared_average_model
        self.global_count = global_count
        self.global_writer_loss_count = global_writer_loss_count
        self.global_writer_quality_count = global_writer_quality_count
        self.global_win_event_count = global_win_event_count
        self.writer_idx_warmup_loss = 0
        # self.eps = self.args.init_epsilon
        self.save_dir = save_dir
        if args.stop_qual_rule == 'naive':
            self.stop_qual_rule = NaiveDecay(initial_eps=args.init_stop_qual,
                                             episode_shrinkage=1,
                                             change_after_n_episodes=5)
        elif args.stop_qual_rule == 'gaussian':
            self.stop_qual_rule = GaussianDecay(args.stop_qual_final,
                                                args.stop_qual_scaling,
                                                args.stop_qual_offset,
                                                args.T_max)
        elif args.stop_qual_rule == 'running_average':
            self.stop_qual_rule = RunningAverage(
                args.stop_qual_ra_bw,
                args.stop_qual_scaling + args.stop_qual_offset,
                args.stop_qual_ra_off)
        else:
            self.stop_qual_rule = NaiveDecay(args.init_stop_qual)

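        # Choose the decay schedule for b_sigma according to args.eps_rule; the
        # self-regulating rules disable the step limit by setting T_max to inf.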
        if self.args.eps_rule == "treesearch":
            self.b_sigma_rule = ActionPathTreeNodes()
        elif self.args.eps_rule == "sawtooth":
            self.b_sigma_rule = ExpSawtoothEpsDecay()
        elif self.args.eps_rule == 'gaussian':
            self.b_sigma_rule = GaussianDecay(args.b_sigma_final,
                                              args.b_sigma_scaling,
                                              args.p_sigma, args.T_max)
        elif self.args.eps_rule == "self_reg_min":
            self.args.T_max = np.inf
            self.b_sigma_rule = FollowLeadMin(
                (args.stop_qual_scaling + args.stop_qual_offset), 1)
        elif self.args.eps_rule == "self_reg_avg":
            self.args.T_max = np.inf
            self.b_sigma_rule = FollowLeadAvg(
                (args.stop_qual_scaling + args.stop_qual_offset) / 4, 2, 1)
        elif self.args.eps_rule == "self_reg_exp_avg":
            self.args.T_max = np.inf
            self.b_sigma_rule = ExponentialAverage(
                (args.stop_qual_scaling + args.stop_qual_offset) / 4, 0.9, 1)
        else:
            # self.eps is never set (the assignment above is commented out), so
            # fall back to the configured initial epsilon directly.
            self.b_sigma_rule = NaiveDecay(args.init_epsilon, 0.00005, 1000, 1)
Example #3
    def train_step(self, rank, writer):
        device = torch.device("cuda:" +
                              str(rank // self.cfg.gen.n_processes_per_gpu))
        print('Running on device: ', device)
        torch.cuda.set_device(device)
        torch.set_default_tensor_type(torch.FloatTensor)
        self.setup(rank, self.cfg.gen.n_processes_per_gpu * self.cfg.gen.n_gpu)

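        # Build the environment the agent interacts with; quality stats are
        # posted through the shared writer counter.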
        env = SpGcnEnv(self.cfg,
                       device,
                       writer=writer,
                       writer_counter=self.global_writer_quality_count)
        # Create shared network

        model = GcnEdgeAC(self.cfg, device, writer=writer)
        model.cuda(device)
        shared_model = DDP(model,
                           device_ids=[device],
                           find_unused_parameters=True)
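        # If the feature extractor has its own ('extra') optimizer, the loss and
        # optimizer containers get an additional slot for the embedding network.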
        if 'extra' in self.cfg.fe.optim:
            # optimizers
            MovSumLosses = namedtuple(
                'mov_avg_losses',
                ('actor', 'embeddings', 'critic', 'temperature'))
            OptimizerContainer = namedtuple(
                'OptimizerContainer',
                ('actor', 'embeddings', 'critic', 'temperature', 'actor_shed',
                 'embed_shed', 'critic_shed', 'temp_shed'))
        else:
            MovSumLosses = namedtuple('mov_avg_losses',
                                      ('actor', 'critic', 'temperature'))
            OptimizerContainer = namedtuple(
                'OptimizerContainer',
                ('actor', 'critic', 'temperature', 'actor_shed', 'critic_shed',
                 'temp_shed'))
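        # With 'rl_loss' the feature-extractor parameters are optimized jointly
        # with the actor; otherwise the actor optimizer covers the actor only.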
        if "rl_loss" == self.cfg.fe.optim:
            actor_optimizer = torch.optim.Adam(
                list(shared_model.module.actor.parameters()) +
                list(shared_model.module.fe_ext.parameters()),
                lr=self.cfg.sac.actor_lr,
                betas=self.cfg.sac.actor_betas)
        else:
            actor_optimizer = torch.optim.Adam(
                shared_model.module.actor.parameters(),
                lr=self.cfg.sac.actor_lr,
                betas=self.cfg.sac.actor_betas)
        if "extra" in self.cfg.fe.optim:
            embeddings_optimizer = torch.optim.Adam(
                shared_model.module.fe_ext.parameters(),
                lr=self.cfg.fe.lr,
                betas=self.cfg.fe.betas)
        critic_optimizer = torch.optim.Adam(
            shared_model.module.critic.parameters(),
            lr=self.cfg.sac.critic_lr,
            betas=self.cfg.sac.critic_betas)
        temp_optimizer = torch.optim.Adam([shared_model.module.log_alpha],
                                          lr=self.cfg.sac.alpha_lr,
                                          betas=self.cfg.sac.alpha_betas)

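        # Moving averages of the individual losses plus a ReduceLROnPlateau
        # scheduler for every optimizer.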
        if "extra" in self.cfg.fe.optim:
            mov_sum_losses = MovSumLosses(RunningAverage(), RunningAverage(),
                                          RunningAverage(), RunningAverage())
            optimizers = OptimizerContainer(
                actor_optimizer, embeddings_optimizer, critic_optimizer,
                temp_optimizer, ReduceLROnPlateau(actor_optimizer),
                ReduceLROnPlateau(embeddings_optimizer),
                ReduceLROnPlateau(critic_optimizer),
                ReduceLROnPlateau(temp_optimizer))
        else:
            mov_sum_losses = MovSumLosses(RunningAverage(), RunningAverage(),
                                          RunningAverage())
            optimizers = OptimizerContainer(
                actor_optimizer, critic_optimizer, temp_optimizer,
                ReduceLROnPlateau(actor_optimizer),
                ReduceLROnPlateau(critic_optimizer),
                ReduceLROnPlateau(temp_optimizer))

        dist.barrier()

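        # Load weights: resume the full agent, load a pretrained feature
        # extractor, or (on rank 0) warm up the feature extractor and save it.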
        if self.cfg.gen.resume:
            shared_model.module.load_state_dict(
                torch.load(os.path.join(self.log_dir,
                                        self.cfg.gen.model_name)))
        elif self.cfg.fe.load_pretrained:
            shared_model.module.fe_ext.load_state_dict(
                torch.load(os.path.join(self.save_dir,
                                        self.cfg.fe.model_name)))
        elif 'warmup' in self.cfg.fe and rank == 0:
            print('pretrain fe extractor')
            self.pretrain_embeddings_gt(shared_model.module.fe_ext, device,
                                        writer)
            torch.save(shared_model.module.fe_ext.state_dict(),
                       os.path.join(self.save_dir, self.cfg.fe.model_name))
        dist.barrier()

        if "none" == self.cfg.fe.optim:
            for param in shared_model.module.fe_ext.parameters():
                param.requires_grad = False

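        # Main training loop: iterate over the dataset until the global step
        # counter exceeds T_max.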
        dset = SpgDset(self.cfg.gen.data_dir)
        step = 0
        while self.global_count.value() <= self.cfg.trainer.T_max:
            dloader = DataLoader(dset,
                                 batch_size=self.cfg.trainer.batch_size,
                                 shuffle=True,
                                 pin_memory=True,
                                 num_workers=0)
            for iteration in range(
                    len(dset) * self.cfg.trainer.data_update_frequency):
                if iteration % self.cfg.trainer.data_update_frequency == 0:
                    self.update_env_data(env, dloader, device)
                # waff_dis = torch.softmax(env.edge_features[:, 0].squeeze() + 1e-30, dim=0)
                # waff_dis = torch.softmax(env.gt_edge_weights + 0.5, dim=0)
                # waff_dis = torch.softmax(torch.ones_like(env.b_gt_edge_weights), dim=0)
                # loss_weight = torch.softmax(env.b_gt_edge_weights + 1, dim=0)
                env.reset()
                self.update_rt_vars(critic_optimizer, actor_optimizer)
                if rank == 0 and self.cfg.rt_vars.safe_model:
                    if self.cfg.gen.model_name != "":
                        torch.save(
                            shared_model.module.state_dict(),
                            os.path.join(self.log_dir,
                                         self.cfg.gen.model_name))
                    else:
                        torch.save(shared_model.module.state_dict(),
                                   os.path.join(self.log_dir, 'agent_model'))

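                # Roll out one episode: act randomly until the replay memory is
                # full, afterwards sample actions from the current policy.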
                state = env.get_state()
                while not env.done:
                    # Calculate policy and values
                    post_stats = ((self.global_writer_count.value() + 1) %
                                  self.cfg.trainer.post_stats_frequency == 0)
                    post_model = ((self.global_writer_count.value() + 1) %
                                  self.cfg.trainer.post_model_frequency == 0)
                    post_stats &= self.memory.is_full()
                    post_model &= self.memory.is_full()
                    distr = None
                    if not self.memory.is_full():
                        action = torch.rand_like(env.current_edge_weights)
                    else:
                        distr, _, _, action, _, _ = self.agent_forward(
                            env,
                            shared_model,
                            state=state,
                            grad=False,
                            post_input=post_stats,
                            post_model=post_model)

                    logg_dict = {}
                    if post_stats:
                        for i in range(len(self.cfg.sac.s_subgraph)):
                            logg_dict['alpha_' + str(i)] = (
                                shared_model.module.alpha[i].item())
                        if distr is not None:
                            logg_dict['mean_loc'] = distr.loc.mean().item()
                            logg_dict['mean_scale'] = distr.scale.mean().item()

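                    # Once the replay memory is full, run the configured number
                    # of gradient updates per environment step.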
                    if self.memory.is_full():
                        for i in range(self.cfg.trainer.n_updates_per_step):
                            self._step(self.memory,
                                       optimizers,
                                       mov_sum_losses,
                                       env,
                                       shared_model,
                                       step,
                                       writer=writer)
                            self.global_writer_loss_count.increment()

                    next_state, reward = env.execute_action(
                        action, logg_dict, post_stats=post_stats)
                    # next_state, reward, quality = env.execute_action(torch.sigmoid(distr.loc), logg_dict, post_stats=post_stats)

                    if self.cfg.rt_vars.add_noise:
                        noise = torch.randn_like(reward) * 0.2
                        reward = reward + noise

                    self.memory.push(self.state_to_cpu(state), action, reward,
                                     self.state_to_cpu(next_state), env.done)
                    state = next_state

                self.global_count.increment()
                step += 1
                if rank == 0:
                    self.global_writer_count.increment()
                if step > self.cfg.trainer.T_max:
                    break

        dist.barrier()
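        # Rank 0 clears the replay memory and saves the final model unless
        # saving is disabled in the config.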
        if rank == 0:
            self.memory.clear()
            if (not self.cfg.gen.cross_validate_hp
                    and not self.cfg.gen.test_score_only
                    and not self.cfg.gen.no_save):
                if self.cfg.gen.model_name != "":
                    torch.save(
                        shared_model.state_dict(),
                        os.path.join(self.log_dir, self.cfg.gen.model_name))
                    print('saved')
                else:
                    torch.save(shared_model.state_dict(),
                               os.path.join(self.log_dir, 'agent_model'))

        self.cleanup()
        return sum(env.acc_reward) / len(env.acc_reward)