Code Example #1
 def __init__(self,
              policy,
              env,
              writer,
              gamma=1.,
              lr=1e-2,
              pretrained_lm=None,
              word_emb_size=8,
              hidden_size=24,
              pretrain=False,
              kernel_size=1,
              stride=2,
              num_filters=3,
              num_truncated=10,
              update_every=30):
     Agent.__init__(self,
                    policy,
                    env,
                    gamma=gamma,
                    lr=lr,
                    pretrained_lm=pretrained_lm,
                    word_emb_size=word_emb_size,
                    hidden_size=hidden_size,
                    pretrain=pretrain,
                    update_every=update_every,
                    kernel_size=kernel_size,
                    stride=stride,
                    num_filters=num_filters,
                    num_truncated=num_truncated,
                    writer=writer)
     self.update_every = 1  # fixed to 1 here, independent of the update_every argument
     self.MSE_loss = nn.MSELoss(reduction="none")  # per-element loss, reduced manually later
     self.update_mode = "episode"
     self.writer_iteration = 0  # step counter for logging with the writer
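Every constructor in these examples builds its loss with nn.MSELoss(reduction="none"), which returns the per-element squared errors rather than a scalar. A minimal, self-contained sketch of that behaviour (the tensors are made up for illustration):

import torch
import torch.nn as nn

# reduction="none" keeps the per-element squared errors so the caller can
# weight or mask individual terms before reducing to a scalar.
mse = nn.MSELoss(reduction="none")
pred = torch.tensor([0.5, 1.0, 2.0])
target = torch.tensor([1.0, 1.0, 0.0])
per_element = mse(pred, target)    # tensor([0.2500, 0.0000, 4.0000])
loss = per_element.mean()          # reduce manually when a scalar is needed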
Code Example #2
File: reinforce.py  Project: AMDonati/RL-NLP
    def __init__(self, policy, optimizer, env, test_envs, pretrained_lm, writer, out_path, gamma=1., lr=1e-2,
                 grad_clip=None, scheduler=None,
                 pretrain=False, update_every=50, num_truncated=10, p_th=None, truncate_mode="top_k", log_interval=10,
                 eval_no_trunc=0, alpha_logits=0., alpha_decay_rate=0., epsilon_truncated=0., train_seed=0,
                 epsilon_truncated_rate=1.,
                 is_loss_correction=1, train_metrics=[], test_metrics=[], top_p=1., temperature=1., temperature_step=1,
                 temp_factor=1., temperature_min=1., temperature_max=10, s_min=10, s_max=200, inv_schedule_step=0,
                 schedule_start=1, curriculum=0, KL_coeff=0., truncation_optim=0):
        Agent.__init__(self, policy=policy, optimizer=optimizer, env=env, writer=writer, out_path=out_path, gamma=gamma,
                       lr=lr,
                       grad_clip=grad_clip,
                       scheduler=scheduler,
                       pretrained_lm=pretrained_lm,
                       pretrain=pretrain, update_every=update_every,
                       num_truncated=num_truncated,
                       p_th=p_th,
                       truncate_mode=truncate_mode,
                       log_interval=log_interval, test_envs=test_envs, eval_no_trunc=eval_no_trunc,
                       alpha_logits=alpha_logits, alpha_decay_rate=alpha_decay_rate,
                       epsilon_truncated=epsilon_truncated,
                       train_seed=train_seed, epsilon_truncated_rate=epsilon_truncated_rate,
                       is_loss_correction=is_loss_correction, train_metrics=train_metrics, test_metrics=test_metrics,
                       top_p=top_p, temperature=temperature, temperature_step=temperature_step, temp_factor=temp_factor,
                       temperature_min=temperature_min, temperature_max=temperature_max, s_min=s_min, s_max=s_max,
                       inv_schedule_step=inv_schedule_step, schedule_start=schedule_start, curriculum=curriculum,
                       KL_coeff=KL_coeff, truncation_optim=truncation_optim)

        self.MSE_loss = nn.MSELoss(reduction="none")
        self.grad_clip = grad_clip
        self.update_mode = "episode"
        self.writer_iteration = 0
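This constructor's truncate_mode="top_k", num_truncated, and top_p arguments point at truncating the language model's distribution before sampling. The sketch below is a generic top-k filter, not the project's implementation; the function name and tensor shapes are assumptions:

import torch
import torch.nn.functional as F

def top_k_filter(logits, k=10):
    # Keep only the k largest logits and renormalise; all other tokens get
    # probability zero (generic illustration of a "top_k" truncation mode).
    top_values, _ = torch.topk(logits, k, dim=-1)
    cutoff = top_values[..., -1, None]                      # k-th largest logit
    filtered = logits.masked_fill(logits < cutoff, float("-inf"))
    return F.softmax(filtered, dim=-1)

probs = top_k_filter(torch.randn(1, 50), k=10)              # mass restricted to the 10 best tokens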
Code Example #3
 def __init__(self, policy, env, writer, gamma=1., eps_clip=0.2, pretrained_lm=None, update_every=100,
              K_epochs=10, entropy_coeff=0.01, pretrain=False, word_emb_size=8, hidden_size=24, kernel_size=1,
              stride=2, num_filters=3, num_truncated=10):
     Agent.__init__(self, policy, env, writer, gamma=gamma, pretrained_lm=pretrained_lm, pretrain=pretrain,
                    update_every=update_every, word_emb_size=word_emb_size, hidden_size=hidden_size,
                    kernel_size=kernel_size, stride=stride, num_filters=num_filters, num_truncated=num_truncated)
     # policy_old is a second copy of the policy network, used to evaluate the
     # "old" action probabilities in PPO's importance-sampling ratio
     self.policy_old = policy(env.clevr_dataset.len_vocab, word_emb_size, hidden_size, kernel_size=kernel_size,
                              stride=stride, num_filters=num_filters)
     self.policy_old.load_state_dict(self.policy.state_dict())
     self.policy_old.to(self.device)
     self.K_epochs = K_epochs  # optimisation epochs per batch of collected rollouts
     self.MSE_loss = nn.MSELoss(reduction="none")
     self.eps_clip = eps_clip  # clipping range of the PPO surrogate objective
     self.entropy_coeff = entropy_coeff  # weight of the entropy bonus
     self.update_mode = "episode"
     self.writer_iteration = 0
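Example #3 is a PPO-style agent: policy_old keeps the behaviour policy's weights, eps_clip sets the clipping range, and K_epochs the number of optimisation passes per batch. A generic sketch of the clipped surrogate objective these hyperparameters feed into (illustrative only, not the project's actual update code):

import torch

def clipped_surrogate(log_probs, old_log_probs, advantages, eps_clip=0.2):
    # importance ratio pi_theta(a|s) / pi_theta_old(a|s), computed in log space
    ratios = torch.exp(log_probs - old_log_probs)
    unclipped = ratios * advantages
    clipped = torch.clamp(ratios, 1.0 - eps_clip, 1.0 + eps_clip) * advantages
    # PPO maximises the element-wise minimum; return the negative for a minimiser
    return -torch.min(unclipped, clipped).mean()

loss = clipped_surrogate(torch.randn(5), torch.randn(5), torch.randn(5))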
Code Example #4
 def __init__(self, to_me_queue, from_me_queue, initial_location,
              initial_visible_map, name):
     Agent.__init__(self, to_me_queue, from_me_queue, initial_location,
                    initial_visible_map, OPFOR_VISION_DISTANCE, name)
     self.action_queue = []
     self.path = []
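Example #4 connects the agent to the rest of the simulation through a pair of queues; judging by the names, to_me_queue carries inbound messages and from_me_queue outbound ones. A minimal sketch of that two-queue pattern using Python's standard queue module (the message format is an assumption):

import queue

to_me_queue = queue.Queue()     # messages addressed to the agent
from_me_queue = queue.Queue()   # messages sent by the agent

to_me_queue.put({"type": "observe"})                     # controller posts a request
msg = to_me_queue.get()                                  # agent reads its inbox
from_me_queue.put({"type": "ack", "re": msg["type"]})    # agent replies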