import torch.nn as nn

# Agent is the base class defined elsewhere in this repo.
def __init__(self, policy, env, writer, gamma=1., lr=1e-2, pretrained_lm=None,
             word_emb_size=8, hidden_size=24, pretrain=False, kernel_size=1, stride=2,
             num_filters=3, num_truncated=10, update_every=30):
    Agent.__init__(self, policy, env, gamma=gamma, lr=lr, pretrained_lm=pretrained_lm,
                   word_emb_size=word_emb_size, hidden_size=hidden_size,
                   pretrain=pretrain, update_every=update_every, kernel_size=kernel_size,
                   stride=stride, num_filters=num_filters, num_truncated=num_truncated,
                   writer=writer)
    self.update_every = 1  # overrides the update_every argument: update after every episode
    self.MSE_loss = nn.MSELoss(reduction="none")
    self.update_mode = "episode"
    self.writer_iteration = 0
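# A per-episode update (self.update_every = 1, update_mode = "episode") matches the
# REINFORCE policy gradient. A minimal sketch of that loss, assuming one finished
# episode of per-step log-probabilities and rewards (names here are illustrative,
# not taken from the repo):
import torch

def reinforce_loss(logprobs, rewards, gamma=1.0):
    # Discounted returns G_t, computed backwards over the episode.
    returns, G = [], 0.0
    for r in reversed(rewards):
        G = r + gamma * G
        returns.insert(0, G)
    returns = torch.tensor(returns)
    # Gradient ascent on E[G_t * log pi(a_t | s_t)] == descent on its negation.
    return -(torch.stack(logprobs) * returns).sum()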
def __init__(self, policy, optimizer, env, test_envs, pretrained_lm, writer, out_path,
             gamma=1., lr=1e-2, grad_clip=None, scheduler=None, pretrain=False,
             update_every=50, num_truncated=10, p_th=None, truncate_mode="top_k",
             log_interval=10, eval_no_trunc=0, alpha_logits=0., alpha_decay_rate=0.,
             epsilon_truncated=0., train_seed=0, epsilon_truncated_rate=1.,
             is_loss_correction=1, train_metrics=None, test_metrics=None, top_p=1.,
             temperature=1., temperature_step=1, temp_factor=1., temperature_min=1.,
             temperature_max=10, s_min=10, s_max=200, inv_schedule_step=0,
             schedule_start=1, curriculum=0, KL_coeff=0., truncation_optim=0):
    # None sentinels instead of mutable [] defaults avoid the shared-list pitfall.
    train_metrics = [] if train_metrics is None else train_metrics
    test_metrics = [] if test_metrics is None else test_metrics
    Agent.__init__(self, policy=policy, optimizer=optimizer, env=env, writer=writer,
                   out_path=out_path, gamma=gamma, lr=lr, grad_clip=grad_clip,
                   scheduler=scheduler, pretrained_lm=pretrained_lm, pretrain=pretrain,
                   update_every=update_every, num_truncated=num_truncated, p_th=p_th,
                   truncate_mode=truncate_mode, log_interval=log_interval,
                   test_envs=test_envs, eval_no_trunc=eval_no_trunc,
                   alpha_logits=alpha_logits, alpha_decay_rate=alpha_decay_rate,
                   epsilon_truncated=epsilon_truncated, train_seed=train_seed,
                   epsilon_truncated_rate=epsilon_truncated_rate,
                   is_loss_correction=is_loss_correction, train_metrics=train_metrics,
                   test_metrics=test_metrics, top_p=top_p, temperature=temperature,
                   temperature_step=temperature_step, temp_factor=temp_factor,
                   temperature_min=temperature_min, temperature_max=temperature_max,
                   s_min=s_min, s_max=s_max, inv_schedule_step=inv_schedule_step,
                   schedule_start=schedule_start, curriculum=curriculum,
                   KL_coeff=KL_coeff, truncation_optim=truncation_optim)
    self.MSE_loss = nn.MSELoss(reduction="none")
    self.grad_clip = grad_clip
    self.update_mode = "episode"
    self.writer_iteration = 0
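# truncate_mode="top_k" together with num_truncated suggests the language-model
# distribution is restricted to its k most likely tokens before sampling. A minimal
# sketch of that masking, assuming raw next-token logits (illustrative only, not the
# repo's implementation):
import torch

def truncate_top_k(logits, k):
    # Keep the k largest logits; everything else becomes -inf (zero probability).
    kth_value = torch.topk(logits, k, dim=-1).values[..., -1, None]
    return logits.masked_fill(logits < kth_value, float("-inf"))

probs = torch.softmax(truncate_top_k(torch.randn(1, 87), k=10), dim=-1)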
def __init__(self, policy, env, writer, gamma=1., eps_clip=0.2, pretrained_lm=None,
             update_every=100, K_epochs=10, entropy_coeff=0.01, pretrain=False,
             word_emb_size=8, hidden_size=24, kernel_size=1, stride=2, num_filters=3,
             num_truncated=10):
    Agent.__init__(self, policy, env, writer, gamma=gamma, pretrained_lm=pretrained_lm,
                   pretrain=pretrain, update_every=update_every,
                   word_emb_size=word_emb_size, hidden_size=hidden_size,
                   kernel_size=kernel_size, stride=stride, num_filters=num_filters,
                   num_truncated=num_truncated)
    # Frozen snapshot of the policy, used for the PPO importance-sampling ratio.
    self.policy_old = policy(env.clevr_dataset.len_vocab, word_emb_size, hidden_size,
                             kernel_size=kernel_size, stride=stride,
                             num_filters=num_filters)
    self.policy_old.load_state_dict(self.policy.state_dict())
    self.policy_old.to(self.device)
    self.K_epochs = K_epochs  # optimization epochs per batch of rollouts
    self.MSE_loss = nn.MSELoss(reduction="none")
    self.eps_clip = eps_clip
    self.entropy_coeff = entropy_coeff
    self.update_mode = "episode"
    self.writer_iteration = 0
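# eps_clip, K_epochs, and the frozen policy_old are the standard PPO ingredients.
# A minimal sketch of the clipped surrogate loss they enable (advantage estimation
# is assumed to happen elsewhere; names are illustrative):
import torch

def ppo_clipped_loss(logprobs, old_logprobs, advantages, eps_clip=0.2):
    # Importance ratio pi_theta / pi_theta_old, recovered from log-probabilities.
    ratios = torch.exp(logprobs - old_logprobs.detach())
    surr1 = ratios * advantages
    surr2 = torch.clamp(ratios, 1.0 - eps_clip, 1.0 + eps_clip) * advantages
    # Pessimistic bound: maximize the smaller of the two surrogates.
    return -torch.min(surr1, surr2).mean()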
def __init__(self, to_me_queue, from_me_queue, initial_location, initial_visible_map, name):
    # OPFOR_VISION_DISTANCE is a module-level constant defined elsewhere in the repo.
    Agent.__init__(self, to_me_queue, from_me_queue, initial_location,
                   initial_visible_map, OPFOR_VISION_DISTANCE, name)
    self.action_queue = []  # actions waiting to be executed
    self.path = []          # currently planned route through the map
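# Hedged usage sketch: the constructor above wires the agent to two message queues.
# Assuming plain queue.Queue objects carrying dict messages (the message schema
# below is hypothetical, not taken from the repo):
from queue import Queue

to_agent, from_agent = Queue(), Queue()
to_agent.put({"kind": "move_order", "target": (3, 7)})  # controller -> agent
order = to_agent.get()                                  # agent consumes the order
from_agent.put({"kind": "ack", "order": order["kind"]}) # agent -> controller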