# Register params with env_params before building anything else (what
# registration does is defined outside this fragment).
env_params.register(params)

# Leading entry of the network's input_shape: 1 for grayscale, 3 otherwise.
if grayscale:
    net_channels = 1
else:
    net_channels = 3

model = Net(
    params.n_actions,
    input_shape=(net_channels, im_height, im_width),
)
if params.cuda_enabled:
    model.cuda()

# size_average=False is the legacy PyTorch flag requesting a loss that is
# not averaged over elements.
loss_fn = nn.MSELoss(size_average=False)

# Adam over all model parameters; learning rate comes from [learning] lr.
cfg_learning_rate = run.getfloat("learning", "lr")
optimizer = optim.Adam(model.parameters(), lr=cfg_learning_rate)

# Epsilon-greedy selector; epsilon comes from [defaults] epsilon.
cfg_epsilon = run.getfloat("defaults", "epsilon")
action_selector = ActionSelectorEpsilonGreedy(
    epsilon=cfg_epsilon,
    params=params,
)

target_net = agent.TargetNet(model)
dqn_agent = agent.DQNAgent(
    dqn_model=model,
    action_selector=action_selector,
)

# Experience source driven by the agent over env_pool, feeding a replay
# buffer whose capacity comes from [exp_buffer] size.
cfg_steps_count = run.getint("defaults", "n_steps")
exp_source = experience.ExperienceSource(
    env=env_pool,
    agent=dqn_agent,
    steps_count=cfg_steps_count,
)
cfg_buffer_size = run.getint("exp_buffer", "size")
exp_replay = experience.ExperienceReplayBuffer(
    exp_source,
    buffer_size=cfg_buffer_size,
)

# Optional DQN variants; both default to off when absent from the config.
use_target_dqn = run.getboolean("dqn", "target_dqn", fallback=False)
use_double_dqn = run.getboolean("dqn", "double_dqn", fallback=False)

# With target DQN enabled use target_net's model, otherwise the online
# model itself.
target_model = target_net.target_model if use_target_dqn else model
# Leading entry of the network's input_shape: frames_count for grayscale
# input, three times that otherwise.
if grayscale:
    net_channels = frames_count
else:
    net_channels = 3 * frames_count

# The action count is read from the first environment in the pool; the
# dueling flag comes from [dqn] dueling (no fallback is given for it).
cfg_dueling = run.getboolean("dqn", "dueling")
model = Net(
    env_pool[0].action_space.n,
    input_shape=(net_channels, im_height, im_width),
    dueling=cfg_dueling,
)
if cuda_enabled:
    model.cuda()

# Project-provided weighted MSE loss, constructed with size_average=True.
loss_fn = utils.WeightedMSELoss(size_average=True)

# Adam over all model parameters; learning rate comes from [learning] lr.
cfg_learning_rate = run.getfloat("learning", "lr")
optimizer = optim.Adam(model.parameters(), lr=cfg_learning_rate)

# Epsilon-greedy selector from ptan; epsilon comes from [defaults] epsilon.
cfg_epsilon = run.getfloat("defaults", "epsilon")
action_selector = ptan.actions.EpsilonGreedyActionSelector(epsilon=cfg_epsilon)

target_net = agent.TargetNet(model)
dqn_agent = agent.DQNAgent(
    dqn_model=model,
    action_selector=action_selector,
    cuda=cuda_enabled,
)

# Experience source driven by the agent over env_pool, feeding a replay
# buffer whose capacity comes from [exp_buffer] size.
cfg_steps_count = run.getint("defaults", "n_steps")
exp_source = experience.ExperienceSource(
    env=env_pool,
    agent=dqn_agent,
    steps_count=cfg_steps_count,
)
cfg_buffer_size = run.getint("exp_buffer", "size")
exp_replay = experience.ExperienceReplayBuffer(
    exp_source,
    buffer_size=cfg_buffer_size,
)
# Disabled alternative kept for reference: prioritized replay buffer.
# exp_replay = experience.PrioritizedReplayBuffer(exp_source, buffer_size=run.getint("exp_buffer", "size"),
#                                                 prob_alpha=run.getfloat("exp_buffer", "prio_alpha"),
#                                                 weight_beta=run.getfloat("exp_buffer", "prio_beta"))

# Optional DQN variants; both default to off when absent from the config.
use_target_dqn = run.getboolean("dqn", "target_dqn", fallback=False)
use_double_dqn = run.getboolean("dqn", "double_dqn", fallback=False)