# rescale_reward (name assumed; the listing begins mid-expression): linearly
# maps reward_x from the interval [reward_inf, reward_sup] onto [k_start, k_end].
def rescale_reward(reward_x):
    return ((k_end - k_start) * reward_x
            + (k_start * reward_sup - reward_inf * k_end)) / (reward_sup - reward_inf)

# Candidate values of k_end for the reshaped reward range.
kends = [1., 1.25, 1.5, 1.75, 2., 2.25, 2.5, 2.75, 3]

current_model = NoisyDQN(env.observation_space.shape[0], env.action_space.n, env)
target_model = NoisyDQN(env.observation_space.shape[0], env.action_space.n, env)

if USE_CUDA:
    current_model = current_model.cuda()
    target_model = target_model.cuda()

optimizer = optim.Adam(current_model.parameters(), lr=0.0001)
replay_buffer = tl.BaseReplayBuffer(10000)

# Hard update: copy the online network's weights into the target network.
def update_target(current_model, target_model):
    target_model.load_state_dict(current_model.state_dict())

update_target(current_model, target_model)

losses_all = []
rewards_all = []

for k_end in kends:
    k_start = 0
    reward_inf = -20
    reward_sup = 0
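To see that the expression really is the intended linear map, plug in the endpoints: at reward_x = reward_inf the numerator collapses to k_start * (reward_sup - reward_inf), and at reward_x = reward_sup it collapses to k_end * (reward_sup - reward_inf). A minimal standalone check (the explicit-argument rescale helper below is hypothetical, written only for this test):

def rescale(reward_x, k_start, k_end, reward_inf, reward_sup):
    # Same formula as above, with explicit arguments so it can run in isolation.
    return ((k_end - k_start) * reward_x
            + (k_start * reward_sup - reward_inf * k_end)) / (reward_sup - reward_inf)

assert rescale(-20, 0, 1.5, -20, 0) == 0.0   # reward_inf maps to k_start
assert rescale(0, 0, 1.5, -20, 0) == 1.5     # reward_sup maps to k_end
assert rescale(-10, 0, 1.5, -20, 0) == 0.75  # midpoint maps to the midpoint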
env_id = game_name
env = make_atari(env_id)   # raw Atari environment
env = wrap_deepmind(env)   # DeepMind-style preprocessing of frames and rewards
env = wrap_pytorch(env)    # channel-first observations for PyTorch

current_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
target_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)

if USE_CUDA:
    current_model = current_model.cuda()
    target_model = target_model.cuda()

optimizer = optim.Adam(current_model.parameters(), lr=learningRate)
replay_buffer = tl.BaseReplayBuffer(capacity)

# Hard update: copy the online network's weights into the target network.
def update_target(current_model, target_model):
    target_model.load_state_dict(current_model.state_dict())

update_target(current_model, target_model)

losses_all = []
rewards_all = []

for i in range(arvg_num):
    losses = []
    all_rewards = []
    episode_reward = 0
    state = env.reset()
    # Fresh networks for each averaging run; note that the optimizer created
    # above still references the previous parameters (see the sketch below).
    current_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
    target_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
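One thing to watch in this loop: the networks are re-initialized at the start of every averaging run, but the Adam optimizer was built outside the loop and still holds references to the previous networks' parameters, so its updates would never reach the new models. A sketch of the usual fix, reusing the names above, under the assumption that the omitted remainder of the loop does not already rebuild the optimizer:

for i in range(arvg_num):
    losses = []
    all_rewards = []
    episode_reward = 0
    state = env.reset()
    # Fresh networks for this run...
    current_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
    target_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
    if USE_CUDA:
        current_model = current_model.cuda()
        target_model = target_model.cuda()
    # ...and a fresh optimizer bound to the new parameters, plus an initial
    # hard sync of the target network.
    optimizer = optim.Adam(current_model.parameters(), lr=learningRate)
    update_target(current_model, target_model)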