# Linearly map a reward in [reward_inf, reward_sup] onto a k value in [k_start, k_end]
# (the target name `k` is an assumption; the original left-hand side is truncated)
k = ((k_end - k_start) * reward_x +
     (k_start * reward_sup - reward_inf * k_end)) / (reward_sup - reward_inf)
kends = [1., 1.25, 1.5, 1.75, 2., 2.25, 2.5, 2.75, 3]
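# Illustrative sanity check (a sketch using the k_start / reward_inf / reward_sup values
# from the sweep below): the two reward bounds map exactly onto k_start and k_end.
_k_start, _k_end = 0, kends[-1]
_r_inf, _r_sup = -20, 0
_k_inf = ((_k_end - _k_start) * _r_inf + (_k_start * _r_sup - _r_inf * _k_end)) / (_r_sup - _r_inf)
_k_sup = ((_k_end - _k_start) * _r_sup + (_k_start * _r_sup - _r_inf * _k_end)) / (_r_sup - _r_inf)
assert abs(_k_inf - _k_start) < 1e-9 and abs(_k_sup - _k_end) < 1e-9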

current_model = NoisyDQN(env.observation_space.shape[0], env.action_space.n,
                         env)
target_model = NoisyDQN(env.observation_space.shape[0], env.action_space.n,
                        env)

if USE_CUDA:
    current_model = current_model.cuda()
    target_model = target_model.cuda()

optimizer = optim.Adam(current_model.parameters(), lr=0.0001)

replay_buffer = tl.BaseReplayBuffer(10000)


def update_target(current_model, target_model):
    # Hard update: copy the online network's weights into the target network
    target_model.load_state_dict(current_model.state_dict())


update_target(current_model, target_model)
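# This initial hard sync makes the target network identical to the online network before
# training starts; periodic re-syncs during training are assumed, as in the usual DQN recipe.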

losses_all = []
rewards_all = []

# Sweep over the final k values; within each run, rewards in [reward_inf, reward_sup]
# are mapped linearly onto [k_start, k_end] by the expression at the top of this section
for k_end in kends:
    k_start = 0
    reward_inf = -20
    reward_sup = 0
env_id = game_name
env    = make_atari(env_id)
env    = wrap_deepmind(env)
env    = wrap_pytorch(env)
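# The wrappers above apply the usual DeepMind Atari preprocessing (frame skipping,
# 84x84 grayscale observations, reward clipping) and reorder observations to
# channels-first for PyTorch; the details are assumed from the standard baselines-style
# wrappers. This is why the CNN variant below takes the full observation_space.shape
# rather than shape[0] as in the low-dimensional case above.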

current_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
target_model  = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)


if USE_CUDA:
    current_model = current_model.cuda()
    target_model  = target_model.cuda()
    
optimizer = optim.Adam(current_model.parameters(), lr=learningRate)

replay_buffer = tl.BaseReplayBuffer(capacity)

def update_target(current_model, target_model):
    target_model.load_state_dict(current_model.state_dict())

update_target(current_model, target_model)

losses_all = []
rewards_all = []
# Average over several independent runs; arvg_num (defined earlier) is the run count
for i in range(arvg_num):
    losses = []
    all_rewards = []
    episode_reward = 0
    state = env.reset()
    # Fresh online and target networks for this run
    current_model = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
    target_model  = CnnNoisyDQN(env.observation_space.shape, env.action_space.n, env)
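    # Sketch (assumption, mirroring the setup above): start each run from scratch by
    # moving the fresh networks to the GPU, re-syncing the target network, and
    # rebuilding the optimizer so it tracks the new model's parameters.
    if USE_CUDA:
        current_model = current_model.cuda()
        target_model = target_model.cuda()
    update_target(current_model, target_model)
    optimizer = optim.Adam(current_model.parameters(), lr=learningRate)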