    # Best hyper-parameters found so far during the grid search
    XlearnRate = 0
    Xdiscount = 0
    Xepsi = 0
    Xdec = 0
    reward = float("-inf")  # best average reward per episode seen so far
    rewards = []
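    # Grid search over learning rate, discount factor, initial epsilon and epsilon decay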
    for j in learnRates:
        for k in discounts:
            for ep in epsi:
                for epdec in epsi_decay:
                    params = [episode_count, j, k, ep, epdec]
                    #params = [5000, 0.001,0.95, 1.1, 0.005]
                    #params = [5000, 0.0005,0.99, 0.1]
                    agent = DQNAgent(env.action_space, env.observation_space,
                                     params)

                    agent._render = False  # disable rendering to speed up training
                    rewardList, stepList = agent.train(env)
                    rewards.append(rewardList)
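                    # Keep the agent with the highest average reward per episode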
                    if reward < sum(rewards[-1]) / episode_count:
                        reward = sum(rewards[-1]) / episode_count
                        mytrainedAgent = agent
                        XlearnRate = j
                        Xdiscount = k
                        Xepsi = ep
                        Xdec = epdec
                    plt.plot(low_pass(rewardList),
                             label=("Test: " + str(params)))

    ## Train Agent
    #agent._render = False
    #rewardList, stepList =  agent.train(env)
    #agent.saveModel(agent.checkpointpath)
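
# Training loop for the sender/receiver communication game: the sender maps an
# observation to a message, the receiver maps that message to an action, and
# both agents learn from the shared reward.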
for ep in range(num_episodes):
    # Periodic progress report
    if ep % 1000 == 0:
        print("episode=", ep)
        print("epsilon", sender.epsilon)

    obs = env.get_observation()      # state shown to the sender
    mex = sender.take_action(obs)    # message emitted by the sender
    act = receiver.take_action(mex)  # action chosen by the receiver
    rew = env.get_reward(obs, act)   # shared reward for the episode

    # Both agents store the transition with the same shared reward
    sender.store_transition(obs, mex, rew)
    receiver.store_transition(mex, act, rew)
    returns.append(rew)

    sender.train(ep)
    receiver.train(ep)

# Inspect the learned policies after training
print("final epsilon", sender.epsilon)
print("sender action probabilities")
for s in range(num_act):
    print(sender.get_action_probabilities(s))

print("receiver action probabilities")
for s in range(num_act):
    print(receiver.get_action_probabilities(s))

# Plot the sender's action probabilities for each state s
for s in range(num_act):
    plt.plot(np.arange(num_act),
             sender.get_action_probabilities(s),
             label=str(s))
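# Finalize the figure (assumed here; adjust if legend/show are called elsewhere)
plt.legend()
plt.show()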