Exemple #1
0
    # Excerpt from a training loop (the enclosing function header and the tail
    # of the loop body are outside this view). Each episode runs one rollout,
    # updates a smoothed running average of the total reward and records both
    # the raw and the averaged reward, rescaled by config['scalereward'].
    costlist = []
    # An episode is rendered/logged when its index is a multiple of showevery.
    showevery = 10
    for episode in range(numepisodes):
        # Every 100 episodes shrink the interval by one (never below 1), so
        # rendering gets more frequent over time. This also fires at
        # episode 0, so the effective starting interval is 9, not 10.
        if episode % 100 == 0:
            showevery = max(1, showevery - 1)

        if episode % showevery == 0:
            # Rendered episode: do_rollout receives eps=None.
            # NOTE(review): presumably None switches exploration off inside
            # agents.do_rollout -- confirm against that function.
            render = True
            eps = None
            print('episode', episode, 'l rate', agent.getlearnrate())
            # NOTE(review): `avg` is read here before the `episode == 0`
            # assignment further down, and this branch is taken on episode 0.
            # It must therefore already be bound before the loop (not visible
            # in this excerpt); otherwise this line raises NameError.
            oldavg = avg
        else:
            # Non-rendered episode: the episode index is passed as eps.
            render = False
            eps = episode
        # Wall-clock start of the rollout; not read again within this excerpt.
        startt = time.time()
        total_rew, steps, cost, listob, listact = agents.do_rollout(
            agent, env, eps, render=render)

        # Seed the running average with the first episode's reward.
        if episode == 0:
            avg = total_rew
        if episode % 50 == 0:
            print(agent.config)
        # `episode % 1 == 0` is always true for an integer episode, so this
        # block runs on every iteration.
        if episode % 1 == 0:
            listob = np.array(listob)
            listact = np.array(listact)
            # One row per recorded action, agent.n_out columns, with a 1. in
            # the column selected by each action (one-hot encoding).
            # NOTE(review): assumes listact is a 1-D array of integer action
            # indices in [0, agent.n_out) -- confirm against do_rollout.
            allactionsparse = np.zeros((listact.shape[0], agent.n_out))
            allactionsparse[np.arange(listact.shape[0]), listact] = 1.

            # Smoothing weight 1 / (episode + 1) ** 0.6, floored at 0.06.
            # It equals 1 on episode 0 (avg becomes total_rew) and decays
            # towards the floor as training progresses.
            inc = max(0.06, 1. / (episode + 1.)**0.6)
            avg = avg * (1 - inc) + inc * total_rew
            # Record raw and smoothed rewards divided by the configured
            # reward scale. Both lists are created outside this excerpt.
            totrewlist.append(total_rew / agent.config['scalereward'])
            totrewavglist.append(avg / agent.config['scalereward'])
Exemple #2
0
    # Excerpt from a training loop (the enclosing function header and the tail
    # of the loop body are outside this view). Each episode runs one rollout,
    # updates a smoothed running average of the total reward and records both
    # the raw and the averaged reward, rescaled by config['scalereward'].
    totrewlist = []
    totrewavglist = []
    costlist = []
    # Every showevery-th episode (starting with episode 0) is rendered/logged.
    showevery = 10
    for episode in range(numepisodes):
        if episode % showevery == 0:
            # Rendered episode: do_rollout receives eps=None.
            # NOTE(review): presumably None switches exploration off inside
            # agents.do_rollout -- confirm against that function.
            render = True
            eps = None
            print ('episode', episode, 'l rate', agent.getlearnrate())
            # NOTE(review): `avg` is read here before the `episode == 0`
            # assignment further down, and this branch is taken on episode 0.
            # It must therefore already be bound before the loop (not visible
            # in this excerpt); otherwise this line raises NameError.
            oldavg = avg
        else:
            # Non-rendered episode: the episode index is passed as eps.
            render = False
            eps = episode
        # Wall-clock start of the rollout; not read again within this excerpt.
        startt = time.time()
        total_rew, steps, cost, listob, listact = agents.do_rollout(agent, env, eps, render=render)

        # Seed the running average with the first episode's reward.
        if episode == 0:
            avg = total_rew
        if episode % 50 == 0:
            print(agent.config)
        # `episode % 1 == 0` is always true for an integer episode, so this
        # block runs on every iteration.
        if episode % 1 == 0:
            listob = np.array(listob)
            listact = np.array(listact)
            # One row per recorded action, agent.n_out columns, with a 1. in
            # the column selected by each action (one-hot encoding).
            # NOTE(review): assumes listact is a 1-D array of integer action
            # indices in [0, agent.n_out) -- confirm against do_rollout.
            allactionsparse = np.zeros((listact.shape[0], agent.n_out))
            allactionsparse[np.arange(listact.shape[0]), listact] = 1.

            # Smoothing weight 1 / (episode + 1) ** 0.6, floored at 0.06.
            # It equals 1 on episode 0 (avg becomes total_rew) and decays
            # towards the floor as training progresses.
            inc = max(0.06, 1. / (episode + 1.) ** 0.6)
            avg = avg * (1 - inc) + inc * total_rew
            # Record raw and smoothed rewards divided by the configured
            # reward scale.
            totrewlist.append(total_rew / agent.config['scalereward'])
            totrewavglist.append(avg / agent.config['scalereward'])