# Per-episode training loop: run one rollout, one-hot encode the actions that
# were taken, and record the raw and smoothed (scaled) episode rewards.
costlist = []
showevery = 10  # an episode is rendered/logged when episode % showevery == 0

for episode in range(numepisodes):
    # Every 100 episodes (including episode 0) shrink the render interval by
    # one, never letting it drop below 1.
    if episode % 100 == 0:
        showevery = max(1, showevery - 1)

    # Rendered episodes pass eps=None to the rollout; all others pass the
    # episode index.  (Presumably eps drives exploration -- TODO confirm
    # against agents.do_rollout.)
    render = episode % showevery == 0
    eps = None if render else episode
    if render:
        print('episode', episode, 'l rate', agent.getlearnrate())
        # NOTE(review): this runs on episode 0, before `avg` is assigned in
        # this loop, so `avg` must already be bound by earlier code -- confirm.
        oldavg = avg

    startt = time.time()
    rollout = agents.do_rollout(agent, env, eps, render=render)
    total_rew, steps, cost, listob, listact = rollout

    # Seed the running average with the first episode's reward.
    if episode == 0:
        avg = total_rew
    if episode % 50 == 0:
        print(agent.config)

    # `% 1` is always 0 for integer episodes, so this runs every time.
    if episode % 1 == 0:
        listob = np.array(listob)
        listact = np.array(listact)
        # One-hot encoding: row i gets a 1. in column listact[i].
        allactionsparse = np.zeros((listact.shape[0], agent.n_out))
        allactionsparse[np.arange(listact.shape[0]), listact] = 1.

    # Moving-average weight: 1.0 on episode 0, decaying as
    # (episode + 1) ** -0.6 and floored at 0.06.
    inc = max(0.06, 1. / (episode + 1.)**0.6)
    avg = avg * (1 - inc) + inc * total_rew

    # Store both series divided by the configured reward scale.
    totrewlist.append(total_rew / agent.config['scalereward'])
    totrewavglist.append(avg / agent.config['scalereward'])
# Per-episode training loop: run one rollout, one-hot encode the actions that
# were taken, and record the raw and smoothed (scaled) episode rewards.
totrewlist = []      # per-episode reward, divided by config['scalereward']
totrewavglist = []   # running-average reward, divided by the same scale
costlist = []
showevery = 10       # an episode is rendered/logged when episode % showevery == 0

for episode in range(numepisodes):
    # Rendered episodes pass eps=None to the rollout; all others pass the
    # episode index.  (Presumably eps drives exploration -- TODO confirm
    # against agents.do_rollout.)
    render = episode % showevery == 0
    eps = None if render else episode
    if render:
        print('episode', episode, 'l rate', agent.getlearnrate())
        # NOTE(review): this runs on episode 0, before `avg` is assigned in
        # this loop, so `avg` must already be bound by earlier code -- confirm.
        oldavg = avg

    startt = time.time()
    rollout = agents.do_rollout(agent, env, eps, render=render)
    total_rew, steps, cost, listob, listact = rollout

    # Seed the running average with the first episode's reward.
    if episode == 0:
        avg = total_rew
    if episode % 50 == 0:
        print(agent.config)

    # `% 1` is always 0 for integer episodes, so this runs every time.
    if episode % 1 == 0:
        listob = np.array(listob)
        listact = np.array(listact)
        # One-hot encoding: row i gets a 1. in column listact[i].
        allactionsparse = np.zeros((listact.shape[0], agent.n_out))
        allactionsparse[np.arange(listact.shape[0]), listact] = 1.

    # Moving-average weight: 1.0 on episode 0, decaying as
    # (episode + 1) ** -0.6 and floored at 0.06.
    inc = max(0.06, 1. / (episode + 1.) ** 0.6)
    avg = avg * (1 - inc) + inc * total_rew

    scale = agent.config['scalereward']
    totrewlist.append(total_rew / scale)
    totrewavglist.append(avg / scale)