# NOTE(review): this chunk is a whitespace-mangled paste — an entire multi-line
# training-loop body has been collapsed onto one physical line, and the original
# indentation (which carries Python's block structure) is lost. It also starts
# mid-statement: `price, action, reward, loss=agent.loss)` is the TAIL of a call
# (presumably to `grapher` — TODO confirm against the original file) whose
# opening is outside this view. It cannot be parsed or re-indented safely here;
# restore formatting from the original source, not from this fragment.
#
# What the visible code does (reading the tokens in order):
#   - finishes a call passing price/action/reward and `loss=agent.loss`;
#   - advances the loop state: `state = next_state`;
#   - on `done` (end of episode): prints the environment's start holdings,
#     previous `(cash, nown)`, and current `tuple(env.holdings)`; prints an
#     episode summary (episode index `e` of `EPISODES`, score `time`, epsilon);
#     prints the average loss over `env.init['span']`; appends `agent.loss` to
#     an open file handle `f` and flushes it; resets `agent.loss` to 0;
#   - every 2nd episode (`e % 2 == 0`) shows and resets the grapher, then
#     saves the agent to `save_string`; finally `break`s out of the inner loop;
#   - a per-step experience-replay call is commented out
#     (`agent.replay(batch_size)` guarded by memory size);
#   - "# Test": every 2nd episode, resets `test_env` and reshapes the returned
#     state to `[1, state_size]` — presumably for a Keras/TF model's batch
#     dimension; the rest of the test loop runs past the end of this chunk.
price, action, reward, loss=agent.loss) #print(action, reward) state = next_state if done: print('start', env.start, 'previous', (cash, nown), 'current', tuple(env.holdings)) print("episode: {}/{}, score: {}, e: {:.5}".format( e, EPISODES, time, agent.epsilon)) print('average_loss =', agent.loss / env.init['span']) f.write(str(agent.loss) + '\n') f.flush() agent.loss = 0 if e % 2 == 0: grapher.show(action_labels=env.action_labels, ep=e, t=time, e=agent.epsilon) grapher.reset() agent.save(save_string) break # if len(agent.memory) > batch_size: # agent.replay(batch_size) # Test if e % 2 == 0: state = test_env.reset() state = np.reshape(state, [1, state_size])