# NOTE(review): this chunk was recovered from a source whose line breaks were
# lost; the indentation below is reconstructed from the code's logic (the
# `if done:` body, the every-other-episode plotting, and the evaluation
# section) and should be confirmed against the original file. The fragment
# sits inside a per-step training loop, itself inside an episode loop over
# `e`; `env`, `test_env`, `agent`, `grapher`, `f`, `save_string`,
# `state_size`, `EPISODES`, `batch_size`, `cash`, `nown`, and `np` are all
# defined earlier in the file.

# Carry the environment transition forward into the next training step.
state = next_state
if done:
    # Episode finished: report the starting point, the portfolio before and
    # after, then the episode's training statistics.
    print('start', env.start, 'previous', (cash, nown), 'current', tuple(env.holdings))
    print("episode: {}/{}, score: {}, e: {:.5}".format(
        e, EPISODES, time, agent.epsilon))
    # Average of the accumulated loss; env.init['span'] is presumably the
    # number of steps per episode — TODO confirm against env's definition.
    print('average_loss =', agent.loss / env.init['span'])
    f.write(str(agent.loss) + '\n')
    f.flush()  # flush so the loss log survives an interrupted run
    agent.loss = 0  # reset the loss accumulator for the next episode
    if e % 2 == 0:
        # Plot every other episode, then clear the grapher's state.
        # NOTE: keyword `ep` is the episode index while keyword `e` is the
        # exploration epsilon — easy to confuse with the loop variable `e`.
        grapher.show(action_labels=env.action_labels, ep=e, t=time, e=agent.epsilon)
        grapher.reset()
    agent.save(save_string)  # persist the agent to `save_string` each episode
    break  # leave the per-step training loop for this episode

# Experience-replay training step, deliberately disabled in this revision:
# if len(agent.memory) > batch_size:
#     agent.replay(batch_size)

# Test: evaluate on the held-out environment every other episode (same
# cadence as the plotting above).
if e % 2 == 0:
    state = test_env.reset()
    state = np.reshape(state, [1, state_size])  # add a leading batch dim of 1
    for time in range(500):  # cap the evaluation rollout at 500 steps
        # Unpack the current portfolio (cash, shares owned) and the most
        # recent price from the test environment's state.
        cash, nown, price = test_env.holdings[
            0], test_env.holdings[1], test_env.state[-1]
        # is_test=True presumably disables epsilon exploration during
        # evaluation — confirm against agent.act's definition.
        action = agent.act(state, time, is_test=True)
        next_state, reward, done, _ = test_env.step(action)
        next_state = np.reshape(next_state, [1, state_size])