# Reconstructed from a whitespace-mangled one-liner: the original chunk was
# collapsed onto a single physical line, so every statement after the first
# '#' had become dead comment text. Statement order below follows the
# original token order exactly.
# NOTE(review): this is the interior of a training loop — `e` (episode index),
# `time` (step counter), `state`, and the enclosing loop headers live outside
# this chunk; confirm indentation depth against the full file.

# Snapshot holdings and last price BEFORE stepping, so the end-of-episode
# print can show "previous" vs. "current" holdings.
cash, nown, price = env.holdings[0], env.holdings[1], env.state[-1]
# env.render()

# Standard experience-replay step: act, observe, store, train.
action = agent.act(state, time)
next_state, reward, done, _ = env.step(action)
next_state = np.reshape(next_state, [1, state_size])  # batch dim of 1 for the model
agent.remember(state, action, reward, next_state, done)
# agent.train(state, action, reward, next_state, done)
if len(agent.memory) > batch_size:
    agent.replay(batch_size)

# Record a data point for plotting on even-numbered episodes only.
if e % 2 == 0:
    # cash, nown, price = state[0, 1], state[0, 2], state[0, -1]
    # cash, nown, price = *env.holdings, state[0, -1]
    grapher.add(cash, nown, price, action, reward, loss=agent.loss)
    # print(action, reward)

state = next_state

if done:
    print('start', env.start, 'previous', (cash, nown), 'current', tuple(env.holdings))
    print("episode: {}/{}, score: {}, e: {:.5}".format(
        e, EPISODES, time, agent.epsilon))
    # assumes env.init['span'] is the number of steps per episode — TODO confirm
    print('average_loss =', agent.loss / env.init['span'])
    f.write(str(agent.loss) + '\n')
    f.flush()
    agent.loss = 0  # reset the accumulated loss for the next episode

if e % 2 == 0:
    # NOTE(review): the body of this guard is truncated in this chunk
    # (likely a grapher.show()/save call) — restore from the full file.
    pass