コード例 #1
0
ファイル: longshort.py プロジェクト: Tuaman/CS229project
                    # One environment step of the training loop: snapshot the
                    # holdings, pick an action, store the transition, optionally
                    # replay from memory, and log.
                    # NOTE(review): the enclosing loop/function headers are
                    # outside this excerpt; `time` and `e` are presumably the
                    # step and episode counters -- confirm against the callers.

                    # Snapshot taken BEFORE env.step(), so these are the
                    # pre-step values (they are printed as 'previous' below).
                    # `nown` is presumably the number of units held and
                    # env.state[-1] the latest price -- confirm.
                    cash, nown, price = env.holdings[0], env.holdings[
                        1], env.state[-1]
                    # env.render()
                    action = agent.act(state, time)
                    next_state, reward, done, _ = env.step(action)
                    # Reshape to a single-row 2-D array of width state_size.
                    next_state = np.reshape(next_state, [1, state_size])
                    # Store the transition in the agent's memory.
                    agent.remember(state, action, reward, next_state, done)
                    # agent.train(state, action, reward, next_state, done)
                    # Replay only once memory holds more than batch_size
                    # entries.
                    if len(agent.memory) > batch_size:
                        agent.replay(batch_size)
                    # Record this step in the grapher only when e is even.
                    if e % 2 == 0:
                        #cash, nown, price = state[0, 1], state[0, 2], state[0, -1]
                        # cash, nown, price = *env.holdings, state[0,-1]
                        grapher.add(cash,
                                    nown,
                                    price,
                                    action,
                                    reward,
                                    loss=agent.loss)
                        #print(action, reward)

                    state = next_state
                    if done:
                        # End-of-episode report: starting point, pre-step
                        # holdings, and the holdings after the final step.
                        print('start', env.start, 'previous', (cash, nown),
                              'current', tuple(env.holdings))
                        # Note: the value printed as "score" is `time`.
                        print("episode: {}/{}, score: {}, e: {:.5}".format(
                            e, EPISODES, time, agent.epsilon))
                        # agent.loss is divided by env.init['span']
                        # (presumably the episode length -- confirm).
                        print('average_loss =', agent.loss / env.init['span'])
                        # Append the raw (un-averaged) loss to the log file and
                        # flush so the value reaches the file right away.
                        f.write(str(agent.loss) + '\n')
                        f.flush()
                        # Reset the loss value now that it has been logged.
                        agent.loss = 0
                        if e % 2 == 0: