コード例 #1
0
ファイル: longshort.py プロジェクト: Tuaman/CS229project
                    # Carry the newly observed state into the next step.
                    state = next_state
                    if done:
                        # End of episode: print env.start, the (cash, nown) pair
                        # (assigned outside this excerpt; presumably the holdings
                        # captured before the last step, confirm) and the
                        # environment's current holdings.
                        print('start', env.start, 'previous', (cash, nown),
                              'current', tuple(env.holdings))
                        # The step index `time` is what gets reported as the score.
                        print("episode: {}/{}, score: {}, e: {:.5}".format(
                            e, EPISODES, time, agent.epsilon))
                        # Accumulated agent loss divided by env.init['span']
                        # (presumably the number of steps per episode; verify
                        # against the environment definition).
                        print('average_loss =', agent.loss / env.init['span'])
                        # Append the raw (un-averaged) accumulated loss as one line
                        # and flush so it reaches the file immediately.
                        f.write(str(agent.loss) + '\n')
                        f.flush()
                        # Clear the accumulator before the next episode.
                        agent.loss = 0
                        if e % 2 == 0:
                            # On even-numbered episodes: display the graph, reset
                            # the grapher and save the agent to save_string.
                            grapher.show(action_labels=env.action_labels,
                                         ep=e,
                                         t=time,
                                         e=agent.epsilon)
                            grapher.reset()
                            agent.save(save_string)
                        # Leave the enclosing loop (its header is outside this excerpt).
                        break
                    # NOTE(review): replay from memory is commented out here;
                    # confirm whether training happens elsewhere (e.g. inside the agent).
                    # if len(agent.memory) > batch_size:
                    #     agent.replay(batch_size)

                # Test: on even-numbered episodes, run the agent on test_env.
                if e % 2 == 0:
                    state = test_env.reset()
                    # Reshape the observation to one row of state_size values.
                    state = np.reshape(state, [1, state_size])
                    # The test rollout is limited to at most 500 steps.
                    for time in range(500):
                        # Read holdings[0], holdings[1] and the last element of
                        # test_env.state before acting (named cash, nown, price).
                        cash, nown, price = test_env.holdings[
                            0], test_env.holdings[1], test_env.state[-1]
                        # is_test=True presumably switches the agent out of its
                        # training-time action selection; confirm in agent.act.
                        action = agent.act(state, time, is_test=True)
                        next_state, reward, done, _ = test_env.step(action)
                        # Same single-row shape as `state` above.
                        next_state = np.reshape(next_state, [1, state_size])