Example #1
                                    price,
                                    action,
                                    reward,
                                    loss=agent.loss)
                        # print(action, reward)

                    state = next_state
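                    # End of episode: report holdings and training stats, log the
                    # loss, and periodically plot the run and save the model.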
                    if done:
                        print('start', env.start, 'previous', (cash, nown),
                              'current', tuple(env.holdings))
                        print("episode: {}/{}, score: {}, e: {:.5}".format(
                            e, EPISODES, time, agent.epsilon))
                        print('average_loss =', agent.loss / env.init['span'])
                        f.write(str(agent.loss) + '\n')
                        f.flush()
                        agent.loss = 0
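                        # Every other episode, render the collected graphs and
                        # checkpoint the model weights.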
                        if e % 2 == 0:
                            grapher.show(action_labels=env.action_labels,
                                         ep=e,
                                         t=time,
                                         e=agent.epsilon)
                            grapher.reset()
                            agent.save(save_string)
                        break
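                    # Experience replay is left disabled (commented out) here: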
                    # if len(agent.memory) > batch_size:
                    #     agent.replay(batch_size)

                # Every other episode, evaluate the current policy on the test environment
                if e % 2 == 0:
                    state = test_env.reset()
                    state = np.reshape(state, [1, state_size])  # add batch dimension