Example no. 1
class Worker(object):
    def __init__(self, wid):
        trading_fee = .007
        time_fee = .0073
        history_length = 1
        #self.env = gym.make(GAME).unwrapped
        generator = get_CSV_data(filename="./test_6.csv")
        self.env = SpreadTrading(spread_coefficients=[1],
                                 data_generator=generator,
                                 trading_fee=trading_fee,
                                 time_fee=time_fee,
                                 history_length=history_length)
        self.wid = wid
        self.ppo = GLOBAL_PPO

    def work(self):
        global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER
        while not COORD.should_stop():
            s = self.env.reset()
            #print("=======")
            #print(s)
            #print("========")
            ep_r = 0
            buffer_s, buffer_a, buffer_r = [], [], []
            for t in range(EP_LEN):
                if not ROLLING_EVENT.is_set():  # while global PPO is updating
                    ROLLING_EVENT.wait()  # wait until PPO is updated
                    buffer_s, buffer_a, buffer_r = [], [], []  # clear the history buffer; collect data with the new policy
                a = self.ppo.choose_action(s)
                #print("=========")
                #print("a: ", a)
                #print("=========")
                s_, r, done, _ = self.env.step(a)
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append((r + 8) / 8)  # normalize reward; found to be useful
                s = s_
                ep_r += r

                GLOBAL_UPDATE_COUNTER += 1  # count toward the minimum batch size; no need to wait for other workers
                if t == EP_LEN - 1 or GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                    v_s_ = self.ppo.get_v(s_)
                    discounted_r = []  # compute discounted reward
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        discounted_r.append(v_s_)
                    discounted_r.reverse()

                    bs, ba, br = np.vstack(buffer_s), np.vstack(buffer_a), np.array(discounted_r)[:, np.newaxis]
                    buffer_s, buffer_a, buffer_r = [], [], []
                    QUEUE.put(np.hstack((bs, ba, br)))  # put data in the queue
                    if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                        ROLLING_EVENT.clear()  # stop collecting data
                        UPDATE_EVENT.set()  # trigger the global PPO update

                    if GLOBAL_EP >= EP_MAX:  # stop training
                        COORD.request_stop()
                        break

            # record reward changes, plot later
            if len(GLOBAL_RUNNING_R) == 0:
                GLOBAL_RUNNING_R.append(ep_r)
            else:
                GLOBAL_RUNNING_R.append(GLOBAL_RUNNING_R[-1] * 0.9 + ep_r * 0.1)
            GLOBAL_EP += 1
            print(
                '{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100),
                '|W%i' % self.wid,
                '|Ep_r: %.2f' % ep_r,
            )
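
The work() loop above depends on several module-level objects (GLOBAL_PPO, the two threading events, QUEUE, COORD and the global counters) that the excerpt does not show. Below is a minimal sketch of how they are typically set up before the workers start; the hyperparameter values, and the PPO class itself, are assumptions rather than part of the excerpt.

import queue
import threading

import numpy as np
import tensorflow as tf

EP_MAX, EP_LEN = 1000, 500            # assumed episode budget and episode length
GAMMA = 0.9                           # discount used when bootstrapping returns
MIN_BATCH_SIZE = 64                   # workers trigger an update once this many steps are queued
N_WORKER = 4

UPDATE_EVENT, ROLLING_EVENT = threading.Event(), threading.Event()
UPDATE_EVENT.clear()                  # the updater sleeps until workers fill a batch
ROLLING_EVENT.set()                   # workers start collecting immediately
QUEUE = queue.Queue()                 # workers push stacked (state, action, return) batches here
COORD = tf.train.Coordinator()        # coordinates clean shutdown of all threads

GLOBAL_EP, GLOBAL_UPDATE_COUNTER = 0, 0
GLOBAL_RUNNING_R = []
GLOBAL_PPO = PPO()                    # shared policy network; the PPO class is not shown in this excerpt
workers = [Worker(wid=i) for i in range(N_WORKER)]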
Example no. 2
    threads = []
    for worker in workers:  # worker threads
        t = threading.Thread(target=worker.work, args=())
        t.start()  # training
        threads.append(t)
    # add a PPO updating thread
    threads.append(threading.Thread(target=GLOBAL_PPO.update))
    threads[-1].start()
    COORD.join(threads)

    # plot reward change and test
    plt.plot(np.arange(len(GLOBAL_RUNNING_R)), GLOBAL_RUNNING_R)
    plt.xlabel('Episode')
    plt.ylabel('Moving reward')
    plt.ion()
    plt.show()
    #env = gym.make('Pendulum-v0')
    trading_fee = .007
    time_fee = .00724
    history_length = 1
    generator = get_CSV_data(filename="./test_6.csv")
    env = SpreadTrading(spread_coefficients=[1],
                        data_generator=generator,
                        trading_fee=trading_fee,
                        time_fee=time_fee,
                        history_length=history_length)
    while True:
        s = env.reset()
        for t in range(3455):
            env.render()
            s = env.step(GLOBAL_PPO.choose_action(s))[0]
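
Example no. 2 starts a thread running GLOBAL_PPO.update, but the PPO class is not part of the excerpt. The sketch below shows the update loop that the event/queue protocol in Worker.work implies; S_DIM, A_DIM and the internal training helper are assumed names.

# Sketch of the updater (a method of the assumed PPO class)
def update(self):
    global GLOBAL_UPDATE_COUNTER
    while not COORD.should_stop():
        if GLOBAL_EP < EP_MAX:
            UPDATE_EVENT.wait()                            # sleep until workers have queued a full batch
            data = [QUEUE.get() for _ in range(QUEUE.qsize())]
            data = np.vstack(data)
            s = data[:, :S_DIM]                            # rows were stacked as (state, action, return)
            a = data[:, S_DIM:S_DIM + A_DIM]
            r = data[:, -1:]
            self.train_on_batch(s, a, r)                   # assumed internal gradient-step helper
            UPDATE_EVENT.clear()                           # finished updating
            GLOBAL_UPDATE_COUNTER = 0                      # reset the shared step counter
            ROLLING_EVENT.set()                            # let workers collect with the new policy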
Example no. 3
#Classes and variables
generator = CSVStreamer(filename='/Users/tawehbeysolow/Downloads/amazon_order_book_data2.csv')

memory = Memory(max_size=memory_size)

# NOTE: this reassignment overrides the CSVStreamer above, so the synthetic
# WavySignal generator is what actually feeds the environment below
generator = WavySignal(period_1=25, period_2=50, epsilon=-0.5)


environment = SpreadTrading(spread_coefficients=[1],
                            data_generator=generator,
                            trading_fee=trading_fee,
                            time_fee=time_fee,
                            history_length=history_length)

state_size = len(environment.reset())


def baseline_model(n_actions, info, random=False):

    if random:
        # choose uniformly at random among the n_actions possible actions
        action = np.random.choice(range(n_actions), p=np.repeat(1/float(n_actions), n_actions))
        action = possible_actions[action]

    else:

        if len(info) == 0:
            action = np.random.choice(range(n_actions), p=np.repeat(1/float(n_actions), n_actions))
            action = possible_actions[action]

        elif info['action'] == 'sell':
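
The excerpt stops inside baseline_model, so the sell/buy branches are not shown. Independently of how those branches finish, the random branch can be exercised with a short driver like the sketch below (my own loop, not the author's): it feeds the one-hot action into SpreadTrading and accumulates the reward, with a step cap in case the data generator never ends.

n_actions = len(possible_actions)
state = environment.reset()
total_reward = 0.0

for _ in range(1000):                                          # cap the episode length for this sketch
    action = baseline_model(n_actions, info={}, random=True)   # one-hot hold/buy/sell vector
    state, reward, done, _ = environment.step(action)
    total_reward += reward
    if done:
        break

print('random baseline episode reward: %.4f' % total_reward)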
Example no. 4
# from generators.tickergenerator import TickerGenerator
# Instantiating the environment
generator = CSVStreamer(filename="data/" + market + "-history.csv")
episodes = 7600
episode_length = 200
trading_fee = .2
time_fee = 0
history_length = 5

environment = SpreadTrading(spread_coefficients=[1],
                            data_generator=generator,
                            trading_fee=trading_fee,
                            time_fee=time_fee,
                            history_length=history_length)
state = environment.reset()

# Instantiating the agent
memory_size = 3000
state_size = len(state)
gamma = 0.96
epsilon_min = 0.01
batch_size = 64
action_size = len(SpreadTrading._actions)
train_interval = 10
learning_rate = 0.001

if not os.path.isfile("./model." + market + ".h5"):
    agent = DQNAgent(state_size=state_size,
                     action_size=action_size,
                     memory_size=memory_size,
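
The excerpt cuts off partway through the DQNAgent constructor. Once the agent exists, a loop along the following lines usually drives training; agent.act and agent.observe are assumed method names here, so substitute whatever interface the DQNAgent class in your codebase actually exposes.

for episode in range(episodes):
    state = environment.reset()
    for step in range(episode_length):
        action = agent.act(state)                                 # epsilon-greedy action (assumed API)
        next_state, reward, done, _ = environment.step(action)
        agent.observe(state, action, reward, next_state, done)    # store the transition / learn (assumed API)
        state = next_state
        if done:
            break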
Example no. 5
sell = np.array([0, 0, 1])
possible_actions = [hold, buy, sell]

#Classes and variables
generator = CSVStreamer(filename='/Users/tawehbeysolow/Downloads/amazon_order_book_data2.csv')
#generator = WavySignal(period_1=25, period_2=50, epsilon=-0.5)

memory = Memory(max_size=memory_size)

environment = SpreadTrading(spread_coefficients=[1],
                            data_generator=generator,
                            trading_fee=trading_fee,
                            time_fee=time_fee,
                            history_length=history_length)

state_size = len(environment.reset())


def baseline_model(n_actions, info, random=False):

    if random:
        # choose uniformly at random among the n_actions possible actions
        action = np.random.choice(range(n_actions), p=np.repeat(1/float(n_actions), n_actions))
        action = possible_actions[action]

    else:

        if len(info) == 0:
            action = np.random.choice(range(n_actions), p=np.repeat(1/float(n_actions), n_actions))
            action = possible_actions[action]

        elif info['action'] == 'sell':
Example no. 6
        S_ = tf.placeholder(tf.float32, shape=[None, state_size], name='s_')

    sess = tf.Session()

    # Create actor and critic.
    actor = Actor(sess, action_size, LR_A, REPLACE_ITER_A)
    critic = Critic(sess, state_size, action_size, LR_C, GAMMA, REPLACE_ITER_C,
                    actor.a, actor.a_)
    actor.add_grad_to_graph(critic.a_grads)

    M = Memory(MEMORY_CAPACITY, dims=2 * state_size + action_size + 1)

    sess.run(tf.global_variables_initializer())

    for i in range(171):
        s = environment.reset()
        ep_reward = 0
        for j in range(3443):
            a = actor.choose_action(s)
            #print("=============")
            #print("s: ", s, " --- ", j)
            #print("=============")
            s_, r, done, _ = environment.step(a)
