def main(train=False):
    # Load the price series, skipping the CSV header row.
    data = np.loadtxt('./data.csv', delimiter=',', skiprows=1)
    data = data[230:-1]  # drop the first day's data
    agent = DQN_Trade()
    for i in range(10):
        iters = len(data) // 240  # number of complete 240-step episodes
        for iter_step in range(iters):
            # print(iter_step)
            iter_data = data[iter_step * 240:iter_step * 240 + 240]
            env = StockEnv(iter_data)
            s = env.reset()
            while True:
                action = agent.egreedy_action(s)
                s_, reward, done = env.gostep(action)
                print(action)
                agent.precive(s, action, reward, s_, done)
                s = s_
                if done:
                    break
        agent.save_model(step=i)
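# The DQN_Trade agent above only appears through its egreedy_action / precive /
# save_model calls.  As a hedged illustration of what such an agent could look
# like, here is a minimal self-contained sketch; the class name MiniDQNAgent,
# the network sizes and the hyperparameters are assumptions for this example,
# not the project's actual implementation.
import random
from collections import deque

import numpy as np
import torch
import torch.nn as nn


class MiniDQNAgent:
    def __init__(self, state_dim, n_actions, epsilon=0.1, gamma=0.9):
        self.q_net = nn.Sequential(
            nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, n_actions))
        self.optimizer = torch.optim.Adam(self.q_net.parameters(), lr=1e-3)
        self.replay = deque(maxlen=10000)
        self.epsilon, self.gamma, self.n_actions = epsilon, gamma, n_actions

    def egreedy_action(self, s):
        # explore with probability epsilon, otherwise act greedily on Q(s, .)
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        with torch.no_grad():
            q = self.q_net(torch.as_tensor(s, dtype=torch.float32))
        return int(q.argmax())

    def perceive(self, s, a, r, s_, done, batch_size=32):
        # store the transition and, once enough samples exist, train on a minibatch
        self.replay.append((s, a, r, s_, done))
        if len(self.replay) < batch_size:
            return
        batch = random.sample(self.replay, batch_size)
        s_b, a_b, r_b, s2_b, d_b = map(np.array, zip(*batch))
        s_b = torch.as_tensor(s_b, dtype=torch.float32)
        s2_b = torch.as_tensor(s2_b, dtype=torch.float32)
        q = self.q_net(s_b).gather(
            1, torch.as_tensor(a_b).long().unsqueeze(1)).squeeze(1)
        with torch.no_grad():
            target = torch.as_tensor(r_b, dtype=torch.float32) + \
                self.gamma * self.q_net(s2_b).max(1).values * \
                (1.0 - torch.as_tensor(d_b, dtype=torch.float32))
        loss = nn.functional.mse_loss(q, target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


# usage on a toy 4-dimensional state space with 3 discrete actions
agent = MiniDQNAgent(state_dim=4, n_actions=3)
s = np.zeros(4, dtype=np.float32)
a = agent.egreedy_action(s)
agent.perceive(s, a, 1.0, np.ones(4, dtype=np.float32), False)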
class Worker(object):
    def __init__(self, name, globalAC):
        self.env = StockEnv()
        self.name = name
        self.AC = ACNet(name, globalAC)

    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            s = self.env.reset()
            ep_r = 0
            for ep_t in range(MAX_EP_STEP):
                if self.name == 'W_0':
                    self.env.render()
                a = self.AC.choose_action(s)
                s_, r, done = self.env.step(a)
                if ep_t == MAX_EP_STEP - 1:
                    done = True
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                    # update global and assign to local net
                    if done:
                        v_s_ = 0  # terminal
                    else:
                        v_s_ = SESS.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0]
                    buffer_v_target = []
                    for r in buffer_r[::-1]:  # reverse buffer r
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s, buffer_a, buffer_v_target = (
                        np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target))
                    feed_dict = {
                        self.AC.s: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                    }
                    test = self.AC.update_global(feed_dict)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()

                s = s_
                total_step += 1
                if done:
                    if len(GLOBAL_RUNNING_R) == 0:  # record running episode reward
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(0.9 * GLOBAL_RUNNING_R[-1] + 0.1 * ep_r)
                    print(
                        self.name,
                        "Ep:", GLOBAL_EP,
                        "| Ep_r: %i" % GLOBAL_RUNNING_R[-1],
                        '| Var:', test,
                    )
                    GLOBAL_EP += 1
                    break
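# The reversed-buffer loop above computes n-step discounted value targets:
# v_target[t] = r[t] + GAMMA * v_target[t+1], bootstrapped from the critic's
# value of the last state (or 0 at a terminal).  Below is a small,
# self-contained numpy sketch of that same computation for clarity; the
# function name discounted_value_targets is ours, not part of the original code.
import numpy as np


def discounted_value_targets(rewards, bootstrap_value, gamma=0.9):
    """Return the discounted value targets, oldest transition first."""
    v_s_ = bootstrap_value
    targets = []
    for r in reversed(rewards):
        v_s_ = r + gamma * v_s_
        targets.append(v_s_)
    targets.reverse()
    return np.asarray(targets, dtype=np.float32)


# Example: three rewards and a bootstrapped critic value of 1.0.
# The targets are 0.2 + 0.9*1.0 = 1.1, then 0.0 + 0.9*1.1 = 0.99,
# then 0.1 + 0.9*0.99 = 0.991, returned oldest-first.
print(discounted_value_targets([0.1, 0.0, 0.2], 1.0))  # -> [0.991, 0.99, 1.1]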
MAX_GLOBAL_EP = 2000
MAX_EP_STEP = 300
UPDATE_GLOBAL_ITER = 5
N_WORKERS = multiprocessing.cpu_count()
LR_A = 1e-4    # learning rate for actor
LR_C = 2e-4    # learning rate for critic
GAMMA = 0.9    # reward discount
# MODE = ['easy', 'hard']
# n_model = 1
GLOBAL_NET_SCOPE = 'Global_Net'
ENTROPY_BETA = 0.01
GLOBAL_RUNNING_R = []
GLOBAL_EP = 0

env = StockEnv()
N_S = env.reset().shape[0]
N_A = 1
A_BOUND = env.action_bound[1]
del env


class ACNet(object):
    def __init__(self, scope, globalAC=None):
        if scope == GLOBAL_NET_SCOPE:  # get global network
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
                self._build_net()
                self.a_params = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
                self.c_params = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
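# ENTROPY_BETA above suggests the actor loss in ACNet includes an entropy bonus
# to keep exploration alive, and A_BOUND bounds the single continuous action.
# Since the TF1 _build_net body is not shown here, the sketch below is a hedged
# illustration only, written in PyTorch for a Gaussian policy: the function
# name gaussian_a3c_losses and the toy shapes are ours, and the original
# network's exact loss terms may differ.
import torch
from torch.distributions import Normal

ENTROPY_BETA = 0.01


def gaussian_a3c_losses(mu, sigma, v, a_his, v_target, a_bound):
    # critic: squared TD error against the n-step value targets
    td = v_target - v
    critic_loss = td.pow(2).mean()
    # actor: policy-gradient term weighted by the TD error, plus entropy bonus
    dist = Normal(mu * a_bound, sigma + 1e-4)
    log_prob = dist.log_prob(a_his)
    entropy = dist.entropy()
    actor_loss = -(log_prob * td.detach() + ENTROPY_BETA * entropy).mean()
    return actor_loss, critic_loss


# toy shapes: batch of 5 one-dimensional actions (N_A = 1, as above)
mu, sigma = torch.zeros(5, 1), torch.ones(5, 1)
v, a_his, v_target = torch.zeros(5, 1), torch.zeros(5, 1), torch.ones(5, 1)
print(gaussian_a3c_losses(mu, sigma, v, a_his, v_target, a_bound=1.0))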
            # tail of the agent's portfolio-value update: holdings marked to
            # market, plus the cash balance
            total += self.portfolio[i] * p
        self.value.append(total + self.balance)


### Begin Simulation ###
env = StockEnv(NUM_SECTORS)
agent = q_agent(len(env.sectors))

fig = plt.figure()
ax = fig.add_subplot(111)
fig.suptitle('Hard Coded Agent')

for episode in range(NUM_EPISODES):
    # reset the environment and initialize the portfolio value
    agent.reset()
    p0 = env.reset()
    agent.update_value(p0)
    for t in range(MAX_T):
        # select the next action
        action = agent.select_action(p0)
        # execute the next action and get next state and reward
        p = env.step()
        for i, a in enumerate(action):
            agent.act(i, a, p[i])
        agent.update_value(p)
        # render the portfolio value graph
        env.render(ax, agent.value)
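# The two-line fragment at the top of this block accumulates holdings times
# current prices and appends the result plus cash to self.value.  A standalone
# sketch of that mark-to-market computation follows; the helper name
# portfolio_value is ours, introduced only for illustration.
def portfolio_value(holdings, prices, balance):
    """Mark the portfolio to market: shares * price summed over sectors, plus cash."""
    total = 0.0
    for shares, price in zip(holdings, prices):
        total += shares * price
    return total + balance


# Example: 10 and 5 shares of two sectors at prices 3.0 and 2.0, with 100 cash.
print(portfolio_value([10, 5], [3.0, 2.0], 100.0))  # -> 140.0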
num_episodes = 30000
gamma = 0.9
reward_history = []
duration_history = []
prev_score = avg_return = None
decaying = 0.99
e = 1.0

for episode in range(num_episodes):
    # epsilon schedules tried so far; the hyperbolic decay below is the active one
    # e = 1.0 / (np.sqrt(episode) * 1 + 1)
    e = 1.0 / (episode / 10 + 1)
    # e = random_action_prob
    # e = 0.2 / (episode / 5000 + 1) * 0.5 * (1 + np.cos(2 * np.pi * episode / 5000))
    if episode > 0.9 * num_episodes:
        e = 0.0  # act fully greedily for the last 10% of episodes

    state = env.reset()
    reward_sum = 0.0
    for step in range(5000):
        # if episode % 100 == 0:
        #     env.render()
        if np.random.rand(1) < e:
            a = env.random_action()
        else:
            Qs = predDQN(state)
            _, i = torch.max(Qs.data, 0)
            a = i[0]

        new_state, reward, done, info = env.step(a)
        replay_buffer.append(Replay(state, a, new_state, reward, done))
        if len(replay_buffer) > BUFFER_SIZE:
            replay_buffer.pop(0)  # keep the buffer bounded at BUFFER_SIZE
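# The replay_buffer above is a plain list capped by dropping the oldest entry
# once it exceeds BUFFER_SIZE.  An equivalent, slightly more idiomatic variant
# uses collections.deque with maxlen, which evicts old transitions
# automatically; this is a sketch under that assumption, not the project's
# code, and Replay is assumed here to be a simple namedtuple.
import random
from collections import deque, namedtuple

Replay = namedtuple('Replay', ['state', 'action', 'new_state', 'reward', 'done'])

BUFFER_SIZE = 50000
replay_buffer = deque(maxlen=BUFFER_SIZE)  # old transitions fall out automatically


def sample_minibatch(buffer, batch_size=64):
    """Draw a random minibatch of transitions for a DQN update."""
    return random.sample(buffer, min(batch_size, len(buffer)))


# usage: append transitions during the rollout, then sample for training
replay_buffer.append(
    Replay(state=[0.0], action=0, new_state=[0.1], reward=1.0, done=False))
print(len(sample_minibatch(replay_buffer, batch_size=8)))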
if not test:
    ac.train()
else:
    df = pd.read_csv("csvs/norm_all_stocks_5yr.csv")
    stock_df = df[df.Name == env.test_stock_name]

    policy_mlp = ac.policy_mlp
    policy_mlp.load_state_dict(torch.load("policy_mlp.pth"))
    value_mlp = ac.value_mlp
    value_mlp.load_state_dict(torch.load("value_mlp.pth"))

    # roll out the trained policy on the test stock
    obss = []
    actions = []
    rewards = []
    obs = env.reset()
    while True:
        obss.append(obs)
        action, _ = policy_mlp(torch.as_tensor(obs, dtype=torch.float32))
        obs, reward, done, _ = env.step(action.detach().numpy())
        actions.append(action)
        rewards.append(reward)
        if done:
            break

    # roll out a passive baseline for comparison
    obss_passive = []
    actions_passive = []
    rewards_passive = []
    obs = env.reset()
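# Once both rollouts finish, rewards and rewards_passive can be compared as
# cumulative returns over the test episode.  The sketch below shows one way to
# do that; the helper names cumulative_returns / plot_agent_vs_passive and the
# plotting choices are ours, not taken from the project.
import numpy as np
import matplotlib.pyplot as plt


def cumulative_returns(rewards):
    """Running sum of per-step rewards over one episode."""
    return np.cumsum(np.asarray(rewards, dtype=np.float64))


def plot_agent_vs_passive(rewards, rewards_passive):
    plt.plot(cumulative_returns(rewards), label='trained policy')
    plt.plot(cumulative_returns(rewards_passive), label='passive baseline')
    plt.xlabel('step')
    plt.ylabel('cumulative reward')
    plt.legend()
    plt.show()


# usage with toy data
plot_agent_vs_passive([0.1, -0.05, 0.2], [0.0, 0.05, 0.05])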
class Worker(object):
    def __init__(self, name, globalAC):
        self.env = StockEnv()
        self.name = name
        self.AC = ACNet(name, self.env.get_state().shape[0], 4, globalAC)

    def _update_global_reward(self, ep_r):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        if len(GLOBAL_RUNNING_R) == 0:  # record running episode reward
            GLOBAL_RUNNING_R.append(ep_r)
        else:
            GLOBAL_RUNNING_R.append(0.99 * GLOBAL_RUNNING_R[-1] + 0.01 * ep_r)
        logger.debug(
            [self.name, "Ep:", GLOBAL_EP, "| Ep_r: %i" % GLOBAL_RUNNING_R[-1]]
        )
        GLOBAL_EP += 1

    def _update_global_acnet(self, done, s_, buffer_s, buffer_a, buffer_r):
        if done:
            v_s_ = 0  # terminal
        else:
            v_s_ = SESS.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0]
        buffer_v_target = []
        for r in buffer_r[::-1]:  # reverse buffer r
            v_s_ = r + GAMMA * v_s_
            buffer_v_target.append(v_s_)
        buffer_v_target.reverse()

        buffer_s, buffer_a, buffer_v_target = (
            np.vstack(buffer_s), np.array(buffer_a), np.vstack(buffer_v_target))
        feed_dict = {
            self.AC.s: buffer_s,
            self.AC.a_his: buffer_a,
            self.AC.v_target: buffer_v_target,
        }
        self.AC.update_global(feed_dict)

    def work(self):
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        self.env.reset()
        if self.name == 'W_0':
            self.env.render()
        while not COORD.should_stop():
            ep_r = 0
            while True:
                s = self.env._get_state()
                a, p = self.AC.choose_action(s)
                s_, r, done = self.env.step(a)
                if done:
                    r = -0.5
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)
                if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                    self._update_global_acnet(done, s_, buffer_s, buffer_a, buffer_r)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()
                # s = s_
                total_step += 1
                if done:
                    self._update_global_reward(ep_r)
                    break
                if self.name == 'W_0':
                    logger.debug(["s", s, " a:", a, " p:", p, " r:", r,
                                  " total_step:", total_step, 'total', self.env.total])
                    time.sleep(0.5)

    def train(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            s = self.env.reset()
            ep_r = 0
            while True:
                # if self.name == 'W_0':
                #     self.env.render()
                a, p = self.AC.choose_action(s)
                s_, r, done = self.env.step(a)
                if done:
                    r = -0.5
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)
                if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                    # update global and assign to local net
                    self._update_global_acnet(done, s_, buffer_s, buffer_a, buffer_r)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()
                if done:
                    self._update_global_reward(ep_r)
                    logger.debug(["s", s, " a:", a, " p:", p, " r:", r,
                                  " total_step:", total_step, 'total', self.env.total])
                    break
                s = s_
                total_step += 1
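# update_global (push the local worker's gradients to the shared network) and
# pull_global (copy the shared weights back into the local net) are used above
# but defined elsewhere in the project.  As a hedged illustration of the same
# A3C sync pattern in PyTorch (the names push_gradients / pull_weights are
# ours), the worker's update step can look like this:
import torch


def push_gradients(local_net, global_net, global_optimizer, loss):
    """Compute gradients on the local net and apply them to the global net."""
    global_optimizer.zero_grad()
    loss.backward()
    for lp, gp in zip(local_net.parameters(), global_net.parameters()):
        gp.grad = lp.grad.clone() if lp.grad is not None else None
    global_optimizer.step()


def pull_weights(local_net, global_net):
    """Overwrite the local worker's weights with the freshly updated global ones."""
    local_net.load_state_dict(global_net.state_dict())


# usage with two tiny identical networks
local_net = torch.nn.Linear(4, 2)
global_net = torch.nn.Linear(4, 2)
opt = torch.optim.Adam(global_net.parameters(), lr=1e-3)
loss = local_net(torch.randn(3, 4)).pow(2).mean()
push_gradients(local_net, global_net, opt, loss)
pull_weights(local_net, global_net)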
class Worker(object):
    GAMMA = 0.9
    GLOBAL_RUNNING_R = []
    GLOBAL_EP = 0

    def __init__(self, sess, name, N_S, N_A, globalAC):
        self.SESS = sess
        self.N_S = N_S
        self.N_A = N_A
        self.env = StockEnv()
        self.name = name
        self.AC = A3CNet(self.SESS, self.name, self.N_S, self.N_A, globalAC)
        # self.saver = tf.train.Saver()

    def _record_global_reward_and_print(self, global_running_rs, ep_r, global_ep, total_step):
        global_running_rs.append(ep_r)
        try:
            print(self.name, "Ep:", global_ep,
                  "| Ep_r: %i" % global_running_rs[-1],
                  "| total step:", total_step)
        except Exception as e:
            print(e)

    def train(self):
        buffer_s, buffer_a, buffer_r = [], [], []
        s = self.env.reset()
        ep_r = 0
        total_step = 1

        def reset():
            nonlocal ep_r, total_step
            self.env.reset()
            ep_r = 0
            total_step = 1

        while not COORD.should_stop() and self.GLOBAL_EP < MAX_GLOBAL_EP:
            # s = self.env.reset()
            # ep_r = 0
            # total_step = 1
            reset()
            while total_step < MAX_TOTAL_STEP:
                try:
                    s = self.env.get_state()
                    a, p = self.AC.choose_action(s)
                    s_, r, done = self.env.step(a)
                    if done:
                        r = -2
                    ep_r += r
                    buffer_s.append(s)
                    buffer_a.append(a)
                    buffer_r.append(r)
                    if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                        # update global and assign to local net
                        self.AC.update(done, s_, buffer_r, buffer_s, buffer_a)
                        buffer_s, buffer_a, buffer_r = [], [], []
                    if done:
                        self._record_global_reward_and_print(
                            self.GLOBAL_RUNNING_R, ep_r, self.GLOBAL_EP, total_step)
                        self.GLOBAL_EP += 1
                        reset()
                    # s = s_
                    total_step += 1
                    if self.name == 'W_0':
                        self.env.render()
                        time.sleep(0.05)
                        logger.debug([
                            "s ", s, " v ", self.AC.get_v(s), " a ", a, " p ", p,
                            " ep_r ", ep_r, " total ", self.env.total,
                            " acct ", self.env.acct
                        ])
                except Exception as e:
                    print(e)
            try:
                print(self.name, " episode not finished within MAX_TOTAL_STEP",
                      " total_step:", total_step)
            except Exception as e:
                print(e)