Code example #1
def run_maze():
    step = 0
    for episode in range(5):
        # initial observation
        observation = env.reset()

        while True:
            # refresh the rendering of the environment (disabled)
            # env.render()

            # RL choose action based on observation
            action = DQN.choose_action(observation)

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            DQN.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                DQN.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1
Code example #2
            #     action_low = 1
            #     observation_, reward, done, info = env.step(action_high=action_high, action_low=action_low)
            #     flag = 1
            # else:
            #     action_high = 0
            #     action_low = 1
            #     observation_, reward, done, info = env.step(action_high=action_high, action_low=action_low)
            #     flag = 0
            observation_ = np.array(observation_)
            print("action_high: " + str(action_high) + " action_low: " +
                  str(action_low))
            print("reward: " + str(reward))
            print("info: " + str(info))
            print("-------------------------")
            if (step + 1) % 100 == 0:
                elapsed = (time.perf_counter() - start)  # time.clock() was removed in Python 3.8
                print("Step: " + str(step + 1) + " Time Used: " + str(elapsed))
            observation = observation_
            step += 1


if __name__ == "__main__":
    LC_env = Env(ego_start_time=100)
    # Demonstration run: restore the saved network and play one episode with the GUI
    dqn = DQN(n_features=6,
              e_greedy_start=1,
              e_greedy_increment=0.01,
              is_save=True,
              is_restore=True)
    run_task(env=LC_env, no_gui=False, max_episode=1, net=dqn)
Code example #3
import gym
from RL_brain import DQN
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

env = gym.make('MountainCar-v0')
env = env.unwrapped
env.seed(21)
MEMORY_SIZE = 10000

sess = tf.Session()
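# Two agents are built in separate variable scopes so their TensorFlow variables do
# not collide: a plain DQN and one that uses prioritized experience replay.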
with tf.variable_scope('natural_DQN'):
    RL_natural = DQN(
        n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, sess=sess, prioritized=False,
    )

with tf.variable_scope('DQN_with_prioritized_replay'):
    RL_prio = DQN(
        n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, sess=sess, prioritized=True, output_graph=True,
    )
sess.run(tf.global_variables_initializer())


def train(RL):
    total_steps = 0
    steps = []
    episodes = []
    for i_episode in range(20):
Code example #4
            # RL choose action based on observation
            action = RL.choose_action(observation)

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            RL.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DQN()
    env.after(100, run_maze)
    env.mainloop()
Code example #5
import gym
from RL_brain import DQN

env = gym.make('MountainCar-v0')
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)
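# MountainCar-v0 has a 2-dimensional observation (car position and velocity) and
# 3 discrete actions, which is where n_states=2 and n_actions=3 below come from.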

RL = DQN(n_states=2,
         n_actions=3,
         learning_rate=0.001,
         reward_decay=0.9,
         e_greedy=0.9,
         replace_target_iter=300,
         memory_size=3000,
         e_greedy_increment=0.002)

total_steps = 0

for i_episode in range(100):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)

        observation_, reward, done, info = env.step(action)
Code example #6
            s_, reward, done = env.step(action)
            # RL learn from this transition
            RL.learn(s, action, reward, s_)
            if len(RL.memory) >= 200:
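                # Once 200 or more transitions are buffered, fit the Q-network on the
                # whole buffer and then clear it.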

                training = np.array(RL.memory)
                RL.q_model.fit(np.array(training[:, 0]),
                               np.array([i for i in training[:, 1]]),
                               epochs=5)
                RL.memory = []
            # swap observation
            s = s_
            # break while loop when end of this episode
            if done:
                #RL.epsilon += 0.001
                break

    G = GrowUp()
    print("test")
    for i in range(env.fin_step):
        q_table = RL.q_model.predict([i])
        G.step(np.argmax(q_table))
    print(G.score)


if __name__ == "__main__":
    env = GrowUp()
    RL = DQN(actions=list(range(env.n_actions)))

    update()
Code example #7
            observation = observation_

            # break while loop when end of this episode
            if done == 1:
                print('win!')
                break
            elif done == -1:
                print('lose!')
                break

            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DQN(env.n_features,
             env.n_actions,
             learning_rate=0.01,
             reward_decay=0.9,
             e_greedy=0.9,
             replace_target_iter=200,
             memory_size=2000)

    env.after(100, update)
    env.mainloop()
    RL.plot_cost()
Code example #8
import gym
from RL_brain import DQN

env = gym.make('CartPole-v0')
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)
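# CartPole-v0 exposes a 4-dimensional observation and 2 discrete actions, so the
# network sizes below are read straight from observation_space and action_space.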


RL = DQN(
    env.observation_space.shape[0],
    env.action_space.n,
    learning_rate=0.01,
    reward_decay=0.9,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001)

total_steps = 0

for i_episode in range(100):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
Code example #9
            observation = observation_

            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1

    # end of game

    #env.destroy()


if __name__ == "__main__":
    # maze game
    env = FX()
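    # Note: the agent instance is bound to the module-level name DQN (shadowing the
    # class), and run_maze() above uses that instance directly.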
    DQN = DQN(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    #     env.after(100, run_maze)
    run_maze()
    #     env.mainloop()
    DQN.plot_cost()
Code example #10
def run_model(_train_episode=100,
              _learn_threshold=200,
              _learn_interval=5,
              _base_currency='USD',
              _trade_log_mode='NONE',
              _trade_log_raw=False,
              _trade_log_to_file=False,
              _show_checkout=True,
              _show_step=True):
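    # Runs _train_episode episodes; once step exceeds _learn_threshold the agent calls
    # DQN.learn() every _learn_interval steps, and at the end of each episode it can
    # print a checkout review, review the trade log, and export it to ./logs/trade_logs/.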

    step = 0

    for episode in range(_train_episode):
        observation, TI_initial, initial_time = env.reset()
        TI_initial_balance = copy.deepcopy(TI_initial)
        train_name = TI_initial.account_name

        while True:
            action = DQN.choose_action(observation)
            observation_, reward, done, TI_end, end_time = env.step(
                action, print_step=_show_step)
            DQN.store_transition(observation, action, reward, observation_)

            if (step > _learn_threshold) and (step % _learn_interval == 0):
                DQN.learn()
            observation = observation_

            if done:
                print('$' * 20 + ' GAME OVER ' + '$' * 20)

                TI_initial_balance.account_name = 'Initial_Checkout_Review' + ' (episode: ' + str(
                    episode + 1) + ')'
                TI_initial_balance.checkout_all_in(initial_time,
                                                   _base_currency)
                TI_end_balance = copy.deepcopy(TI_end)
                TI_end_balance.account_name = 'End_Checkout_Review' + ' (episode: ' + str(
                    episode + 1) + ')'
                TI_end_balance.checkout_all_in(end_time, _base_currency)
                print('MI: Initial Time: {}; End Time: {}'.format(
                    initial_time, end_time))

                if _show_checkout:
                    TI_initial_balance.account_review()
                    TI_end_balance.account_review()

                if _trade_log_mode in (False, 'NONE'):
                    pass
                elif _trade_log_mode == 'ALL':
                    TI_end_balance.trade_log_review(raw_flag=_trade_log_raw)
                elif _trade_log_mode == 'TWOENDS':
                    TI_end.trade_log_review(tar_action_id=0,
                                            raw_flag=_trade_log_raw)
                    TI_end.trade_log_review(tar_action_id='LAST',
                                            raw_flag=_trade_log_raw)
                else:
                    print(
                        'Invalid _trade_log_mode input ({}). Must be \'ALL\', \'TWOENDS\', \'NONE\', or False'
                        .format(_trade_log_mode))
                    return -1

                if _trade_log_to_file:
                    trade_log_base_dir = './logs/trade_logs/'
                    if not os.path.exists(trade_log_base_dir):
                        os.makedirs(trade_log_base_dir)
                    trade_log_file_name = trade_log_base_dir + str(train_name)

                    log_file_readable = open(trade_log_file_name + '.txt',
                                             'w+')
                    with contextlib.redirect_stdout(log_file_readable):
                        TI_end.trade_log_review()
                    print(
                        "### READABLE trade_log of {} successfully exported to: ###\n\t\t{}"
                        .format(str(train_name), trade_log_file_name + '.txt'))
                    log_file_readable.close()

                    log_file_raw = open(trade_log_file_name + '.json', 'w+')
                    json.dump(TI_end.trade_log, log_file_raw, indent=4)
                    print(
                        "### RAW trade_log of {} successfully exported to: ###\n\t\t{}"
                        .format(str(train_name),
                                trade_log_file_name + '.json'))
                    log_file_raw.close()

                break
            step += 1
Code example #11
    TI_train = TI.Trade_Interface(config_account_name, config_currency_balance,
                                  config_from, config_to, config_interval,
                                  config_output_arena_csv,
                                  config_output_raw_csv)

    env = FX(TI_train,
             _base_currency=config_base_currency,
             _n_features=config_n_features,
             _anita_switch=config_anita_switch)

    DQN = DQN(len(env.TI_initial.currency_pairs),
              env.n_actions,
              env.n_features,
              learning_rate=config_learning_rate,
              reward_decay=config_reward_decay,
              e_greedy=config_e_greedy,
              replace_target_iter=config_replace_target_iter,
              memory_size=config_memory_size,
              output_graph=config_output_graph)

    run_model(_train_episode=config_train_episode,
              _learn_threshold=config_learn_threshold,
              _learn_interval=config_learn_interval,
              _base_currency=config_base_currency,
              _trade_log_mode=config_trade_log_mode,
              _trade_log_raw=config_trade_log_raw,
              _trade_log_to_file=config_trade_log_to_file,
              _show_checkout=config_show_checkout,
              _show_step=config_show_step)
Code example #12
                            rate_reset = True
                            env.change_input()
                            state_episode = 0
                            RL.change_input_count()
                            print('change input:', RL.change_input)
                        else:
                            rate_reset = False
                        check_success_episode = 0
                        success_rate = 0

                break

            if action_step > 50:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DQN(n_actions=env.n_actions,
             n_features=env.n_features,
             n_flights=env.n_flights,
             action_space=env.action_space)
    env.after(100, run_maze)
    env.mainloop()
Code example #13
            reward = log2_shaping(reward, divide=1)

            RL.store_transition(state, action, reward, next_state)
            state = next_state

            if ifrender:
                env.render_img()

            if RL.buffer.memory_counter > RL.memory_capacity:
                RL.learn()

            if done:
                print("Epoch: {}/{}, highest: {}".format(
                    i, episodes, info['highest']))
                scores.append(info['highest'])
                if i % epsilon_decay_interval == 0:
                    RL.epsilon_decay(i, episodes)
                break
    return scores


if __name__ == "__main__":
    RL = DQN(num_state=16, num_action=4, dueling=True)
    scores = train(RL)

    plt.figure(1, figsize=(18, 6), dpi=200)
    plt.plot(np.array(scores), c='r')
    plt.ylabel('highest score')
    plt.xlabel('episode')
    plt.savefig('result.jpg')
Code example #14
File: run_DQN.py / Project: fzc20070415/RL-Demo
from maze_env import Maze
from RL_brain import DQN
import time

# Parameters
EPSILON = 0.9  # Greedy Policy
ALPHA = 0.1  # Learning Rate
LAMBDA = 0.9  # Discount Factor
MAX_EPISODE = 50
MAZE_SIZE = 5
TRAP_SET = [[0, 1], [2, 2], [3, 3], [3, 0]]
TREASURE_SET = [[4, 4]]

env = Maze(MAZE_SIZE)
RL = DQN(MAZE_SIZE, EPSILON, LAMBDA, ALPHA)


def update():
    for episode in range(MAX_EPISODE):
        O = env.reset()  # return initial observation
        step_number = 0
        env.set_trap(TRAP_SET)
        env.set_treasure(TREASURE_SET)
        is_terminated = False
        env.render()

        while not is_terminated:
            A = RL.choose_action(O)
            # print(O)
            OO, R, is_terminated = env.step(A)
            # print(O,OO)
Code example #15

import gym
from RL_brain import DQN

env = gym.make('CartPole-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DQN(n_actions=env.action_space.n,
         n_features=env.observation_space.shape[0],
         learning_rate=0.01, e_greedy=0.9,
         replace_target_iter=100, memory_size=2000,
         e_greedy_increment=0.001)

total_steps = 0


for i_episode in range(100):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
Code example #16
import gym
from RL_brain import DQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)
MEMORY_SIZE = 3000
ACTION_SPACE = 25

sess = tf.Session()
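# A plain DQN and a dueling DQN are built in separate variable scopes so the two
# sets of weights can be trained and compared within the same session.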
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DQN(n_actions=ACTION_SPACE,
                      n_features=3,
                      memory_size=MEMORY_SIZE,
                      e_greedy_increment=0.001,
                      dueling=False,
                      sess=sess)

with tf.variable_scope('Double_DQN'):
    double_DQN = DQN(n_actions=ACTION_SPACE,
                     n_features=3,
                     memory_size=MEMORY_SIZE,
                     e_greedy_increment=0.001,
                     dueling=True,
                     sess=sess,
                     output_graph=True)

sess.run(tf.global_variables_initializer())

Code example #17
            observation_, reward, done = env.step(action)

            RL.memorize(observation, action, reward, observation_)

            if (step > 200) and (step % 3 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DQN(env.n_actions,
             memory_size=300,
             reward_decay=0.9,
             e_greedy=0.9,
             replace_target_iter=100)
    env.after(100, run_maze)
    env.mainloop()