def run_maze():
    step = 0
    for episode in range(5):
        # initial observation
        observation = env.reset()
        while True:
            # fresh env
            # env.render()

            # RL choose action based on observation
            action = DQN.choose_action(observation)

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            DQN.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                DQN.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1
            # action_low = 1
            # observation_, reward, done, info = env.step(action_high=action_high, action_low=action_low)
            # flag = 1
            # else:
            #     action_high = 0
            #     action_low = 1
            #     observation_, reward, done, info = env.step(action_high=action_high, action_low=action_low)
            #     flag = 0

            observation_ = np.array(observation_)
            print("action_high: " + str(action_high) + " action_low: " + str(action_low))
            print("reward: " + str(reward))
            print("info: " + str(info))
            print("-------------------------")

            if (step + 1) % 100 == 0:
                elapsed = time.perf_counter() - start  # wall-clock timing (time.clock() is gone in Python 3.8+)
                print("Step: " + str(step + 1) + " Time Used: " + str(elapsed))

            observation = observation_
            step += 1


if __name__ == "__main__":
    LC_env = Env(ego_start_time=100)  # Just show
    dqn = DQN(n_features=6,
              e_greedy_start=1,
              e_greedy_increment=0.01,
              is_save=True,
              is_restore=True)
    run_task(env=LC_env, no_gui=False, max_episode=1, net=dqn)
import gym
from RL_brain import DQN
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

env = gym.make('MountainCar-v0')
env = env.unwrapped
env.seed(21)
MEMORY_SIZE = 10000

sess = tf.Session()
with tf.variable_scope('natural_DQN'):
    RL_natural = DQN(
        n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, sess=sess, prioritized=False,
    )

with tf.variable_scope('DQN_with_prioritized_replay'):
    RL_prio = DQN(
        n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
        e_greedy_increment=0.00005, sess=sess, prioritized=True, output_graph=True,
    )

sess.run(tf.global_variables_initializer())


def train(RL):
    total_steps = 0
    steps = []
    episodes = []
    for i_episode in range(20):
            # RL choose action based on observation
            action = RL.choose_action(observation)

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            RL.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DQN()
    env.after(100, run_maze)
    env.mainloop()
import gym
from RL_brain import DQN

env = gym.make('MountainCar-v0')
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DQN(n_states=2,
         n_actions=3,
         learning_rate=0.001,
         reward_decay=0.9,
         e_greedy=0.9,
         replace_target_iter=300,
         memory_size=3000,
         e_greedy_increment=0.002)

total_steps = 0

for i_episode in range(100):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)

        observation_, reward, done, info = env.step(action)
            s_, reward, done = env.step(action)

            # RL learn from this transition
            RL.learn(s, action, reward, s_)
            if len(RL.memory) >= 200:
                training = np.array(RL.memory)
                RL.q_model.fit(np.array(training[:, 0]),
                               np.array([i for i in training[:, 1]]),
                               epochs=5)
                RL.memory = []

            # swap observation
            s = s_

            # break while loop when end of this episode
            if done:
                # RL.epsilon += 0.001
                break

    G = GrowUp()
    print("test")
    for i in range(env.fin_step):
        q_table = RL.q_model.predict([i])
        G.step(np.argmax(q_table))
    print(G.score)


if __name__ == "__main__":
    env = GrowUp()
    RL = DQN(actions=list(range(env.n_actions)))
    update()
            observation = observation_

            # break while loop when end of this episode
            if done == 1:
                print('win!')
                break
            elif done == -1:
                print('lose!')
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DQN(env.n_features, env.n_actions,
             learning_rate=0.01,
             reward_decay=0.9,
             e_greedy=0.9,
             replace_target_iter=200,
             memory_size=2000)
    env.after(100, update)
    env.mainloop()
    RL.plot_cost()
import gym
from RL_brain import DQN

env = gym.make('CartPole-v0')
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DQN(env.observation_space.shape[0],
         env.action_space.n,
         learning_rate=0.01,
         reward_decay=0.9,
         e_greedy=0.9,
         replace_target_iter=100,
         memory_size=2000,
         e_greedy_increment=0.001)

total_steps = 0

for i_episode in range(100):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
            observation = observation_

            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1

    # end of game
    # env.destroy()


if __name__ == "__main__":
    # maze game
    env = FX()
    # note: the module-level name DQN is rebound here to the agent instance used by run_maze() above
    DQN = DQN(env.n_actions, env.n_features,
              learning_rate=0.01,
              reward_decay=0.9,
              e_greedy=0.9,
              replace_target_iter=200,
              memory_size=2000,
              # output_graph=True
              )
    # env.after(100, run_maze)
    run_maze()
    # env.mainloop()
    DQN.plot_cost()
def run_model(_train_episode=100,
              _learn_threshold=200,
              _learn_interval=5,
              _base_currency='USD',
              _trade_log_mode='NONE',
              _trade_log_raw=False,
              _trade_log_to_file=False,
              _show_checkout=True,
              _show_step=True):
    step = 0
    for episode in range(_train_episode):
        observation, TI_initial, initial_time = env.reset()
        TI_initial_balance = copy.deepcopy(TI_initial)
        train_name = TI_initial.account_name
        while True:
            action = DQN.choose_action(observation)
            observation_, reward, done, TI_end, end_time = env.step(
                action, print_step=_show_step)
            DQN.store_transition(observation, action, reward, observation_)

            if (step > _learn_threshold) and (step % _learn_interval == 0):
                DQN.learn()

            observation = observation_

            if done:
                print('$' * 20 + ' GAME OVER ' + '$' * 20)

                TI_initial_balance.account_name = (
                    'Initial_Checkout_Review' + ' (episode: ' + str(episode + 1) + ')')
                TI_initial_balance.checkout_all_in(initial_time, _base_currency)

                TI_end_balance = copy.deepcopy(TI_end)
                TI_end_balance.account_name = (
                    'End_Checkout_Review' + ' (episode: ' + str(episode + 1) + ')')
                TI_end_balance.checkout_all_in(end_time, _base_currency)

                print('MI: Initial Time: {}; End Time: {}'.format(initial_time, end_time))

                if _show_checkout:
                    TI_initial_balance.account_review()
                    TI_end_balance.account_review()

                # 'NONE' (the default) and False both mean: skip the trade-log review
                if _trade_log_mode in (False, 'NONE'):
                    pass
                elif _trade_log_mode == 'ALL':
                    TI_end_balance.trade_log_review(raw_flag=_trade_log_raw)
                elif _trade_log_mode == 'TWOENDS':
                    TI_end.trade_log_review(tar_action_id=0, raw_flag=_trade_log_raw)
                    TI_end.trade_log_review(tar_action_id='LAST', raw_flag=_trade_log_raw)
                else:
                    print('Invalid _trade_log_mode input ({}). Must be \'ALL\', \'TWOENDS\', \'NONE\', or False'
                          .format(_trade_log_mode))
                    return -1

                if _trade_log_to_file:
                    trade_log_base_dir = './logs/trade_logs/'
                    if not os.path.exists(trade_log_base_dir):
                        os.makedirs(trade_log_base_dir)
                    trade_log_file_name = trade_log_base_dir + str(train_name)

                    log_file_readable = open(trade_log_file_name + '.txt', 'w+')
                    with contextlib.redirect_stdout(log_file_readable):
                        TI_end.trade_log_review()
                    print("### READABLE trade_log of {} successfully exported to: ###\n\t\t{}"
                          .format(str(train_name), trade_log_file_name + '.txt'))
                    log_file_readable.close()

                    log_file_raw = open(trade_log_file_name + '.json', 'w+')
                    json.dump(TI_end.trade_log, log_file_raw, indent=4)
                    print("### RAW trade_log of {} successfully exported to: ###\n\t\t{}"
                          .format(str(train_name), trade_log_file_name + '.json'))
                    log_file_raw.close()

                break
            step += 1
TI_train = TI.Trade_Interface(config_account_name,
                              config_currency_balance,
                              config_from,
                              config_to,
                              config_interval,
                              config_output_arena_csv,
                              config_output_raw_csv)

env = FX(TI_train,
         _base_currency=config_base_currency,
         _n_features=config_n_features,
         _anita_switch=config_anita_switch)

DQN = DQN(len(env.TI_initial.currency_pairs),
          env.n_actions,
          env.n_features,
          learning_rate=config_learning_rate,
          reward_decay=config_reward_decay,
          e_greedy=config_e_greedy,
          replace_target_iter=config_replace_target_iter,
          memory_size=config_memory_size,
          output_graph=config_output_graph)

run_model(_train_episode=config_train_episode,
          _learn_threshold=config_learn_threshold,
          _learn_interval=config_learn_interval,
          _base_currency=config_base_currency,
          _trade_log_mode=config_trade_log_mode,
          _trade_log_raw=config_trade_log_raw,
          _trade_log_to_file=config_trade_log_to_file,
          _show_checkout=config_show_checkout,
          _show_step=config_show_step)
                    rate_reset = True
                    env.change_input()
                    state_episode = 0
                    RL.change_input_count()
                    print('change input:', RL.change_input)
                else:
                    rate_reset = False
                    check_success_episode = 0
                    success_rate = 0
                break

            if action_step > 50:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DQN(n_actions=env.n_actions,
             n_features=env.n_features,
             n_flights=env.n_flights,
             action_space=env.action_space)
    env.after(100, run_maze)
    env.mainloop()
            reward = log2_shaping(reward, divide=1)

            RL.store_transition(state, action, reward, next_state)
            state = next_state

            if ifrender:
                env.render_img()

            if RL.buffer.memory_counter > RL.memory_capacity:
                RL.learn()

            if done:
                print("Epoch: {}/{}, highest: {}".format(i, episodes, info['highest']))
                scores.append(info['highest'])
                if i % epsilon_decay_interval == 0:
                    RL.epsilon_decay(i, episodes)
                break

    return scores


if __name__ == "__main__":
    RL = DQN(num_state=16, num_action=4, dueling=True)
    scores = train(RL)

    plt.figure(figsize=(18, 6), dpi=200)
    plt.figure(1)
    plt.plot(np.array(scores), c='r')
    plt.ylabel('highest score')
    plt.xlabel('training steps')
    plt.savefig('result.jpg')
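
# ---------------------------------------------------------------------------
# Hedged sketch, not part of the original script: log2_shaping used in the
# training loop above is a project-specific helper defined outside this
# excerpt. The function below is only an illustrative assumption consistent
# with the call log2_shaping(reward, divide=1) and 2048-style tile rewards;
# the project's real helper (defined before train() runs) may differ.
# ---------------------------------------------------------------------------
import math


def log2_shaping_sketch(reward, divide=1):
    """Compress a non-negative raw reward onto a log2 scale, then rescale by divide."""
    return math.log2(reward + 1) / divide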
from maze_env import Maze
from RL_brain import DQN
import time

# Parameters
EPSILON = 0.9    # Greedy Policy
ALPHA = 0.1      # Learning Rate
LAMBDA = 0.9     # Discount Factor
MAX_EPISODE = 50
MAZE_SIZE = 5
TRAP_SET = [[0, 1], [2, 2], [3, 3], [3, 0]]
TREASURE_SET = [[4, 4]]

env = Maze(MAZE_SIZE)
RL = DQN(MAZE_SIZE, EPSILON, LAMBDA, ALPHA)


def update():
    for episode in range(MAX_EPISODE):
        O = env.reset()  # return initial observation
        step_number = 0
        env.set_trap(TRAP_SET)
        env.set_treasure(TREASURE_SET)
        is_terminated = False
        env.render()

        while not is_terminated:
            A = RL.choose_action(O)
            # print(O)
            OO, R, is_terminated = env.step(A)
            # print(O, OO)
import gym
from RL_brain import DQN

env = gym.make('CartPole-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DQN(n_actions=env.action_space.n,
         n_features=env.observation_space.shape[0],
         learning_rate=0.01,
         e_greedy=0.9,
         replace_target_iter=100,
         memory_size=2000,
         e_greedy_increment=0.001)

total_steps = 0

for i_episode in range(100):
    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)
import gym
from RL_brain import DQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)
MEMORY_SIZE = 3000
ACTION_SPACE = 25

sess = tf.Session()
with tf.variable_scope('Natural_DQN'):
    natural_DQN = DQN(n_actions=ACTION_SPACE,
                      n_features=3,
                      memory_size=MEMORY_SIZE,
                      e_greedy_increment=0.001,
                      dueling=False,
                      sess=sess)

with tf.variable_scope('Double_DQN'):
    double_DQN = DQN(n_actions=ACTION_SPACE,
                     n_features=3,
                     memory_size=MEMORY_SIZE,
                     e_greedy_increment=0.001,
                     dueling=True,
                     sess=sess,
                     output_graph=True)

sess.run(tf.global_variables_initializer())
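
# ---------------------------------------------------------------------------
# Hedged note (assumption, not shown in this excerpt): Pendulum-v0 takes a
# continuous torque in [-2, 2], so a DQN with ACTION_SPACE discrete actions
# has to convert each chosen index back into a torque before env.step().
# The helper below is only an illustrative sketch of that mapping; the
# project's own training loop may convert actions differently.
# ---------------------------------------------------------------------------
def index_to_torque(action, n_actions=ACTION_SPACE, low=-2.0, high=2.0):
    """Spread n_actions discrete indices evenly across the continuous torque range."""
    return np.array([low + action * (high - low) / (n_actions - 1)])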
            observation_, reward, done = env.step(action)

            RL.memorize(observation, action, reward, observation_)

            if (step > 200) and (step % 3 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DQN(env.n_actions,
             memory_size=300,
             reward_decay=0.9,
             e_greedy=0.9,
             replace_target_iter=100)
    env.after(100, run_maze)
    env.mainloop()