def run_maze(_train_episode=5, _learn_threshold=200, _learn_interval=5):
    """Train the global DQN agent on the global maze-style env.

    Generalized to match ``run_model``'s configuration style; defaults
    reproduce the original hard-coded behavior exactly.

    Args:
        _train_episode: number of episodes to run (was hard-coded 5).
        _learn_threshold: global step count that must be exceeded before
            the agent starts learning (was hard-coded 200).
        _learn_interval: learn once every this many steps once past the
            threshold (was hard-coded 5).

    Side effects:
        Mutates the module-level ``DQN`` agent (experience buffer and
        network weights) and steps the module-level ``env``; prints the
        final ``env.balance`` at the end of each episode.
    """
    step = 0  # global step counter across all episodes (never reset)
    for episode in range(_train_episode):
        # initial observation for a fresh episode
        observation = env.reset()
        while True:
            # fresh env
            #env.render()
            # RL choose action based on observation
            action = DQN.choose_action(observation)
            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)
            DQN.store_transition(observation, action, reward, observation_)
            # start learning only after enough transitions are stored,
            # then learn every _learn_interval steps
            if (step > _learn_threshold) and (step % _learn_interval == 0):
                DQN.learn()
            # swap observation
            observation = observation_
            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1
# NOTE(review): this fragment references names bound outside the visible span
# (`observation`, `i_episode`, `total_steps`, `RL`, `env`) — presumably it sits
# inside an enclosing `for i_episode in range(...):` loop that resets the env.
# TODO confirm against the full file.
ep_r = 0  # accumulated (shaped) reward for this episode, for logging only
while True:
    env.render()
    action = RL.choose_action(observation)
    observation_, reward, done, info = env.step(action)
    # Reward shaping: the smaller theta and the closer the cart is to
    # center, the better (replaces the env's native reward entirely).
    x, x_dot, theta, theta_dot = observation_
    # r1 in (-0.8, 0.2]: penalizes distance of cart position x from center
    r1 = (env.x_threshold - abs(x))/env.x_threshold - 0.8
    # r2 in (-0.5, 0.5]: penalizes pole angle theta away from vertical
    r2 = (env.theta_threshold_radians - abs(theta))/env.theta_threshold_radians - 0.5
    reward = r1 + r2
    RL.store_transition(observation, action, reward, observation_)
    ep_r += reward
    # learn only after the replay memory has warmed up with 1000+ steps
    if total_steps > 1000:
        RL.learn()
    if done:
        print('episode: ', i_episode,
              'ep_r: ', round(ep_r, 2),
              ' epsilon: ', round(RL.epsilon, 2))
        break
    observation = observation_
    total_steps += 1
#RL.plot_cost()
def run_model(_train_episode=100,
              _learn_threshold=200,
              _learn_interval=5,
              _base_currency='USD',
              _trade_log_mode='NONE',
              _trade_log_raw=False,
              _trade_log_to_file=False,
              _show_checkout=True,
              _show_step=True):
    """Train the global DQN agent on the global trading env.

    Args:
        _train_episode: number of training episodes.
        _learn_threshold: global step count that must be exceeded before
            the agent starts learning.
        _learn_interval: learn once every this many steps past the threshold.
        _base_currency: currency used for the end-of-episode checkout review.
        _trade_log_mode: 'ALL' (full trade log), 'TWOENDS' (first and last
            trade only), or 'NONE'/False (no trade-log review).
        _trade_log_raw: pass-through raw flag for trade_log_review.
        _trade_log_to_file: also export the trade log under ./logs/trade_logs/
            as <account_name>.txt (readable) and <account_name>.json (raw).
        _show_checkout: print initial/end account reviews each episode.
        _show_step: forwarded to env.step to print per-step info.

    Returns:
        -1 on an invalid _trade_log_mode; otherwise None.

    Side effects:
        Mutates the module-level ``DQN`` agent and ``env``; prints episode
        summaries; may create directories and write log files.
    """
    step = 0  # global step counter across all episodes (never reset)
    for episode in range(_train_episode):
        observation, TI_initial, initial_time = env.reset()
        # deep-copy so the end-of-episode review is not affected by
        # later mutation of the live TI_initial object
        TI_initial_balance = copy.deepcopy(TI_initial)
        train_name = TI_initial.account_name
        while True:
            action = DQN.choose_action(observation)
            observation_, reward, done, TI_end, end_time = env.step(
                action, print_step=_show_step)
            DQN.store_transition(observation, action, reward, observation_)
            if (step > _learn_threshold) and (step % _learn_interval == 0):
                DQN.learn()
            observation = observation_
            if done:
                print('$' * 20 + ' GAME OVER ' + '$' * 20)
                TI_initial_balance.account_name = 'Initial_Checkout_Review' + ' (episode: ' + str(
                    episode + 1) + ')'
                TI_initial_balance.checkout_all_in(initial_time, _base_currency)
                TI_end_balance = copy.deepcopy(TI_end)
                TI_end_balance.account_name = 'End_Checkout_Review' + ' (episode: ' + str(
                    episode + 1) + ')'
                TI_end_balance.checkout_all_in(end_time, _base_currency)
                print('MI: Initial Time: {}; End Time: {}'.format(
                    initial_time, end_time))
                if _show_checkout:
                    TI_initial_balance.account_review()
                    TI_end_balance.account_review()
                # BUG FIX: the default _trade_log_mode ('NONE') previously
                # fell through to the invalid-input branch and aborted with
                # return -1; accept both 'NONE' and False as "no review".
                if _trade_log_mode in ('NONE', False):
                    pass
                elif _trade_log_mode == 'ALL':
                    TI_end_balance.trade_log_review(raw_flag=_trade_log_raw)
                elif _trade_log_mode == 'TWOENDS':
                    TI_end.trade_log_review(tar_action_id=0,
                                            raw_flag=_trade_log_raw)
                    TI_end.trade_log_review(tar_action_id='LAST',
                                            raw_flag=_trade_log_raw)
                else:
                    print('Invalid _trade_log_mode input ({}). '
                          "Must be 'ALL', 'TWOENDS', 'NONE', or False"
                          .format(_trade_log_mode))
                    return -1
                if _trade_log_to_file:
                    trade_log_base_dir = './logs/trade_logs/'
                    os.makedirs(trade_log_base_dir, exist_ok=True)
                    trade_log_file_name = trade_log_base_dir + str(train_name)
                    # context managers guarantee the files are closed even
                    # if trade_log_review / json.dump raises
                    with open(trade_log_file_name + '.txt', 'w+') as log_file_readable:
                        with contextlib.redirect_stdout(log_file_readable):
                            TI_end.trade_log_review()
                    print(
                        "### READABLE trade_log of {} successfully exported to: ###\n\t\t{}"
                        .format(str(train_name), trade_log_file_name + '.txt'))
                    with open(trade_log_file_name + '.json', 'w+') as log_file_raw:
                        json.dump(TI_end.trade_log, log_file_raw, indent=4)
                    print(
                        "### RAW trade_log of {} successfully exported to: ###\n\t\t{}"
                        .format(str(train_name), trade_log_file_name + '.json'))
                break
            step += 1