Example #1
def run_maze():
    step = 0
    for episode in range(5):
        # initial observation
        observation = env.reset()

        while True:
            # render the environment each step (disabled here to speed up training)
            # env.render()

            # RL choose action based on observation
            action = DQN.choose_action(observation)

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            DQN.store_transition(observation, action, reward, observation_)

            if (step > 200) and (step % 5 == 0):
                DQN.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                print('game over')
                print(env.balance)
                break
            step += 1
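
The loop above only touches the agent through three methods: choose_action, store_transition, and learn. The placeholder below is an illustrative sketch of that interface, not the actual agent used in the example; the random policy and the empty learn() merely stand in for the real Q-network and its training step.

import random
from collections import deque

class PlaceholderDQN:
    """Illustrative stand-in exposing the interface run_maze() relies on."""

    def __init__(self, n_actions, memory_size=2000):
        self.n_actions = n_actions
        self.memory = deque(maxlen=memory_size)   # experience-replay buffer

    def choose_action(self, observation):
        # a real DQN would act epsilon-greedily over Q(observation, a)
        return random.randrange(self.n_actions)

    def store_transition(self, s, a, r, s_):
        # (state, action, reward, next state) tuples feed replay sampling
        self.memory.append((s, a, r, s_))

    def learn(self):
        # a real DQN samples a minibatch from self.memory and updates the
        # Q-network towards r + gamma * max_a' Q_target(s_, a')
        pass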
Example #2
                  n_features=env.observation_space.shape[0],
                  learning_rate=0.01, e_greedy=0.9,
                  replace_target_iter=100, memory_size=2000,
                  e_greedy_increment=0.001,)

total_steps = 0


for i_episode in range(100):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)

        observation_, reward, done, info = env.step(action)

        # the smaller theta and closer to center the better
        x, x_dot, theta, theta_dot = observation_
        r1 = (env.x_threshold - abs(x))/env.x_threshold - 0.8
        r2 = (env.theta_threshold_radians - abs(theta))/env.theta_threshold_radians - 0.5
        reward = r1 + r2

        RL.store_transition(observation, action, reward, observation_)

        ep_r += reward
        if total_steps > 1000:
            RL.learn()

        # end the episode when done, otherwise swap observation and count steps
        if done:
            break

        observation = observation_
        total_steps += 1
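
Example #2 replaces the environment's own reward with a shaped reward that favors keeping the cart near the center (r1) and the pole near vertical (r2). The following is a self-contained sketch of that shaping, assuming the classic Gym CartPole-v0 environment with the older 4-tuple step API used in the snippet; env.unwrapped is needed to reach the x_threshold and theta_threshold_radians attributes.

import gym

env = gym.make('CartPole-v0').unwrapped    # unwrapped exposes the raw attributes
observation = env.reset()
observation_, reward, done, info = env.step(env.action_space.sample())

x, x_dot, theta, theta_dot = observation_
r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
shaped_reward = r1 + r2                    # in [-1.3, 0.7], higher is better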
Example #3
# imports needed by run_model below
import contextlib
import copy
import json
import os

def run_model(_train_episode=100,
              _learn_threshold=200,
              _learn_interval=5,
              _base_currency='USD',
              _trade_log_mode=False,
              _trade_log_raw=False,
              _trade_log_to_file=False,
              _show_checkout=True,
              _show_step=True):

    step = 0

    for episode in range(_train_episode):
        observation, TI_initial, initial_time = env.reset()
        TI_initial_balance = copy.deepcopy(TI_initial)
        train_name = TI_initial.account_name

        while True:
            action = DQN.choose_action(observation)
            observation_, reward, done, TI_end, end_time = env.step(
                action, print_step=_show_step)
            DQN.store_transition(observation, action, reward, observation_)

            if (step > _learn_threshold) and (step % _learn_interval == 0):
                DQN.learn()
            observation = observation_

            if done:
                print('$' * 20 + ' GAME OVER ' + '$' * 20)

                TI_initial_balance.account_name = 'Initial_Checkout_Review' + ' (episode: ' + str(
                    episode + 1) + ')'
                TI_initial_balance.checkout_all_in(initial_time,
                                                   _base_currency)
                TI_end_balance = copy.deepcopy(TI_end)
                TI_end_balance.account_name = 'End_Checkout_Review' + ' (episode: ' + str(
                    episode + 1) + ')'
                TI_end_balance.checkout_all_in(end_time, _base_currency)
                print('MI: Initial Time: {}; End Time: {}'.format(
                    initial_time, end_time))

                if _show_checkout:
                    TI_initial_balance.account_review()
                    TI_end_balance.account_review()

                if _trade_log_mode == False:
                    pass
                elif _trade_log_mode == 'ALL':
                    TI_end_balance.trade_log_review(raw_flag=_trade_log_raw)
                elif _trade_log_mode == 'TWOENDS':
                    TI_end.trade_log_review(tar_action_id=0,
                                            raw_flag=_trade_log_raw)
                    TI_end.trade_log_review(tar_action_id='LAST',
                                            raw_flag=_trade_log_raw)
                else:
                    print(
                        'Invalid _trade_log_mode input ({}). Must be \'ALL\', \'TWOENDS\', or False'
                        .format(_trade_log_mode))
                    return -1

                if _trade_log_to_file:
                    trade_log_base_dir = './logs/trade_logs/'
                    if not os.path.exists(trade_log_base_dir):
                        os.makedirs(trade_log_base_dir)
                    trade_log_file_name = trade_log_base_dir + str(train_name)

                    with open(trade_log_file_name + '.txt', 'w') as log_file_readable:
                        with contextlib.redirect_stdout(log_file_readable):
                            TI_end.trade_log_review()
                    print(
                        "### READABLE trade_log of {} successfully exported to: ###\n\t\t{}"
                        .format(str(train_name), trade_log_file_name + '.txt'))

                    with open(trade_log_file_name + '.json', 'w') as log_file_raw:
                        json.dump(TI_end.trade_log, log_file_raw, indent=4)
                    print(
                        "### RAW trade_log of {} successfully exported to: ###\n\t\t{}"
                        .format(str(train_name),
                                trade_log_file_name + '.json'))

                break
            step += 1
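
A typical call is sketched below for illustration only; it assumes the global env and DQN objects referenced inside run_model have already been constructed elsewhere, and the parameter values are placeholders.

if __name__ == '__main__':
    run_model(_train_episode=50,          # number of training episodes
              _learn_threshold=200,       # steps to collect before learning starts
              _learn_interval=5,          # learn on every 5th step afterwards
              _trade_log_mode='ALL',      # or 'TWOENDS' / False
              _trade_log_to_file=True)    # export .txt and .json trade logs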