Python Problem_cartpole Examples

Example #1

0

Show file

File: utree_play_cp.py Project: Guiliang/uTree_mimic_cartpole

def test():
    """Roll out 100 rendered episodes and report statistics on their returns.

    A CUTreeAgent is built for the cart-pole problem and the U-tree saved for
    game 165 is loaded from the agent's own ``SAVE_PATH``.  The action applied
    at every step is the constant ``1``; the U-tree based action lookup
    (``get_action_similar_instance``) is currently disabled.  After each
    episode its length is printed, and at the end the three values returned by
    ``mean_confidence_interval`` are printed as mean, variance and +- margin.

    Relies on a module-level ``env`` object providing ``reset``, ``render``
    and ``step``.
    """
    cartpole_problem = Problem_cartpole.CartPole(
        games_directory='../save_all_transition/')
    agent = Agent.CUTreeAgent(
        problem=cartpole_problem,
        max_hist=3000,
        check_fringe_freq=1200,
        is_episodic=0,
        training_mode='_linear_epoch_decay_lr',
    )
    agent.read_Utree(game_number=165, save_path=agent.SAVE_PATH)

    episode_returns = []
    for _ in range(100):
        env.reset()
        finished = False
        steps = 0
        episode_return = 0

        while not finished:
            env.render()
            # Fixed action; the observation returned by the environment is
            # not used while the U-tree lookup is disabled.
            _, reward, finished, _ = env.step(1)
            episode_return += reward
            steps += 1

        print(' lasted for ' + str(steps))
        episode_returns.append(episode_return)

    avg, variance, margin = mean_confidence_interval(episode_returns)
    print('mean:{0}, variance:{2}, +-{1}'.format(str(avg), str(margin), str(variance)))

Example #2

0

Show file

def one_shot_train():
    """Build a CUTreeAgent from the ``opts`` settings and run ``one_shot_episode``.

    The cart-pole problem reads its games from ``opts.GAME_DIRECTORY``; history
    size, fringe-check frequency and training mode also come from ``opts``.
    """
    agent = Agent.CUTreeAgent(
        problem=Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY),
        max_hist=opts.MAX_NODE_HIST,
        check_fringe_freq=opts.CHECK_FRINGE_FREQ,
        is_episodic=0,
        training_mode=opts.TRAINING_MODE,
    )
    agent.one_shot_episode()

Example #3

0

Show file

def feature_importance():
    """Build a CUTreeAgent from the ``opts`` settings and call its ``feature_importance``.

    Prints ``hello`` once the agent call has returned.
    """
    agent = Agent.CUTreeAgent(
        problem=Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY),
        max_hist=opts.MAX_NODE_HIST,
        check_fringe_freq=opts.CHECK_FRINGE_FREQ,
        is_episodic=0,
        training_mode=opts.TRAINING_MODE,
    )
    agent.feature_importance()
    print("hello")

Example #4

0

Show file

File: test_boost_Galen.py Project: Guiliang/uTree_mimic_cartpole

def test():
    """Run ``boost_tree_testing_performance`` on a saved boosted U-tree.

    The model directory is derived from ``opts.TRAINING_MODE``; the game number
    to read and the output directories for the correlation / MSE / MAE / RAE /
    RSE results are all taken from ``opts``.
    """
    problem = Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY)
    agent = Agent.CUTreeAgent(
        problem=problem,
        max_hist=opts.MAX_NODE_HIST,
        check_fringe_freq=opts.CHECK_FRINGE_FREQ,
        is_episodic=0,
    )

    model_dir = '/Local-Scratch/UTree model/cartpole/model_boost_linear_qsplit_noabs_save{0}/'.format(
        opts.TRAINING_MODE)
    # Output locations for each metric, forwarded as keyword arguments.
    output_dirs = {
        'save_correlation_dir': opts.SAVE_CORRELATION_DIR,
        'save_mse_dir': opts.SAVE_MSE_DIR,
        'save_mae_dir': opts.SAVE_MAE_DIR,
        'save_rae_dir': opts.SAVE_RAE_DIR,
        'save_rse_dir': opts.SAVE_RSE_DIR,
    }
    agent.boost_tree_testing_performance(
        save_path=model_dir,
        read_game_number=opts.GAME_NUMBER,
        **output_dirs)

Example #5

0

Show file

def train():
    """Build a CUTreeAgent from the ``opts`` settings and run ``episode``.

    The episode is started at ``int(opts.GAME_NUMBER)``, or at game 0 when
    ``opts.GAME_NUMBER`` is ``None``.
    """
    agent = Agent.CUTreeAgent(
        problem=Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY),
        max_hist=opts.MAX_NODE_HIST,
        check_fringe_freq=opts.CHECK_FRINGE_FREQ,
        is_episodic=0,
        training_mode=opts.TRAINING_MODE,
    )

    start_game = 0 if opts.GAME_NUMBER is None else int(opts.GAME_NUMBER)
    agent.episode(game_number=start_game)

Example #6

0

Show file

File: test_butree_decision_boundary.py Project: Guiliang/uTree_mimic_cartpole

def generate_similar_lmu_tree_two_way_decision(input_all, action):
    """Fill a 2-D grid with the Q-value of the nearest stored instance per observation.

    The U-tree saved for game 200 is loaded.  For each observation in
    ``input_all`` the leaf reached with the given ``action`` is located, and
    the instance in that leaf whose ``currentObs`` has the smallest
    ``compute_mse`` distance to the observation supplies the Q-value.

    :param input_all: sequence of rows, each row a sequence of observations.
        The width of the result is taken from the first row.
    :param action: action used to locate the leaf for every observation.
    :return: ``numpy`` array of shape ``(len(input_all), len(input_all[0]))``
        holding the selected Q-values; a cell is 0 when the leaf stores no
        instances.  An empty ``input_all`` yields an empty ``(0, 0)`` array.
    """
    row_length = len(input_all)
    if row_length == 0:
        # Nothing to evaluate; avoid indexing input_all[0] and loading the tree.
        return np.full((0, 0), np.inf)

    column_length = len(input_all[0])
    decision_all = np.full((row_length, column_length), np.inf)
    train_game_number = 200
    cartpole_problem = Problem_cartpole.CartPole()
    agent = Agent.CUTreeAgent(problem=cartpole_problem, max_hist=3000,
                              check_fringe_freq=1200, is_episodic=0, training_mode='_linear_epoch_decay_lr')
    agent.read_Utree(game_number=train_game_number,
                     save_path='/Local-Scratch/UTree model/cartpole/model_boost_linear_qsplit_noabs_save_linear_epoch_decay_lr/')

    for row_index, input_positions in enumerate(input_all):
        for column_index, input_observation in enumerate(input_positions):
            # The leaf is located by the current observation; the next
            # observation slot simply reuses it.
            inst = C_UTree_boost_Galen.Instance(-1, input_observation, action, input_observation, None, None)
            node = agent.utree.getAbsInstanceLeaf(inst)

            observation = np.asarray(input_observation)
            # Start from +inf so the closest instance always wins, however
            # large its distance is (a finite sentinel would skip far ones).
            min_mse = np.inf
            q_value = 0

            for instance in node.instances:
                mse = compute_mse(observation, np.asarray(instance.currentObs))
                if mse < min_mse:
                    min_mse = mse
                    q_value = instance.qValue

            decision_all[row_index, column_index] = q_value

    return decision_all

Example #7

0

Show file

File: find_path_boost_Galen.py Project: Guiliang/uTree_mimic_cartpole

def find_idx_path(idx):
    """Print the split path leading to tree node ``idx`` and the Q-values found there.

    The U-tree saved for game 165 is loaded and ``recursive_find_path`` is
    asked for the path from the root to ``idx``.  The path is a comma separated
    string; every section except the last two is parsed as
    ``<feature> <operator> <value>`` and folded into per-feature
    ``[lower, upper]`` bounds (``-10000`` / ``10000`` stand for "unbounded").
    The bounds, the path length and the last two path sections are printed.

    One observation satisfying the bounds is then built from the features
    ``Cart_Position``, ``Cart_Velocity``, ``Pole_Angle`` and
    ``Pole_Velocity_At_Tip``, and for actions 0 and 1 the leaf reached by that
    observation is looked up and its index and Q-value printed.

    :param idx: index of the tree node to search for.
    """
    cartpole = Problem_cartpole.CartPole()
    agent = Agent.CUTreeAgent(problem=cartpole, max_hist=opts.MAX_NODE_HIST,
                              check_fringe_freq=opts.CHECK_FRINGE_FREQ, is_episodic=0,
                              training_mode=opts.TRAINING_MODE)
    agent.read_Utree(game_number=165, save_path=agent.SAVE_PATH)
    utree = agent.utree

    _, path = recursive_find_path(utree.root, idx)
    path_list = path.split(',')
    split_sections = path_list[:-2]

    feature_value_dict = {}
    for path_section in split_sections:
        tokens = path_section.strip().split(' ')
        feature_name = tokens[0]
        operator = tokens[1]
        value = float(tokens[2])

        bounds = feature_value_dict.get(feature_name)
        if bounds is not None:
            # Tighten the existing interval in place.
            if operator == '<':
                bounds[1] = min(bounds[1], value)
            elif operator == '>':
                bounds[0] = max(bounds[0], value)
        elif operator == '<':
            feature_value_dict[feature_name] = [-10000, value]
        elif operator == '>':
            feature_value_dict[feature_name] = [value, 10000]
        else:
            feature_value_dict[feature_name] = value

    print(feature_value_dict)
    print('path_length is {0}'.format(len(split_sections)))
    print('{0}'.format(path_list[-2]))
    print('{0}'.format(path_list[-1]))

    observation = [
        _value_inside_bounds(feature_value_dict.get(feature_name))
        for feature_name in ('Cart_Position', 'Cart_Velocity', 'Pole_Angle', 'Pole_Velocity_At_Tip')
    ]

    for action_choice in [0, 1]:
        # Separate copies for the current and next observation slots.
        instance = C_UTree.Instance(1000, list(observation), action_choice, list(observation), None, None)
        node = utree.getAbsInstanceLeaf(inst=instance)
        Q = node.qValues[action_choice]
        print('idx {2}, action {0}: Q{1}'.format(action_choice, Q, node.idx))


def _value_inside_bounds(bounds):
    """Return one value lying inside the ``[lower, upper]`` bounds of a feature.

    ``None`` means the path never tested the feature, so any value satisfies
    it and 0.0 is used.  With only an upper bound the value sits just below
    it, with only a lower bound just above it, otherwise at the midpoint.
    """
    if bounds is None:
        return 0.0
    lower, upper = bounds[0], bounds[1]
    if lower == -10000:
        return upper - 0.0000001
    if upper == 10000:
        return lower + 0.0000001
    return sum(bounds) / len(bounds)

Example #8

0

Show file

File: test_butree_decision_boundary.py Project: Guiliang/uTree_mimic_cartpole

def generate_linear_b_u_tree_one_way_decision(input_all):
    """Predict the Q-values of actions 0 and 1 for every observation in ``input_all``.

    The U-tree saved for game 200 is loaded.  Each observation is routed to a
    leaf once per action; the observations are then grouped by leaf so that
    the leaf's linear model (``node.weight`` / ``node.bias``) is evaluated
    once per leaf on all observations that reached it.

    :param input_all: sequence of observations.
    :return: list aligned with ``input_all``; element ``i`` is
        ``[q_action_0, q_action_1]`` for ``input_all[i]``, where each Q-value
        is the first element of the model output for that observation.
    """
    train_game_number = 200
    cartpole_problem = Problem_cartpole.CartPole()
    agent = Agent.CUTreeAgent(problem=cartpole_problem, max_hist=3000,
                              check_fringe_freq=1200, is_episodic=0, training_mode='_linear_epoch_decay_lr')
    agent.read_Utree(game_number=train_game_number,
                     save_path='/Local-Scratch/UTree model/cartpole/model_boost_linear_qsplit_noabs_save_linear_epoch_decay_lr/')

    # leaf node -> list of (observation, action, index into input_all)
    records_by_node = {}
    for index_number, observation in enumerate(input_all):
        # The next-observation slot is not important for locating the leaf.
        inst_aleft = C_UTree_boost_Galen.Instance(-1, observation, 0, observation, None, None)
        inst_aright = C_UTree_boost_Galen.Instance(-1, observation, 1, observation, None, None)
        node_aleft = agent.utree.getAbsInstanceLeaf(inst_aleft)
        node_aright = agent.utree.getAbsInstanceLeaf(inst_aright)

        records_by_node.setdefault(node_aleft, []).append((observation, 0, index_number))
        records_by_node.setdefault(node_aright, []).append((observation, 1, index_number))

    # input index -> {action: model output}
    index_qvalue_record = {}
    for node, records in records_by_node.items():
        observations = [record[0] for record in records]

        # Close the session once the leaf has been evaluated so sessions do
        # not pile up, one per leaf.
        with tf.Session() as sess:
            LR = linear_regression.LinearRegression()
            LR.read_weights(weights=node.weight, bias=node.bias)
            LR.readout_linear_regression_model()
            sess.run(LR.init)
            qValues_output = sess.run(LR.pred, feed_dict={LR.X: observations})

        for i, (_, action_taken, index_number) in enumerate(records):
            index_qvalue_record.setdefault(index_number, {})[action_taken] = qValues_output[i]

    # Walk the inputs by index so the result order matches input_all rather
    # than the order in which leaves happened to be processed.
    decision_all = []
    for index_number in range(len(input_all)):
        q_by_action = index_qvalue_record[index_number]
        decision_all.append([q_by_action[0][0], q_by_action[1][0]])

    return decision_all