def test():
    """Run 100 episodes in the module-level ``env`` and print reward statistics.

    A U-tree agent is restored from game 165 before the rollouts, but every
    step takes the fixed action ``1``; the tree-based action lookup is
    disabled in this function.

    NOTE(review): another ``test`` is defined further down in this file and
    rebinds the name at import time.
    """
    problem = Problem_cartpole.CartPole(
        games_directory='../save_all_transition/')
    agent = Agent.CUTreeAgent(problem=problem,
                              max_hist=3000,
                              check_fringe_freq=1200,
                              is_episodic=0,
                              training_mode='_linear_epoch_decay_lr')
    agent.read_Utree(game_number=165, save_path=agent.SAVE_PATH)

    episode_rewards = []
    for _ in range(100):
        env.reset()
        steps = 0
        episode_reward = 0
        finished = False
        while not finished:
            env.render()
            # Fixed action; the returned observation is not used for control.
            _, reward, finished, _ = env.step(1)
            episode_reward += reward
            steps += 1
        print(' lasted for {0}'.format(steps))
        episode_rewards.append(episode_reward)

    mean, var, h = mean_confidence_interval(episode_rewards)
    print('mean:{0}, variance:{2}, +-{1}'.format(str(mean), str(h), str(var)))
def one_shot_train():
    """Build a cart-pole U-tree agent from ``opts`` and run ``one_shot_episode``."""
    agent_settings = dict(
        problem=Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY),
        max_hist=opts.MAX_NODE_HIST,
        check_fringe_freq=opts.CHECK_FRINGE_FREQ,
        is_episodic=0,
        training_mode=opts.TRAINING_MODE)
    agent = Agent.CUTreeAgent(**agent_settings)
    agent.one_shot_episode()
def feature_importance():
    """Build a cart-pole U-tree agent from ``opts`` and run its feature-importance routine.

    Prints ``hello`` once the agent call returns.
    """
    agent_settings = dict(
        problem=Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY),
        max_hist=opts.MAX_NODE_HIST,
        check_fringe_freq=opts.CHECK_FRINGE_FREQ,
        is_episodic=0,
        training_mode=opts.TRAINING_MODE)
    agent = Agent.CUTreeAgent(**agent_settings)
    agent.feature_importance()
    print("hello")
def test():
    """Run ``boost_tree_testing_performance`` on a saved cart-pole U-tree.

    The model directory is derived from ``opts.TRAINING_MODE``; the game
    number to read and the output directories for the correlation, MSE, MAE,
    RAE and RSE results all come from ``opts``. The agent is built without an
    explicit ``training_mode`` argument.
    """
    problem = Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY)
    agent = Agent.CUTreeAgent(problem=problem,
                              max_hist=opts.MAX_NODE_HIST,
                              check_fringe_freq=opts.CHECK_FRINGE_FREQ,
                              is_episodic=0)

    model_dir_template = '/Local-Scratch/UTree model/cartpole/model_boost_linear_qsplit_noabs_save{0}/'
    model_dir = model_dir_template.format(opts.TRAINING_MODE)

    agent.boost_tree_testing_performance(
        save_path=model_dir,
        read_game_number=opts.GAME_NUMBER,
        save_correlation_dir=opts.SAVE_CORRELATION_DIR,
        save_mse_dir=opts.SAVE_MSE_DIR,
        save_mae_dir=opts.SAVE_MAE_DIR,
        save_rae_dir=opts.SAVE_RAE_DIR,
        save_rse_dir=opts.SAVE_RSE_DIR)
def train():
    """Build a cart-pole U-tree agent from ``opts`` and run its ``episode`` routine.

    ``opts.GAME_NUMBER`` is converted with ``int``; when it is ``None`` the
    agent is called with game number 0.
    """
    problem = Problem_cartpole.CartPole(games_directory=opts.GAME_DIRECTORY)
    agent = Agent.CUTreeAgent(problem=problem,
                              max_hist=opts.MAX_NODE_HIST,
                              check_fringe_freq=opts.CHECK_FRINGE_FREQ,
                              is_episodic=0,
                              training_mode=opts.TRAINING_MODE)

    if opts.GAME_NUMBER is not None:
        agent.episode(game_number=int(opts.GAME_NUMBER))
    else:
        agent.episode(game_number=0)
def generate_similar_lmu_tree_two_way_decision(input_all, action):
    """Return a grid of Q-values taken from the nearest stored instance of each observation.

    The U-tree saved for game 200 is loaded. For every observation in
    ``input_all`` the leaf for ``(observation, action)`` is located, and the
    ``qValue`` of the instance in that leaf whose ``currentObs`` has the
    smallest ``compute_mse`` distance to the observation is written into the
    result.

    Args:
        input_all: two-level indexable collection of observations
            (``input_all[row][column]``); must be non-empty because the first
            row fixes the number of columns.
        action: the action used when locating the leaf for each observation.

    Returns:
        A ``(len(input_all), len(input_all[0]))`` float array. A cell is 0
        when its leaf holds no instances, and stays ``np.inf`` when its row is
        shorter than the first row.
    """
    row_length = len(input_all)
    column_length = len(input_all[0])
    decision_all = np.full((row_length, column_length), np.inf)

    problem = Problem_cartpole.CartPole()
    agent = Agent.CUTreeAgent(problem=problem,
                              max_hist=3000,
                              check_fringe_freq=1200,
                              is_episodic=0,
                              training_mode='_linear_epoch_decay_lr')
    agent.read_Utree(
        game_number=200,
        save_path='/Local-Scratch/UTree model/cartpole/model_boost_linear_qsplit_noabs_save_linear_epoch_decay_lr/')
    utree = agent.utree

    for row_index, observations in enumerate(input_all):
        for column_index, observation in enumerate(observations):
            # The leaf is located by the current observation; the "next
            # observation" slot is filled with the same value.
            inst = C_UTree_boost_Galen.Instance(-1, observation, action, observation, None, None)
            node = utree.getAbsInstanceLeaf(inst)

            observation_array = np.asarray(observation)
            # Start from +inf (not a finite guess) so the closest instance
            # always wins, however large its distance is.
            min_mse = float('inf')
            q_value = 0
            for instance in node.instances:
                mse = compute_mse(observation_array, np.asarray(instance.currentObs))
                if mse < min_mse:
                    min_mse = mse
                    q_value = instance.qValue

            decision_all[row_index, column_index] = q_value

    return decision_all
def _representative_value(bounds):
    """Pick one value that lies inside the constraint recorded for a feature.

    ``bounds`` is ``[lower, upper]`` with -10000 / 10000 standing for "no
    bound on this side", a bare number (stored for operators other than
    ``<`` / ``>``), or ``None`` when the path never tested the feature.
    """
    if bounds is None:
        # The path never tested this feature, so any value satisfies it.
        return 0.0
    if not isinstance(bounds, list):
        # NOTE(review): scalar entries come from the fallback operator branch;
        # using the number itself assumes an equality-style test - confirm.
        return bounds
    lower, upper = bounds
    if lower == -10000:
        # Only an upper bound: step just below it.
        return upper - 0.0000001
    if upper == 10000:
        # Only a lower bound: step just above it.
        return lower + 0.0000001
    return (lower + upper) / 2.0


def find_idx_path(idx):
    """Print the split path to tree node ``idx`` and the Q-values of a point satisfying it.

    Loads the U-tree saved for game 165, asks ``recursive_find_path`` for the
    comma-separated path from the root to ``idx``, and folds every path
    section except the last two (each expected to read
    ``"<feature> <op> <number>"``) into per-feature ``[lower, upper]``
    intervals. It prints the interval dict, the number of sections and the
    last two path entries, then builds one observation inside those intervals
    and, for actions 0 and 1, prints the index of the leaf it falls into and
    that leaf's Q-value for the action.

    Args:
        idx: node index forwarded to ``recursive_find_path``.
    """
    cartpole = Problem_cartpole.CartPole()
    agent = Agent.CUTreeAgent(problem=cartpole,
                              max_hist=opts.MAX_NODE_HIST,
                              check_fringe_freq=opts.CHECK_FRINGE_FREQ,
                              is_episodic=0,
                              training_mode=opts.TRAINING_MODE)
    agent.read_Utree(game_number=165, save_path=agent.SAVE_PATH)
    utree = agent.utree

    # NOTE(review): the first return value looks like a "found" flag; it is
    # not checked here - confirm what `path` holds when idx is missing.
    _found, path = recursive_find_path(utree.root, idx)
    path_list = path.split(',')
    split_sections = path_list[:-2]

    feature_value_dict = {}
    for path_section in split_sections:
        tokens = path_section.strip().split(' ')
        feature_name = tokens[0]
        value = float(tokens[2])
        operator = tokens[1]

        bounds = feature_value_dict.get(feature_name)
        if bounds is None:
            if operator == '<':
                feature_value_dict[feature_name] = [-10000, value]
            elif operator == '>':
                feature_value_dict[feature_name] = [value, 10000]
            else:
                feature_value_dict[feature_name] = value
        elif operator == '<':
            # Keep the tightest upper bound seen so far.
            if value <= bounds[1]:
                bounds[1] = value
        elif operator == '>':
            # Keep the tightest lower bound seen so far.
            if value >= bounds[0]:
                bounds[0] = value

    print(feature_value_dict)
    print('path_length is {0}'.format(len(split_sections)))
    print('{0}'.format(path_list[-2]))
    print('{0}'.format(path_list[-1]))

    feature_order = ('Cart_Position', 'Cart_Velocity', 'Pole_Angle', 'Pole_Velocity_At_Tip')
    observation = [_representative_value(feature_value_dict.get(feature_name))
                   for feature_name in feature_order]

    for action_choice in [0, 1]:
        # Separate copies for the current and next observation slots.
        instance = C_UTree.Instance(1000, list(observation), action_choice, list(observation), None, None)
        node = utree.getAbsInstanceLeaf(inst=instance)
        Q = node.qValues[action_choice]
        print('idx {2}, action {0}: Q{1}'.format(action_choice, Q, node.idx))
def generate_linear_b_u_tree_one_way_decision(input_all):
    """Return ``[Q(action 0), Q(action 1)]`` for every observation in ``input_all``.

    The U-tree saved for game 200 is loaded. Each observation is routed to a
    leaf once per action (0 and 1); queries are grouped by leaf so that each
    leaf's linear model (``node.weight`` / ``node.bias``) is evaluated once on
    all observations that reached it.

    Args:
        input_all: iterable of observations, each accepted by
            ``C_UTree_boost_Galen.Instance`` and by the linear model's input
            placeholder.

    Returns:
        A list with one ``[q_action0, q_action1]`` pair per observation, in
        the same order as ``input_all``.
    """
    problem = Problem_cartpole.CartPole()
    agent = Agent.CUTreeAgent(problem=problem,
                              max_hist=3000,
                              check_fringe_freq=1200,
                              is_episodic=0,
                              training_mode='_linear_epoch_decay_lr')
    agent.read_Utree(
        game_number=200,
        save_path='/Local-Scratch/UTree model/cartpole/model_boost_linear_qsplit_noabs_save_linear_epoch_decay_lr/')
    utree = agent.utree

    # leaf node -> list of (observation, action, input index) queries
    queries_by_node = {}
    for index_number, observation in enumerate(input_all):
        for action in (0, 1):
            # The next observation is not important for locating the leaf.
            inst = C_UTree_boost_Galen.Instance(-1, observation, action, observation, None, None)
            node = utree.getAbsInstanceLeaf(inst)
            queries_by_node.setdefault(node, []).append((observation, action, index_number))

    # input index -> {action: predicted Q-value row}
    index_qvalue_record = {}
    for node, queries in queries_by_node.items():
        observations = [query[0] for query in queries]
        # Close each session as soon as its leaf has been evaluated instead
        # of leaving one open session behind per leaf.
        with tf.Session() as sess:
            LR = linear_regression.LinearRegression()
            LR.read_weights(weights=node.weight, bias=node.bias)
            LR.readout_linear_regression_model()
            sess.run(LR.init)
            q_values_output = sess.run(LR.pred, feed_dict={LR.X: observations})

        for (_, action, index_number), q_value in zip(queries, q_values_output):
            index_qvalue_record.setdefault(index_number, {})[action] = q_value

    # Walk the indices in ascending order so that row i of the result always
    # belongs to input_all[i], regardless of dict ordering.
    decision_all = []
    for index_number in sorted(index_qvalue_record):
        record = index_qvalue_record[index_number]
        # assumes each prediction row is indexable and its first element is
        # the Q-value - TODO confirm against LinearRegression.pred
        decision_all.append([record.get(0)[0], record.get(1)[0]])
    return decision_all