Example #1
def get_node_hcount(self, node):
    # Heuristic count: the number of objects that still need to be moved.
    # If the node's action was infeasible (it produced no state), fall back
    # to the parent's state.
    is_infeasible_parent_action = node.state is None
    if is_infeasible_parent_action:
        return len(get_objects_to_move(node.parent.state,
                                       node.parent.state.problem_env))
    else:
        return len(get_objects_to_move(node.state, node.state.problem_env))
def __init__(self, state, ucb_parameter, depth, state_saver, is_operator_skeleton_node, is_init_node, actions,
             learned_q):
    # The action prior (psa) is based on the number of objects left to move.
    DiscreteTreeNode.__init__(self, state, ucb_parameter, depth, state_saver, is_operator_skeleton_node,
                              is_init_node, actions, learned_q)
    is_infeasible_state = self.state is None
    if is_infeasible_state:
        # No resulting state: initialize every action's Q-value to zero.
        for a in self.A:
            self.Q[a] = 0
    else:
        # Otherwise, initialize Q to the negated number of objects to move,
        # so states with less remaining work start with higher values.
        objs_to_move = get_objects_to_move(self.state, self.state.problem_env)
        for a in self.A:
            self.Q[a] = -len(objs_to_move)
def get_action_with_highest_ucb_value(self, actions, q_values):
    if self.learned_q is not None:
        # todo make this more efficient by calling predict_with_raw_*
        init_q_values = [self.learned_q.predict(self.state, a) for a in actions]
    else:
        # Hand-crafted prior: score an action 1 if its object is reachable,
        # the (object, region) pair is manipulation-free, and the object
        # still needs to be moved; score it 0 otherwise.
        objects_to_move = get_objects_to_move(self.state, self.state.problem_env)
        init_q_values = []
        for a in actions:
            obj_name = a.discrete_parameters['object']
            region_name = a.discrete_parameters['region']
            o_reachable = self.state.is_entity_reachable(obj_name)
            o_r_manip_free = self.state.binary_edges[(obj_name, region_name)][-1]
            o_needs_to_be_moved = obj_name in objects_to_move
            if o_reachable and o_r_manip_free and o_needs_to_be_moved:
                init_q_values.append(1)
            else:
                init_q_values.append(0)
    exp_sum = np.sum([np.exp(q) for q in init_q_values])

    action_ucb_values = []
    for action, value, prior_value in zip(actions, q_values, init_q_values):
        # Softmax over the prior Q-values yields a bonus in (0, 1).
        q_bonus = np.exp(prior_value) / float(exp_sum)
        ucb_value = value + q_bonus + self.compute_ucb_value(action)

        obj_name = action.discrete_parameters['object']
        region_name = action.discrete_parameters['region']
        print "%30s %30s Reachable? %d  ManipFree? %d IsGoal? %d Q? %.5f QBonus? %.5f Q+UCB? %.5f" \
              % (obj_name, region_name, self.state.is_entity_reachable(obj_name),
                 self.state.binary_edges[(obj_name, region_name)][-1],
                 obj_name in self.state.goal_entities, self.Q[action], q_bonus,
                 ucb_value)
        action_ucb_values.append(ucb_value)

    # Break ties among the highest-UCB actions uniformly at random.
    boolean_idxs_with_highest_ucb = (np.max(action_ucb_values) == action_ucb_values).squeeze()
    best_action_idx = np.random.randint(np.sum(boolean_idxs_with_highest_ucb))
    best_action = np.array(actions)[boolean_idxs_with_highest_ucb][best_action_idx]
    return best_action
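
For intuition, here is a minimal standalone sketch (not part of the original class) of how the softmax over the prior Q-values turns into a per-action bonus in (0, 1); the prior values below are hypothetical, following the 1/0 scoring in the else-branch above:

import numpy as np

# Hypothetical priors: 1 means the action's object is reachable, manip-free,
# and still needs to be moved; 0 otherwise.
init_q_values = [1, 0, 0, 1]
exp_q = np.exp(init_q_values)
q_bonuses = exp_q / exp_q.sum()
print(q_bonuses)  # ~[0.37, 0.13, 0.13, 0.37]: favored actions get larger bonuses

The bonuses sum to one, so the prior shifts the UCB argmax toward promising actions without dominating the accumulated Q-values or the exploration term.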
Example #4
import pickle

def main():
    parameters = parse_mover_problem_parameters()
    filename = 'pidx_%d_planner_seed_%d.pkl' % (parameters.pidx, parameters.planner_seed)
    save_dir = make_and_get_save_dir(parameters, filename)

    set_seed(parameters.pidx)
    problem_env = PaPMoverEnv(parameters.pidx)

    goal_object_names = [obj.GetName() for obj in problem_env.objects[:parameters.n_objs_pack]]
    goal_region_name = [problem_env.regions['home_region'].name]
    goal = goal_region_name + goal_object_names
    problem_env.set_goal(goal)

    goal_entities = goal_object_names + goal_region_name
    if parameters.use_shaped_reward:
        reward_function = ShapedRewardFunction(problem_env, goal_object_names, goal_region_name[0],
                                               parameters.planning_horizon)
    else:
        reward_function = GenericRewardFunction(problem_env, goal_object_names, goal_region_name[0],
                                                parameters.planning_horizon)

    motion_planner = OperatorBaseMotionPlanner(problem_env, 'prm')

    problem_env.set_reward_function(reward_function)
    problem_env.set_motion_planner(motion_planner)

    learned_q = None
    prior_q = None
    if parameters.use_learned_q:
        learned_q = load_learned_q(parameters, problem_env)

    v_fcn = lambda state: -len(get_objects_to_move(state, problem_env))

    if parameters.planner == 'mcts':
        planner = MCTS(parameters, problem_env, goal_entities, prior_q, learned_q)
    elif parameters.planner == 'mcts_with_leaf_strategy':
        planner = MCTSWithLeafStrategy(parameters, problem_env, goal_entities, v_fcn, learned_q)
    else:
        raise NotImplementedError

    set_seed(parameters.planner_seed)
    search_time_to_reward, plan = planner.search(max_time=parameters.timelimit)
    pickle.dump({"search_time_to_reward": search_time_to_reward, 'plan': plan,
                 'n_nodes': len(planner.tree.get_discrete_nodes())}, open(save_dir+filename, 'wb'))
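
A short usage sketch for reading the saved result back; save_dir and the pidx/seed values here are hypothetical, and only the filename convention and dictionary keys come from main() above:

import pickle

save_dir = './test_results/'  # hypothetical; main() gets this from make_and_get_save_dir
filename = 'pidx_%d_planner_seed_%d.pkl' % (0, 0)  # hypothetical pidx and planner_seed
with open(save_dir + filename, 'rb') as f:
    result = pickle.load(f)
print(result['n_nodes'])
print(result['search_time_to_reward'][-1])  # last recorded search-time/reward entry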
Example #5
import os
import pickle
import sys
import time

def main():
    commit_hash = get_commit_hash()
    parameters = parse_mover_problem_parameters()
    filename = 'pidx_%d_planner_seed_%d.pkl' % (parameters.pidx,
                                                parameters.planner_seed)
    save_dir = make_and_get_save_dir(parameters, filename, commit_hash)
    solution_file_name = save_dir + filename
    is_problem_solved_before = os.path.isfile(solution_file_name)
    print solution_file_name
    if is_problem_solved_before and not parameters.f:
        print "***************Already solved********************"
        with open(solution_file_name, 'rb') as f:
            trajectory = pickle.load(f)
            tottime = trajectory['search_time_to_reward'][-1][2]
            print 'Time: %.2f ' % tottime
        sys.exit(-1)

    set_seed(parameters.pidx)
    problem_env = PaPMoverEnv(parameters.pidx)

    goal_objs = [
        'square_packing_box1', 'square_packing_box2',
        'rectangular_packing_box3', 'rectangular_packing_box4'
    ]
    goal_region = 'home_region'
    problem_env.set_goal(goal_objs, goal_region)
    goal_entities = goal_objs + [goal_region]
    if parameters.use_shaped_reward:
        # uses potential-based reward shaping per Ng et al.
        reward_function = ShapedRewardFunction(problem_env, goal_objs,
                                               goal_region,
                                               parameters.planning_horizon)
    else:
        reward_function = GenericRewardFunction(problem_env, goal_objs,
                                                goal_region,
                                                parameters.planning_horizon)

    motion_planner = OperatorBaseMotionPlanner(problem_env, 'prm')

    problem_env.set_reward_function(reward_function)
    problem_env.set_motion_planner(motion_planner)

    learned_q = None
    prior_q = None
    if parameters.use_learned_q:
        learned_q = load_learned_q(parameters, problem_env)

    v_fcn = lambda state: -len(get_objects_to_move(state, problem_env))

    if parameters.planner == 'mcts':
        planner = MCTS(parameters, problem_env, goal_entities, prior_q,
                       learned_q)
    elif parameters.planner == 'mcts_with_leaf_strategy':
        planner = MCTSWithLeafStrategy(parameters, problem_env, goal_entities,
                                       v_fcn, learned_q)
    else:
        raise NotImplementedError

    set_seed(parameters.planner_seed)
    stime = time.time()
    search_time_to_reward, n_feasibility_checks, plan = planner.search(
        max_time=parameters.timelimit)
    tottime = time.time() - stime
    print 'Time: %.2f ' % (tottime)

    # todo
    #   save the entire tree

    with open(save_dir + filename, 'wb') as f:
        pickle.dump(
            {
                'search_time_to_reward': search_time_to_reward,
                'plan': plan,
                'commit_hash': commit_hash,
                'n_feasibility_checks': n_feasibility_checks,
                'n_nodes': len(planner.tree.get_discrete_nodes())
            }, f)
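
The ShapedRewardFunction above is, per its comment, the potential-based reward shaping of Ng et al. A minimal sketch of the idea, assuming the potential is the negated objects-to-move count that v_fcn also computes; the function below is illustrative, not the project's actual implementation:

def shaped_reward(raw_reward, phi_s, phi_s_next, gamma=1.0):
    # Potential-based shaping (Ng et al., 1999):
    #   r'(s, a, s') = r(s, a, s') + gamma * phi(s') - phi(s)
    # which preserves the optimal policy while densifying the reward signal.
    return raw_reward + gamma * phi_s_next - phi_s

# Moving one goal object takes the objects-to-move count from 3 to 2, so phi
# rises from -3 to -2 and the step earns a +1.0 shaping bonus.
print(shaped_reward(0.0, -3.0, -2.0))  # 1.0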