Example #1
    def __init__(self):
        self.allow_interventions = rospy.get_param('~allow_interventions', False)
        self.trials = rospy.get_param('~trials', 1)
        self.trial = 0
        self.successes = 0
        self.failures = 0
        self.timeouts = 0

        self.last_action = Action()
        self.last_action.action_type = Action.NOOP

        self.terminal_action_counts = {}

        if self.trials > 1:
            self.interventions = [0] * self.trials
            self.action_counts = [[0] * 8 for _ in range(self.trials)]
            self.temp_action_counts = [0] * 8
            self.successful_action_counts = []
            self.failed_action_counts = []
        else:
            self.interventions = 0
            self.action_counts = [0] * 8
        self.step_count = 0

        self.query_state = rospy.ServiceProxy('table_sim/query_state', QueryState)
        self.query_status = rospy.ServiceProxy('table_sim/query_status', QueryStatus)
        self.select_action = rospy.ServiceProxy('table_sim/select_action', SelectAction)
        self.execute = rospy.ServiceProxy('table_sim/execute_action', Execute)
        self.request_intervention = rospy.ServiceProxy('table_sim/request_intervention', RequestIntervention)
        self.reset = rospy.ServiceProxy('table_sim/reset_simulation', Empty)

        print 'Starting trial 1...'
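
The proxies above are created without waiting for the table_sim services to come up. A minimal guard, as a sketch (rospy.wait_for_service is standard rospy API; the service names are the ones used above), could be placed before the proxy creation:

        # optional: block until the simulator's services are advertised
        for srv in ('table_sim/query_state', 'table_sim/query_status',
                    'table_sim/select_action', 'table_sim/execute_action',
                    'table_sim/request_intervention',
                    'table_sim/reset_simulation'):
            rospy.wait_for_service(srv)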
Example #2
    def evaluate(self, eval_seed):
        simulator_api = self.simulator_api[self.simulators[None]]
        rospy.set_param(simulator_api['seed_param_name'], eval_seed)
        simulator_api['reset_sim']()
        num_steps = 0

        status = Status.IN_PROGRESS
        while status == Status.IN_PROGRESS:
            if num_steps > self.max_episode_length:
                status = Status.TIMEOUT
                break

            state = simulator_api['query_state']().state
            selected_action = simulator_api['select_action'](state, Action())
            action = selected_action.action
            next_state = simulator_api['execute'](action)
            status = simulator_api['query_status'](next_state.state).status.status_code

            self.total_actions += 1
            if selected_action.action_source == 1:
                self.actions_from_learned_policy += 1

            num_steps += 1
            # rospy.sleep(0.5)

        return status == Status.COMPLETED
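
A minimal usage sketch for the method above: the evaluate_batch name and the seed list are hypothetical, and only what the snippet already shows is assumed (evaluate returns True when the episode reaches Status.COMPLETED):

    def evaluate_batch(self, eval_seeds):
        # run one evaluation episode per seed and report the success rate
        successes = sum(1 for seed in eval_seeds if self.evaluate(seed))
        return float(successes) / len(eval_seeds)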
Example #3
    def select_action(self, req):
        """Return an action generated from the plan network."""

        action = Action()

        print 'Planning...\n'
        plan = self.forward_planner.plan(req.state)
        print '\nPlan: '
        print str(plan)

        # block indefinitely so the printed plan can be inspected;
        # the return below is unreachable as written
        while True:
            rospy.sleep(1.0)

        return action
Example #4
    def run(self):

        for amdp_id, value_iterator in self.Us.iteritems():
            # Don't run value iteration for the top-level AMDPs
            if amdp_id in [4, 11, 12]:
                continue

            # print("Solving:", amdp_id)
            # value_iterator.init_utilities()
            # value_iterator.solve()

            # print("Saving:", amdp_id)
            # value_iterator.save()

        self.amdp_node.reinit_U()

        restart = 'y'
        while restart == 'y':
            state = self.query_state().state
            for i in range(100):
                selected_action = self.select_action(state, Action())

                action = selected_action.action

                print('Selected action: ' + str(action.action_type) + ', ' +
                      str(action.object))
                print('(press e to execute, q to quit)')
                s = ''
                while (s != 'e' and s != 'q'):
                    s = raw_input('(e/q) >> ')
                if s == 'q':
                    break

                state = self.execute(action).state

            print('AMDP testing finished.  Restart?')
            restart = raw_input('(y/n) >> ')

        print('Exiting.')
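
The e/q and y/n prompts above repeat a retry-until-valid input loop. A sketch of that pattern as a hypothetical helper (raw_input as in the source, so Python 2):

    def prompt_choice(options, prompt='>> '):
        # loop until the user types one of the allowed options
        s = ''
        while s not in options:
            s = raw_input('(' + '/'.join(options) + ') ' + prompt)
        return s

    # e.g.: if prompt_choice(['e', 'q']) == 'q': break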
Example #5
    def select_action(self, req):
        """Return an action generated from the plan network."""

        action = Action()

        action_list = []

        # check if we are correctly at the current node based on action effects
        if self.current_node != 'start':
            if self.handle_intervention_action:
                self.prev_action = req.prev_action
                self.handle_intervention_action = False
            actual_node = self.network.generalize_action(
                PlanAction(self.prev_state, self.prev_action, req.state))
            if actual_node != self.current_node:
                # print 'Unexpected effects!  Updating current node... (Note: this node may not be in the graph!)'
                # print '\n\n---------------------------------------'
                # print 'Current node: '
                # print str(self.current_node)
                # print '\n----------------------------------------'
                # print 'Actual node: '
                # print str(actual_node)
                # print '-----------------------------------------\n\n'
                if self.network.has_node(actual_node):
                    self.current_node = actual_node
                else:
                    # see if there are remaining actions available
                    if len(self.remaining_actions) > 0:
                        # see if remaining actions have valid preconditions
                        valid_remaining_actions = []
                        norm = 0
                        for act in self.remaining_actions:
                            if act[0].check_preconditions(
                                    req.state, act[1], act[2],
                                    self.network.object_to_cluster):
                                valid_remaining_actions.append(act)
                                norm += act[3]
                        self.remaining_actions = []
                        if len(valid_remaining_actions) > 0:
                            for act in valid_remaining_actions:
                                act[3] /= float(norm)
                            action_list = valid_remaining_actions
                    # find_suitable_node runs on every path here, so the
                    # current node is re-resolved even when valid remaining
                    # actions were found above
                    self.current_node = self.network.find_suitable_node(
                        req.state)
            # else:
            #     print 'Expected effects match.'

        if self.current_node is None:
            action.action_type = Action.NOOP
            self.noop_count += 1
            self.prev_state = copy.deepcopy(req.state)
            self.intervention_requested = True
            return action
        elif len(action_list) == 0:
            action_list = self.network.get_successor_actions(
                self.current_node, req.state)

        # check if there were no successors
        if len(action_list) == 0:
            self.current_node = self.network.find_suitable_node(req.state)
            if self.current_node is None:
                action.action_type = Action.NOOP
                self.noop_count += 1
                self.prev_state = copy.deepcopy(req.state)
                self.intervention_requested = True
                return action
            action_list = self.network.get_successor_actions(
                self.current_node, req.state)

        #print '\n\nAction list: '
        #print str(action_list)

        if len(action_list) > 0:

            # print '\nAction list: '
            # for act in action_list:
            #     print str(act[0].action) + ', ' + str(act[1]) + ', ' + str(act[2]) + ', ' + str(act[3])
            # print '\n'

            # roulette-wheel selection over the action weights (index 3)
            selection = random()
            count = 0
            selected_action = action_list[0]
            for i in range(len(action_list)):
                count += action_list[i][3]
                if count >= selection:
                    selected_action = action_list[i]
                    break
            action.action_type = selected_action[0].action
            if action.action_type == Action.GRASP:
                action.object = selected_action[1]
                if len(action.object) > 0:
                    action.object = action.object[0].upper() + action.object[1:]
            elif action.action_type == Action.PLACE or action.action_type == Action.MOVE_ARM:
                action.position = DataUtils.semantic_action_to_position(
                    req.state, selected_action[2])
            self.prev_node = copy.deepcopy(self.current_node)
            self.current_node = copy.deepcopy(selected_action[0])
            self.prev_state = copy.deepcopy(req.state)
            self.prev_action = copy.deepcopy(action)
            self.remaining_actions = action_list
            norm = selected_action[3]
            self.remaining_actions.remove(selected_action)
        else:
            # print 'Still no actions!'
            action.action_type = Action.NOOP
            self.prev_state = copy.deepcopy(req.state)
            self.intervention_requested = True

        if action.action_type != Action.NOOP:
            # print 'Action:\n' + str(action.action_type) + ', ' + selected_action[1] + ', ' + selected_action[2]
            self.noop_count = 0
        else:
            self.noop_count += 1

        return action
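
The cumulative-probability loop above (and the similar loops in Example #7) is roulette-wheel selection. A standalone sketch of the pattern, with a hypothetical weighted_choice helper that is not part of the source:

from random import random

def weighted_choice(weighted_items):
    # weighted_items: list of (item, weight) pairs whose weights sum to ~1.0
    threshold = random()
    cumulative = 0.0
    for item, weight in weighted_items:
        cumulative += weight
        if cumulative >= threshold:
            return item
    # guard against floating-point shortfall in the weight sum
    return weighted_items[-1][0]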
Example #6
    def __init__(
        self,
        simulator_name='table_sim',
        transition_functions=None,
        value_tables=None,
        demo_mode=None,  # DemonstrationMode object. If None, RANDOM+CLASSIFIER
        baseline_mode=False,
        q_learning_mode=False,
        q_tables=None,
        complexity=1,  # complexity 0 represents 1I-1C environments, used for exploitation during training
        env_type=0,  # Used to specify box or drawer environments when complexity=0, ignored otherwise
        continuous=False  # flag to specify continuous mode (i.e. running on a physical platform such as Nimbus)
    ):
        self.continuous = continuous

        self.baseline_mode = baseline_mode  # flag for running without utilities at leaf action selection
        self.q_learning_mode = q_learning_mode  # use Q tables for leaf amdp action selection

        self.complexity = complexity
        self.env_type = env_type

        a_file_drawer = rospy.get_param(
            '~actions_drawer',
            rospkg.RosPack().get_path('task_sim') +
            '/src/task_sim/str/A_drawer.pkl')
        a_file_box = rospy.get_param(
            '~actions_box',
            rospkg.RosPack().get_path('task_sim') +
            '/src/task_sim/str/A_box.pkl')

        self.A = {}
        self.U = {}
        self.U_t = value_tables
        self.T = transition_functions or {}
        if self.q_learning_mode:
            self.Q = q_tables or {}

        self.A[0] = pickle.load(file(a_file_drawer))
        self.A[1] = self.A[0]
        self.A[2] = self.A[0]

        self.A[6] = pickle.load(file(a_file_box))
        self.A[7] = self.A[6]
        self.A[8] = self.A[6]
        self.A[4] = []

        # hand-build the action sets for the mid- and top-level AMDPs
        a = Action()
        a.action_type = 0
        self.A[4].append(deepcopy(a))
        a.action_type = 1
        self.A[4].append(deepcopy(a))
        a.action_type = 2
        a.object = 'apple'
        self.A[4].append(deepcopy(a))
        a.object = 'banana'
        self.A[4].append(deepcopy(a))
        self.A[11] = []
        a = Action()
        a.action_type = 6
        self.A[11].append(deepcopy(a))
        a.action_type = 7
        self.A[11].append(deepcopy(a))
        a.action_type = 8
        a.object = 'carrot'
        self.A[11].append(deepcopy(a))
        a.object = 'daikon'
        self.A[11].append(deepcopy(a))
        a = Action()
        self.A[12] = []
        a.action_type = 4
        self.A[12].append(deepcopy(a))
        a.action_type = 11
        self.A[12].append(deepcopy(a))

        if value_tables is None:
            for i in [0, 1, 2, 4, 6, 7, 8, 11, 12]:
                self.U[i] = pickle.load(file('U%d.pkl' % i))

        if transition_functions is None:
            for i in [0, 2, 4, 6, 8, 11, 12]:
                self.T[i] = AMDPTransitionsLearned(amdp_id=i)

        # demo config, loads modes, policies, and classifiers
        self.demo_mode = demo_mode or DemonstrationMode(
            DemonstrationMode.RANDOM | DemonstrationMode.CLASSIFIER)
        self.demo_configs = {}
        for amdp_id, env in [(0, 'task4'), (2, 'task4'), (6, 'task7'),
                             (8, 'task7')]:
            self.demo_configs[amdp_id] = self.demo_mode.configuration(
                amdp_id=amdp_id, container_env=env)

        # load decision trees, shadow policies
        self.classifiers = {}
        self.classifiers_alternate = {}
        self.pis = {}
        self.action_sequences = {}

        for i in [0, 2, 6, 8]:
            if self.demo_mode.shadow:
                self.pis[i] = self.demo_configs[i].get('demo_policy')
            if self.demo_mode.classifier:
                self.classifiers[i] = self.demo_configs[i].get('action_bias')
                self.classifiers_alternate[i] = self.demo_configs[i].get(
                    'action_bias_alternate')
            if self.demo_mode.plan_network:
                self.action_sequences[i] = self.demo_configs[i].get(
                    'action_sequences')

        self.service = rospy.Service(simulator_name + '/select_action',
                                     SelectAction, self.select_action)
        self.status_service = rospy.Service(simulator_name + '/query_status',
                                            QueryStatus, self.query_status)
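
The blocks above that populate self.A[4], self.A[11], and self.A[12] repeat a mutate-and-deepcopy pattern. A sketch of a small factory helper (hypothetical, not in the source, and assuming Action's object field defaults to an empty string) that would express the same lists more compactly:

def make_actions(specs):
    # build one Action per (action_type, object_name) pair;
    # object_name is '' for actions that take no object
    actions = []
    for action_type, obj in specs:
        a = Action()
        a.action_type = action_type
        a.object = obj
        actions.append(a)
    return actions

# e.g.: self.A[4] = make_actions([(0, ''), (1, ''), (2, 'apple'), (2, 'banana')])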
Example #7
    def select_action(self, req, debug=1):
        action = Action()

        action_list = []

        oo_state = OOState(state=req.state, continuous=self.continuous)

        if self.complexity > 0:
            # TODO: this is commented out for drawer-only testing!
            # start at the top level
            s = AMDPState(amdp_id=12, state=oo_state)
            utilities = {}
            for a in self.A[12]:
                successors = self.T[t_id_map[12]].transition_function(s, a)
                u = 0
                for i in range(len(successors)):
                    p = successors[i][0]
                    s_prime = successors[i][1]
                    if s_prime in self.U[12]:
                        u += p * self.U[12][s_prime]
                    elif is_terminal(s_prime, amdp_id=12):
                        u += p * reward(s_prime, amdp_id=12)
                utilities[a] = u

            # print '\n---'
            # for key in utilities:
            #     print str(key)
            #     print 'utility: ' + str(utilities[key])

            # pick top action deterministically
            max_utility = -999999
            for a in utilities.keys():
                if utilities[a] > max_utility:
                    max_utility = utilities[a]
                    action_list = []
                    action_list.append(deepcopy(a))
                elif utilities[a] == max_utility:
                    action_list.append(deepcopy(a))

            # select action
            # i = randint(0, len(action_list) - 1)
            i = 0
            id = action_list[i].action_type
            #obj = action_list[i].object

            if debug > 0:
                print 'Top level action selection: ' + str(id)

            s = AMDPState(amdp_id=id, state=oo_state)
            # s = AMDPState(amdp_id=4, state=oo_state)  # TODO: temporary, for drawer-only testing

        else:
            if self.env_type % 2 == 0:
                id = 4
            else:
                id = 11

            s = AMDPState(amdp_id=id,
                          state=oo_state,
                          ground_items=['apple', 'apple', 'apple', 'apple'])

        # TODO: debugging state
        print '\n\n-------------------------------------------------------------'
        print 'Mid-level AMDP state:'
        print str(s)
        print '-------------------------------------------------------------\n\n'

        utilities = {}
        for a in self.A[id]:
            successors = self.T[t_id_map[id]].transition_function(s, a)
            u = 0
            for i in range(len(successors)):
                p = successors[i][0]
                s_prime = successors[i][1]
                if s_prime in self.U[id]:
                    u += p * self.U[id][s_prime]
                elif is_terminal(s_prime, amdp_id=id):
                    u += p * reward(s_prime, amdp_id=id)
            utilities[a] = u

        # print '\n---'
        # for key in utilities:
        #     print str(key)
        #     print 'utility: ' + str(utilities[key])

        # pick top action deterministically
        max_utility = -999999
        for a in utilities.keys():
            if utilities[a] > max_utility:
                max_utility = utilities[a]
                action_list = []
                action_list.append(deepcopy(a))
            elif utilities[a] == max_utility:
                action_list.append(deepcopy(a))

        # select action
        # i = randint(0, len(action_list) - 1)
        i = 0
        id = action_list[i].action_type
        if self.complexity > 0:
            obj = action_list[i].object
        else:
            if action_list[i].object in ['apple', 'banana', 'carrot', 'daikon']:
                obj = 'apple'
            else:
                obj = action_list[i].object

        if debug > 0:
            print '\tMid level action selection: ' + str(id) + ', ' + str(obj)

        # solve lower level mdp for executable action
        action_list = []
        s = AMDPState(amdp_id=id, state=oo_state, ground_items=[obj])

        # TODO: debugging state
        print '\n\n-------------------------------------------------------------'
        print 'Low-level AMDP state:'
        print str(s)
        print '-------------------------------------------------------------\n\n'

        selected_from_utility = 1

        if self.q_learning_mode:
            action = self.Q[id].select_action(s, action_list=self.A[id])
            if action is None:
                selected_from_utility = 0
                if self.demo_mode.classifier:
                    action = Action()
                    features = s.to_vector()
                    probs = self.classifiers[t_id_map[id]].predict_proba(
                        np.asarray(features).reshape(1,
                                                     -1)).flatten().tolist()
                    selection = random()
                    cprob = 0
                    action_label = '0:apple'
                    for i in range(0, len(probs)):
                        cprob += probs[i]
                        if cprob >= selection:
                            action_label = self.classifiers[
                                t_id_map[id]].classes_[i]
                            break
                    # Convert back to action
                    result = action_label.split(':')
                    action.action_type = int(result[0])
                    if len(result) > 1:
                        action.object = result[1]
                else:
                    action = self.A[id][randint(0, len(self.A[id]) - 1)]
            if action.object == 'apple':
                if obj not in items:
                    action.object = items[randint(0, len(items) - 1)]
                else:
                    action.object = obj
        elif self.baseline_mode:
            selected_from_utility = 0
            if self.demo_mode.classifier:
                features = s.to_vector()
                probs = self.classifiers[t_id_map[id]].predict_proba(
                    np.asarray(features).reshape(1, -1)).flatten().tolist()
                selection = random()
                cprob = 0
                action_label = '0:apple'
                for i in range(0, len(probs)):
                    cprob += probs[i]
                    if cprob >= selection:
                        action_label = self.classifiers[
                            t_id_map[id]].classes_[i]
                        break
                # Convert back to action
                result = action_label.split(':')
                action.action_type = int(result[0])
                if len(result) > 1:
                    action.object = result[1]
                    if action.object == 'apple':
                        if obj not in items:
                            action.object = items[randint(0, len(items) - 1)]
                        else:
                            action.object = obj
            elif self.demo_mode.plan_network:
                current_node = self.action_sequences[
                    t_id_map[id]].find_suitable_node(req.state,
                                                     ground_items=[obj])
                if current_node is None:
                    current_node = 'start'
                action_list = self.action_sequences[
                    t_id_map[id]].get_successor_actions(current_node,
                                                        req.state,
                                                        ground_items=[obj])
                # select action stochastically if we're in the network, select randomly otherwise
                if len(action_list) == 0:
                    # random
                    action = self.A[id][randint(0, len(self.A[id]) - 1)]
                    if action.object == 'apple':
                        if obj not in items:
                            action.object = items[randint(0, len(items) - 1)]
                        else:
                            action.object = obj
                else:
                    selection = random()
                    count = 0
                    selected_action = action_list[0]
                    for i in range(len(action_list)):
                        count += action_list[i][1]
                        if count >= selection:
                            selected_action = action_list[i]
                            break
                    action.action_type = selected_action[0].action_type
                    action.object = selected_action[0].action_object
                    if action.object == 'apple':
                        if obj not in items:
                            action.object = items[randint(0, len(items) - 1)]
                        else:
                            action.object = obj
            else:
                action = self.A[id][randint(0, len(self.A[id]) - 1)]
                if action.object == 'apple':
                    if obj not in items:
                        action.object = items[randint(0, len(items) - 1)]
                    else:
                        action.object = obj

        else:
            utilities = {}
            for a in self.A[id]:
                successors = self.T[t_id_map[id]].transition_function(s, a)
                u = 0
                for i in range(len(successors)):
                    p = successors[i][0]
                    s_prime = successors[i][1]
                    if s_prime in self.U[id]:
                        u += p * self.U[id][s_prime]
                    elif is_terminal(s_prime, amdp_id=id):
                        u += p * reward(s_prime, amdp_id=id)
                utilities[a] = u

            # print '\n---'
            # for key in utilities:
            #     print str(key)
            #     print 'utility: ' + str(utilities[key])

            # pick top action deterministically
            max_utility = -999999
            for a in utilities.keys():
                if utilities[a] > max_utility:
                    max_utility = utilities[a]
                    action_list = []
                    action = deepcopy(a)
                    if action.object == 'apple':
                        if obj not in items:
                            action.object = items[randint(0, len(items) - 1)]
                        else:
                            action.object = obj
                    action_list.append(deepcopy(action))
                elif utilities[a] == max_utility:
                    action = deepcopy(a)
                    if action.object == 'apple':
                        if obj not in items:
                            action.object = items[randint(0, len(items) - 1)]
                        else:
                            action.object = obj
                    action_list.append(deepcopy(action))
                if debug > 1:
                    print 'Action: ', a.action_type, ':', a.object, ', Utility: ', utilities[a]

            if max_utility > 0:  # at least one successor state is in the utility table
                i = randint(0, len(action_list) - 1)
                # i = 0
                action = action_list[i]
                if debug > 0:
                    print('Action selected from utilities')
            else:  # we need to select an action a different way
                selected_from_utility = 0
                if self.demo_mode.plan_network and not self.demo_mode.classifier:
                    current_node = self.action_sequences[
                        t_id_map[id]].find_suitable_node(req.state,
                                                         ground_items=[obj])
                    if current_node is None:
                        current_node = 'start'
                    action_list = self.action_sequences[
                        t_id_map[id]].get_successor_actions(current_node,
                                                            req.state,
                                                            ground_items=[obj])

                    # select action stochastically if we're in the network, select randomly otherwise
                    if len(action_list) == 0:
                        # random
                        action = self.A[id][randint(0, len(self.A[id]) - 1)]
                        if action.object == 'apple':
                            if obj not in items:
                                action.object = items[randint(
                                    0,
                                    len(items) - 1)]
                            else:
                                action.object = obj
                    else:
                        selection = random()
                        count = 0
                        selected_action = action_list[0]
                        for i in range(len(action_list)):
                            count += action_list[i][1]
                            if count >= selection:
                                selected_action = action_list[i]
                                break
                        action.action_type = selected_action[0].action_type
                        action.object = selected_action[0].action_object
                        if action.object == 'apple':
                            if obj not in items:
                                action.object = items[randint(
                                    0,
                                    len(items) - 1)]
                            else:
                                action.object = obj
                elif self.demo_mode.plan_network and self.demo_mode.classifier:
                    # 50/50 tradeoff between plan network and classifier
                    use_plan_network = random() < 0.5
                    use_classifier = not use_plan_network

                    if use_plan_network:
                        current_node = self.action_sequences[
                            t_id_map[id]].find_suitable_node(
                                req.state, ground_items=[obj])
                        if current_node is None:
                            current_node = 'start'
                        action_list = self.action_sequences[
                            t_id_map[id]].get_successor_actions(
                                current_node, req.state, ground_items=[obj])

                        # select action stochastically if we're in the network, select with classifier otherwise
                        if len(action_list) == 0:
                            use_classifier = True
                        else:
                            selection = random()
                            count = 0
                            selected_action = action_list[0]
                            for i in range(len(action_list)):
                                count += action_list[i][1]
                                if count >= selection:
                                    selected_action = action_list[i]
                                    break
                            action.action_type = selected_action[0].action_type
                            action.object = selected_action[0].action_object
                            if action.object == 'apple':
                                if obj not in items:
                                    action.object = items[randint(
                                        0,
                                        len(items) - 1)]
                                else:
                                    action.object = obj

                            if debug > 0:
                                print('Action selected from plan network')

                    if use_classifier:
                        features = s.to_vector()
                        probs = self.classifiers[t_id_map[id]].predict_proba(
                            np.asarray(features).reshape(
                                1, -1)).flatten().tolist()
                        selection = random()
                        cprob = 0
                        action_label = '0:apple'
                        for i in range(0, len(probs)):
                            cprob += probs[i]
                            if cprob >= selection:
                                action_label = self.classifiers[
                                    t_id_map[id]].classes_[i]
                                break
                        # Convert back to action
                        result = action_label.split(':')
                        action.action_type = int(result[0])
                        if len(result) > 1:
                            action.object = result[1]
                            if action.object == 'apple':
                                if obj not in items:
                                    action.object = items[randint(
                                        0,
                                        len(items) - 1)]
                                else:
                                    action.object = obj
                        if debug > 0:
                            print('Action selected from classifier')

                elif self.demo_mode.classifier:
                    features = s.to_vector()

                    # if random() < 0.5:
                    probs = self.classifiers[t_id_map[id]].predict_proba(
                        np.asarray(features).reshape(1,
                                                     -1)).flatten().tolist()
                    selection = random()
                    cprob = 0
                    action_label = '0:apple'
                    for i in range(0, len(probs)):
                        cprob += probs[i]
                        if cprob >= selection:
                            action_label = self.classifiers[
                                t_id_map[id]].classes_[i]
                            break
                    # else:
                    #     probs = self.classifiers[t_id_map[id]].predict_proba(np.asarray(features).reshape(1, -1)).flatten().tolist()
                    #     selection = random()
                    #     cprob = 0
                    #     action_label = '0:apple'
                    #     for i in range(0, len(probs)):
                    #         cprob += probs[i]
                    #         if cprob >= selection:
                    #             action_label = self.classifiers[t_id_map[id]].classes_[i]
                    #             break

                    # Convert back to action
                    result = action_label.split(':')
                    action.action_type = int(result[0])
                    if len(result) > 1:
                        action.object = result[1]
                        if action.object == 'apple':
                            if obj not in items:
                                action.object = items[randint(
                                    0,
                                    len(items) - 1)]
                            else:
                                action.object = obj
                    if debug > 0:
                        print '***** Action selected from decision tree. *****'

                # random action
                # if self.demo_mode.random:
                else:
                    action = self.A[id][randint(0, len(self.A[id]) - 1)]
                    if action.object == 'apple':
                        if obj not in items:
                            action.object = items[randint(0, len(items) - 1)]
                        else:
                            action.object = obj

        if debug > 0:
            print '\t\tLow level action selection: ' + str(
                action.action_type) + ', ' + str(action.object)
        if action.action_type == Action.PLACE:
            if not self.continuous:
                action.position = DataUtils.semantic_action_to_position(
                    req.state, action.object)
                action.object = ''
        elif action.action_type == Action.MOVE_ARM:
            if not self.continuous:
                if action.object == 'l':
                    action.position.x = req.state.gripper_position.x - 10
                    action.position.y = req.state.gripper_position.y
                elif action.object == 'fl':
                    action.position.x = req.state.gripper_position.x - 10
                    action.position.y = req.state.gripper_position.y - 5
                elif action.object == 'f':
                    action.position.x = req.state.gripper_position.x
                    action.position.y = req.state.gripper_position.y - 5
                elif action.object == 'fr':
                    action.position.x = req.state.gripper_position.x + 10
                    action.position.y = req.state.gripper_position.y - 5
                elif action.object == 'r':
                    action.position.x = req.state.gripper_position.x + 10
                    action.position.y = req.state.gripper_position.y
                elif action.object == 'br':
                    action.position.x = req.state.gripper_position.x + 10
                    action.position.y = req.state.gripper_position.y + 5
                elif action.object == 'b':
                    action.position.x = req.state.gripper_position.x
                    action.position.y = req.state.gripper_position.y + 5
                elif action.object == 'bl':
                    action.position.x = req.state.gripper_position.x - 10
                    action.position.y = req.state.gripper_position.y + 5
                else:
                    action.position = DataUtils.semantic_action_to_position(
                        req.state, action.object)
                action.object = ''
        elif action.action_type != Action.GRASP:
            action.object = ''

        # print '\n\n-------------------'
        # print 'Selected action: '
        # print str(action)

        return action, selected_from_utility
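
The one-step expected-utility computation and the classifier sampling each appear several times in this method. Sketches of both patterns as hypothetical helpers (is_terminal, reward, np, and random follow the source; the helper names are assumptions):

def expected_utility(transition, U, s, a, amdp_id):
    # one-step lookahead: sum p * U(s') over the successors of (s, a);
    # terminal successors missing from the utility table fall back to
    # their immediate reward, mirroring the loops above
    u = 0.0
    for p, s_prime in transition.transition_function(s, a):
        if s_prime in U:
            u += p * U[s_prime]
        elif is_terminal(s_prime, amdp_id=amdp_id):
            u += p * reward(s_prime, amdp_id=amdp_id)
    return u

def sample_action_label(classifier, features):
    # sample a class label from the classifier's predicted distribution,
    # as in the predict_proba branches above
    probs = classifier.predict_proba(
        np.asarray(features).reshape(1, -1)).flatten()
    threshold = random()
    cumulative = 0.0
    for i in range(len(probs)):
        cumulative += probs[i]
        if cumulative >= threshold:
            return classifier.classes_[i]
    return classifier.classes_[-1]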
Example #8
    def initialize(self):
        # initialize state list
        # print 'Enumerating states (started at: ' + str(datetime.datetime.now())  + ")"
        s = RelationState()
        s.relations['apple_right_of_drawer'] = True
        s.relations['apple_in_front_of_drawer'] = True
        s.relations['apple_below_drawer'] = True
        s.relations['apple_right_of_gripper'] = True
        s.relations['apple_behind_gripper'] = True
        s.relations['apple_below_gripper'] = True
        s.relations['gripper_in_front_of_drawer'] = True
        s.relations['gripper_open'] = True
        s.relations['drawer_closing_stack'] = True
        s.gripper_holding = ''
        self.U[deepcopy(s)] = 0.0

        # for i in range(3):
        #     s.relations['apple_left_of_drawer'] = False
        #     s.relations['apple_right_of_drawer'] = False
        #     if i == 1:
        #         s.relations['apple_left_of_drawer'] = True
        #     elif i == 2:
        #         s.relations['apple_right_of_drawer'] = True
        #
        #     for i2 in range(3):
        #         s.relations['apple_in_front_of_drawer'] = False
        #         s.relations['apple_behind_drawer'] = False
        #         if i2 == 1:
        #             s.relations['apple_in_front_of_drawer'] = True
        #         elif i2 == 2:
        #             s.relations['apple_behind_drawer'] = True
        #
        #         for i3 in range(3):
        #             s.relations['apple_above_drawer'] = False
        #             s.relations['apple_below_drawer'] = False
        #             if i3 == 1:
        #                 s.relations['apple_above_drawer'] = True
        #             elif i3 == 2:
        #                 s.relations['apple_below_drawer'] = True
        #
        #             for i4 in range(3):
        #                 s.relations['apple_left_of_gripper'] = False
        #                 s.relations['apple_right_of_gripper'] = False
        #                 if i4 == 1:
        #                     s.relations['apple_left_of_gripper'] = True
        #                 elif i4 == 2:
        #                     s.relations['apple_right_of_gripper'] = True
        #
        #                 for i5 in range(3):
        #                     s.relations['apple_in_front_of_gripper'] = False
        #                     s.relations['apple_behind_gripper'] = False
        #                     if i5 == 1:
        #                         s.relations['apple_in_front_of_gripper'] = True
        #                     elif i5 == 2:
        #                         s.relations['apple_behind_gripper'] = True
        #
        #                     for i6 in range(3):
        #                         s.relations['apple_above_gripper'] = False
        #                         s.relations['apple_below_gripper'] = False
        #                         if i6 == 1:
        #                             s.relations['apple_above_gripper'] = True
        #                         elif i6 == 2:
        #                             s.relations['apple_below_gripper'] = True
        #
        #                         for i7 in range(3):
        #                             s.relations['gripper_left_of_drawer'] = False
        #                             s.relations['gripper_right_of_drawer'] = False
        #                             if i7 == 1:
        #                                 s.relations['gripper_left_of_drawer'] = True
        #                             elif i7 == 2:
        #                                 s.relations['gripper_right_of_drawer'] = True
        #
        #                             for i8 in range(3):
        #                                 s.relations['gripper_in_front_of_drawer'] = False
        #                                 s.relations['gripper_behind_drawer'] = False
        #                                 if i8 == 1:
        #                                     s.relations['gripper_in_front_of_drawer'] = True
        #                                 elif i8 == 2:
        #                                     s.relations['gripper_behind_drawer'] = True
        #
        #                                 for i9 in range(3):
        #                                     s.relations['gripper_above_drawer'] = False
        #                                     s.relations['gripper_below_drawer'] = False
        #                                     if i9 == 1:
        #                                         s.relations['gripper_above_drawer'] = True
        #                                     elif i9 == 2:
        #                                         s.relations['gripper_below_drawer'] = True
        #
        #                                     for i10 in range(2):
        #                                         s.relations['apple_touching_drawer'] = False
        #                                         if i10 == 1:
        #                                             s.relations['apple_touching_drawer'] = True
        #
        #                                         for i11 in range(2):
        #                                             s.relations['apple_touching_stack'] = False
        #                                             if i11 == 1:
        #                                                 s.relations['apple_touching_stack'] = True
        #
        #                                             for i12 in range(2):
        #                                                 s.relations['gripper_touching_drawer'] = False
        #                                                 if i12 == 1:
        #                                                     s.relations['gripper_touching_drawer'] = True
        #
        #                                                 for i13 in range(2):
        #                                                     s.relations['gripper_touching_stack'] = False
        #                                                     if i13 == 1:
        #                                                         s.relations['gripper_touching_stack'] = True
        #
        #                                                     for i14 in range(2):
        #                                                         s.relations['drawer_closing_stack'] = False
        #                                                         if i14 == 1:
        #                                                             s.relations['drawer_closing_stack'] = True
        #
        #                                                         for i15 in range(2):
        #                                                             s.relations['gripper_open'] = False
        #                                                             if i15 == 1:
        #                                                                 s.relations['gripper_open'] = True
        #
        #                                                             s.gripper_holding = ''
        #                                                             self.U[deepcopy(s)] = 0.0
        #
        #                                                             if not (s.relations['apple_left_of_gripper'] or
        #                                                                     s.relations['apple_right_of_gripper'] or
        #                                                                     s.relations['apple_in_front_of_gripper'] or
        #                                                                     s.relations['apple_behind_gripper'] or
        #                                                                     s.relations['apple_above_gripper'] or
        #                                                                     s.relations['apple_below_gripper']) \
        #                                                                     and not s.relations['gripper_open']:
        #                                                                 s.gripper_holding = 'apple'
        #                                                                 self.U[deepcopy(s)] = 0.0
        #                                                             elif not (s.relations['gripper_above_drawer'] or
        #                                                                       s.relations['gripper_below_drawer'] or
        #                                                                       s.relations['gripper_in_front_of_drawer']
        #                                                                       or s.relations['gripper_behind_drawer'] or
        #                                                                       s.relations['gripper_left_of_drawer']) \
        #                                                                     and s.relations['gripper_right_of_drawer']:
        #                                                                 s.gripper_holding = 'drawer'
        #                                                                 self.U[deepcopy(s)] = 0.0
        #
        # print 'Finished enumerating states (finished at: ' + str(datetime.datetime.now()) + ")"

        # initialize action list
        a = Action()

        a.action_type = Action.GRASP
        for o in self.grasp_objects:
            a.object = o
            self.actions.append(deepcopy(a))

        a.action_type = Action.PLACE
        for o in self.place_objects:
            a.object = o
            self.actions.append(deepcopy(a))

        a.action_type = Action.MOVE_ARM
        for o in self.move_objects:
            a.object = o
            self.actions.append(deepcopy(a))

        a.action_type = Action.OPEN_GRIPPER
        self.actions.append(deepcopy(a))

        a.action_type = Action.CLOSE_GRIPPER
        self.actions.append(deepcopy(a))

        a.action_type = Action.RAISE_ARM
        self.actions.append(deepcopy(a))

        a.action_type = Action.LOWER_ARM
        self.actions.append(deepcopy(a))

        a.action_type = Action.RESET_ARM
        self.actions.append(deepcopy(a))
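
The commented-out block above enumerates every combination of the spatial relations with fifteen nested loops. A sketch of the same enumeration with itertools.product (the relation names follow the source; the gripper_holding special cases at the end of the block are omitted):

import itertools

# three-way groups: neither relation holds / the first holds / the second holds
PAIRS = [('apple_left_of_drawer', 'apple_right_of_drawer'),
         ('apple_in_front_of_drawer', 'apple_behind_drawer'),
         ('apple_above_drawer', 'apple_below_drawer'),
         ('apple_left_of_gripper', 'apple_right_of_gripper'),
         ('apple_in_front_of_gripper', 'apple_behind_gripper'),
         ('apple_above_gripper', 'apple_below_gripper'),
         ('gripper_left_of_drawer', 'gripper_right_of_drawer'),
         ('gripper_in_front_of_drawer', 'gripper_behind_drawer'),
         ('gripper_above_drawer', 'gripper_below_drawer')]
FLAGS = ['apple_touching_drawer', 'apple_touching_stack',
         'gripper_touching_drawer', 'gripper_touching_stack',
         'drawer_closing_stack', 'gripper_open']

def enumerate_relation_states():
    # yield one relations dict per combination, as in the nested loops above
    for combo in itertools.product(*([range(3)] * len(PAIRS) +
                                     [range(2)] * len(FLAGS))):
        relations = {}
        for (first, second), choice in zip(PAIRS, combo[:len(PAIRS)]):
            relations[first] = (choice == 1)
            relations[second] = (choice == 2)
        for flag, value in zip(FLAGS, combo[len(PAIRS):]):
            relations[flag] = bool(value)
        yield relations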
Example #9
    def generate_action(self, req):
        """Return binary classification of an ordered grasp pair feature vector."""

        action = Action()

        action.action_type = randint(0, 7)

        if action.action_type == Action.GRASP:
            obj = randint(5, 11)
            if obj == 5:
                action.object = 'Drawer'
            else:
                action.object = DataUtils.int_to_name(obj)

        if action.action_type == Action.PLACE:
            if self.semantic_place:
                action_modifier = randint(1, 5)
                if action_modifier == 1:
                    action_modifier = 0
                elif action_modifier == 4:
                    action_modifier = 6
                if DataUtils.int_to_name(action_modifier) == 'Stack':
                    # Pick a random free point on top of the stack of drawers
                    points = []
                    if req.state.drawer_position.theta == 0 or req.state.drawer_position.theta == 180:
                        for x in range(int(req.state.drawer_position.x - 3),
                                       int(req.state.drawer_position.x + 4)):
                            for y in range(
                                    int(req.state.drawer_position.y - 2),
                                    int(req.state.drawer_position.y + 3)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z == 3:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 3))
                    else:
                        for x in range(int(req.state.drawer_position.x - 2),
                                       int(req.state.drawer_position.x + 3)):
                            for y in range(
                                    int(req.state.drawer_position.y - 3),
                                    int(req.state.drawer_position.y + 4)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z == 3:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 3))
                    if len(points) > 0:
                        action.position = points[randint(0, len(points) - 1)]
                    else:  # Pick a random point on the table
                        action.position.x = randint(0, 40)
                        action.position.y = randint(0, 15)
                        action.position.z = 0
                elif DataUtils.int_to_name(action_modifier) == 'Drawer':
                    # Pick a random free point in the drawer that's also not in the drawer stack footprint
                    points = []
                    if req.state.drawer_position.theta == 0:
                        for x in range(
                                int(req.state.drawer_position.x + 4),
                                int(req.state.drawer_position.x +
                                    req.state.drawer_opening + 3)):
                            for y in range(
                                    int(req.state.drawer_position.y - 1),
                                    int(req.state.drawer_position.y + 2)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    elif req.state.drawer_position.theta == 180:
                        for x in range(
                                int(req.state.drawer_position.x -
                                    req.state.drawer_opening - 2),
                                int(req.state.drawer_position.x - 3)):
                            for y in range(
                                    int(req.state.drawer_position.y - 1),
                                    int(req.state.drawer_position.y + 2)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    elif req.state.drawer_position.theta == 90:
                        for x in range(int(req.state.drawer_position.x - 1),
                                       int(req.state.drawer_position.x + 2)):
                            for y in range(
                                    int(req.state.drawer_position.y + 4),
                                    int(req.state.drawer_position.y +
                                        req.state.drawer_opening + 3)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    else:
                        for x in range(int(req.state.drawer_position.x - 1),
                                       int(req.state.drawer_position.x + 2)):
                            for y in range(
                                    int(req.state.drawer_position.y -
                                        req.state.drawer_opening - 2),
                                    int(req.state.drawer_position.y - 3)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    if len(points) > 0:
                        action.position = points[randint(0, len(points) - 1)]
                    else:  # Pick a random point on the table
                        action.position.x = randint(0, 40)
                        action.position.y = randint(0, 15)
                        action.position.z = 0
                elif DataUtils.int_to_name(action_modifier) == 'Box':
                    # Special case: holding lid
                    if req.state.object_in_gripper.lower() == 'lid':
                        action.position = req.state.box_position
                    else:
                        # Pick a random free point in the box that's also not in the lid footprint
                        points = []
                        for x in range(int(req.state.box_position.x - 1),
                                       int(req.state.box_position.x + 2)):
                            for y in range(int(req.state.box_position.y - 1),
                                           int(req.state.box_position.y + 2)):
                                if (x >= req.state.lid_position.x - 2
                                        and x <= req.state.lid_position.x + 2
                                        and y >= req.state.lid_position.y - 2
                                        and y <= req.state.lid_position.y + 2):
                                    continue
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z <= 1:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                        if len(points) > 0:
                            action.position = points[randint(
                                0,
                                len(points) - 1)]
                        else:  # Pick a random point on the table
                            action.position.x = randint(0, 40)
                            action.position.y = randint(0, 15)
                            action.position.z = 0
                elif DataUtils.int_to_name(action_modifier) == 'Lid':
                    # Pick a random free point on the lid
                    points = []
                    for x in range(int(req.state.lid_position.x - 2),
                                   int(req.state.lid_position.x + 3)):
                        for y in range(int(req.state.lid_position.y - 2),
                                       int(req.state.lid_position.y + 3)):
                            clear = True
                            for obj in req.state.objects:
                                if obj.position.x == x and obj.position.y == y and obj.position.z == req.state.lid_position.z:
                                    clear = False
                                    break
                            if clear:
                                points.append(Point(x, y, 2))
                    if len(points) > 0:
                        action.position = points[randint(0, len(points) - 1)]
                    else:  # Pick a random point on the table
                        action.position.x = randint(0, 40)
                        action.position.y = randint(0, 15)
                        action.position.z = 0
                else:  # Pick a random point on the table
                    action.position.x = randint(0, 40)
                    action.position.y = randint(0, 15)
                    action.position.z = 0
            else:  # Pick a random point on the table
                action.position.x = randint(0, 40)
                action.position.y = randint(0, 15)
                action.position.z = 0

        if action.action_type == Action.MOVE_ARM:
            action.position.x = randint(0, 40)
            action.position.y = randint(0, 15)
            action.position.z = 0

        return action
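
Every placement branch above repeats one pattern: scan a small grid around a landmark, skip cells that an object occupies at the relevant height, and fall back to a random table cell when nothing is free. A minimal standalone sketch of that pattern, assuming the 40x15 table bounds used in the fallbacks; the helper names and the (x, y, z) tuple stand-in for Point are illustrative:

from random import randint

def find_free_cells(center_x, center_y, radius, occupied, height=2):
    """Scan the (2*radius+1)^2 grid around the landmark and keep the
    cells that no object occupies."""
    free = []
    for x in range(int(center_x - radius), int(center_x + radius + 1)):
        for y in range(int(center_y - radius), int(center_y + radius + 1)):
            if (x, y) not in occupied:
                free.append((x, y, height))
    return free

def pick_place_point(center_x, center_y, radius, occupied):
    """Prefer a random free cell near the landmark; otherwise fall back
    to a uniformly random table cell, mirroring the branches above."""
    free = find_free_cells(center_x, center_y, radius, occupied)
    if free:
        return free[randint(0, len(free) - 1)]
    return (randint(0, 40), randint(0, 15), 0)

# usage sketch: two occupied cells next to the landmark
print(pick_place_point(10, 5, 2, occupied={(10, 5), (11, 5)}))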
Example #10
    def __init__(
        self,
        amdp_id=2,  # Correlates to the id's in amdp_state
        container_env='task4',  # Container in the environment
        simulator_node='table_sim',  # The name of the table_sim environment
        transition_function=None,  # If the transitions are init elsewhere
        demo_mode=None,  # DemonstrationMode object. If None, RANDOM+CLASSIFIER+SHADOW
        demo_config=None,  # Config from demonstrations. If None, use the default mode
        max_episode_length=100,  # Max. length of an episode
        exploit_policy=False  # Exploit learned policy using AMDP node to tradeoff with exploration
    ):
        self.demo_mode = demo_mode or DemonstrationMode(
            DemonstrationMode.RANDOM | DemonstrationMode.CLASSIFIER
            | DemonstrationMode.SHADOW)

        # data_file = rospy.get_param('~data', 'state-action_2018-04-20.pkl')
        # sa_pairs = pickle.load(file(data_file))

        # parameters for controlling exploration (see the gating sketch after
        # this constructor). # TODO: fetch through the demo mode
        self.alpha = 0.5  # directly following demonstrations vs. random exploration
        self.epsilon = 0.5  # random exploration vs. general policy guided exploration
        self.exploit_epsilon = 1.0

        self.epoch = 0
        self.successes = 0
        self.action_executions = 0

        # Read demo data and config
        self.container_env = rospy.get_param('~container_env', container_env)
        self.amdp_id = rospy.get_param('~amdp_id', amdp_id)
        self.demo_config = demo_config or self.demo_mode.configuration(
            amdp_id=self.amdp_id, container_env=self.container_env)

        # Set the transition function
        self.transition_function = transition_function or AMDPTransitionsLearned(
            amdp_id=self.amdp_id)

        # read action list
        if self.amdp_id >= 0 and self.amdp_id <= 2:
            a_file = rospy.get_param(
                '~actions',
                rospkg.RosPack().get_path('task_sim') +
                '/src/task_sim/str/A_drawer.pkl')
        else:
            a_file = rospy.get_param(
                '~actions',
                rospkg.RosPack().get_path('task_sim') +
                '/src/task_sim/str/A_box.pkl')
        self.A = pickle.load(open(a_file, 'rb'))

        if self.amdp_id == -2:
            a = Action()
            a.action_type = 0
            a.object = 'banana'
            self.A.append(deepcopy(a))
            a.action_type = 4
            self.A.append(deepcopy(a))
        elif self.amdp_id == -3:
            a = Action()
            a.action_type = 0
            a.object = 'banana'
            self.A.append(deepcopy(a))
            a.action_type = 4
            self.A.append(deepcopy(a))
            a.object = 'carrot'
            self.A.append(deepcopy(a))
            a.action_type = 0
            self.A.append(deepcopy(a))

        if self.amdp_id >= 6 and self.amdp_id <= 8:
            a = Action()
            a.action_type = 0
            a.object = 'lid'
            self.A.append(deepcopy(a))
            a.action_type = 4
            self.A.append(deepcopy(a))
            a.object = 'box'
            self.A.append(deepcopy(a))
            a.action_type = 1
            self.A.append(deepcopy(a))
            a.object = 'lid'
            self.A.append(deepcopy(a))

        # fill in the policy directly from demonstrations (if demo_mode calls for it)
        if self.demo_mode.shadow:
            self.pi = self.demo_config.get('demo_policy')

        # load weak classifier to bias random exploration (if demo_mode calls for it)
        if self.demo_mode.classifier:
            self.action_bias = self.demo_config.get('action_bias')
            self.action_bias_alternate = self.demo_config.get(
                'action_bias_alternate')

        # load plan network to bias random exploration (if demo_mode calls for it)
        if self.demo_mode.plan_network:
            self.action_sequences = self.demo_config.get('action_sequences')

        # Setup the services
        self.query_state = rospy.ServiceProxy(simulator_node + '/query_state',
                                              QueryState)
        self.execute_action = rospy.ServiceProxy(
            simulator_node + '/execute_action', Execute)
        self.reset_sim = rospy.ServiceProxy(
            simulator_node + '/reset_simulation', Empty)

        self.n = 0  # number of executions
        self.prev_state = None
        self.timeout = 0
        self.max_episode_length = max_episode_length

        if self.demo_mode.plan_network:
            self.current_node = 'start'
            self.prev_state_msg = None
            self.prev_action = None

        self.exploit_policy = exploit_policy
        if self.exploit_policy:
            self.query_status = rospy.ServiceProxy(
                simulator_node + '/query_status', QueryStatus)
            self.select_action = rospy.ServiceProxy(
                simulator_node + '/select_action', SelectAction)
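
The alpha and epsilon parameters set in this constructor gate a two-level coin flip: alpha chooses between following demonstrations and exploring, and epsilon then chooses between uniform-random and classifier-guided exploration. A minimal sketch of that gating, with an illustrative function name and the default values from above:

from random import random

def choose_exploration_branch(alpha=0.5, epsilon=0.5):
    """First flip: demonstrations vs. exploration; second flip:
    uniform-random vs. classifier/policy-guided exploration."""
    if random() < alpha:
        return 'follow_demonstration'
    if random() <= epsilon:
        return 'random_exploration'
    return 'guided_exploration'

print(choose_exploration_branch())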
Example #11
    def run(self):
        state_msg = self.query_state().state
        s = AMDPState(amdp_id=self.amdp_id, state=OOState(state=state_msg))

        self.timeout += 1

        goal_reached = goal_check(state_msg, self.amdp_id)
        if self.timeout > self.max_episode_length or goal_reached:
            self.timeout = 0
            # self.reset_sim()
            self.epoch += 1
            if goal_reached:
                self.successes += 1
            if self.demo_mode.plan_network:
                self.current_node = 'start'
                self.prev_state_msg = None
                self.prev_action = None
            return

        exploit_check = random()
        if self.exploit_policy and exploit_check > self.exploit_epsilon:
            a = self.select_action(state_msg, Action()).action
        else:
            # plan-network exploration; implemented separately to keep the conditionals manageable
            if self.demo_mode.plan_network:
                # determine the current node in the plan network
                if self.prev_state_msg is None or self.prev_action is None:
                    self.current_node = 'start'
                else:
                    self.current_node = AMDPPlanAction(self.prev_state_msg,
                                                       self.prev_action,
                                                       state_msg, self.amdp_id)

                # select action
                a = Action()
                if self.demo_mode.classifier:
                    if random() < self.alpha:
                        action_list = []
                        if self.action_sequences.has_node(self.current_node):
                            action_list = self.action_sequences.get_successor_actions(
                                self.current_node, state_msg)
                        else:
                            self.current_node = self.action_sequences.find_suitable_node(
                                state_msg)
                            if self.current_node is not None:
                                action_list = self.action_sequences.get_successor_actions(
                                    self.current_node, state_msg)

                        # select action stochastically if we're in the network, select randomly otherwise
                        if len(action_list) == 0:
                            a = self.A[randint(0, len(self.A) - 1)]
                        else:
                            selection = random()
                            count = 0
                            selected_action = action_list[0]
                            for i in range(len(action_list)):
                                count += action_list[i][1]
                                if count >= selection:
                                    selected_action = action_list[i]
                                    break
                            a.action_type = selected_action[0].action_type
                            a.object = selected_action[0].action_object
                    else:
                        # classifier-biased exploration; this is the same
                        # pattern as the non-plan-network branch below
                        if self.demo_mode.random and random() <= self.epsilon:
                            a = self.A[randint(0, len(self.A) - 1)]
                        else:
                            features = s.to_vector()

                            # Classify action
                            probs = self.action_bias.predict_proba(
                                np.asarray(features).reshape(
                                    1, -1)).flatten().tolist()
                            selection = random()
                            cprob = 0
                            action_label = '0:apple'
                            for i in range(0, len(probs)):
                                cprob += probs[i]
                                if cprob >= selection:
                                    action_label = self.action_bias.classes_[i]
                                    break
                            # Convert back to action
                            a = Action()
                            result = action_label.split(':')
                            a.action_type = int(result[0])
                            if len(result) > 1:
                                a.object = result[1]
                else:
                    # select from the plan network, with a chance of random exploration, and use random exploration when
                    # off of the network
                    if random() < self.alpha:
                        action_list = []
                        if self.action_sequences.has_node(self.current_node):
                            action_list = self.action_sequences.get_successor_actions(
                                self.current_node, state_msg)
                        else:
                            self.current_node = self.action_sequences.find_suitable_node(
                                state_msg)
                            if self.current_node is not None:
                                action_list = self.action_sequences.get_successor_actions(
                                    self.current_node, state_msg)

                        # select action stochastically if we're in the network, select randomly otherwise
                        if len(action_list) == 0:
                            a = self.A[randint(0, len(self.A) - 1)]
                        else:
                            selection = random()
                            count = 0
                            selected_action = action_list[0]
                            for i in range(len(action_list)):
                                count += action_list[i][1]
                                if count >= selection:
                                    selected_action = action_list[i]
                                    break
                            a.action_type = selected_action[0].action_type
                            a.object = selected_action[0].action_object
                    else:
                        a = self.A[randint(0, len(self.A) - 1)]

                self.prev_state_msg = state_msg  # store state for the next iteration
                self.prev_action = action_to_sim(deepcopy(a), state_msg)

            else:
                if self.demo_mode.shadow and s in self.pi:
                    if random() < self.alpha:
                        a = self.pi[s].select_action()
                    else:
                        a = self.A[randint(0, len(self.A) - 1)]
                else:
                    if self.demo_mode.classifier:
                        if self.demo_mode.random and random() <= self.epsilon:
                            a = self.A[randint(0, len(self.A) - 1)]
                        else:
                            features = s.to_vector()

                            # Classify action
                            probs = self.action_bias.predict_proba(
                                np.asarray(features).reshape(
                                    1, -1)).flatten().tolist()
                            selection = random()
                            cprob = 0
                            action_label = '0:apple'
                            for i in range(0, len(probs)):
                                cprob += probs[i]
                                if cprob >= selection:
                                    action_label = self.action_bias.classes_[i]
                                    break
                            # Convert back to action
                            a = Action()
                            result = action_label.split(':')
                            a.action_type = int(result[0])
                            if len(result) > 1:
                                a.object = result[1]
                    else:
                        a = self.A[randint(0, len(self.A) - 1)]

        self.execute_action(action_to_sim(deepcopy(a), state_msg))
        s_prime = AMDPState(amdp_id=self.amdp_id,
                            state=OOState(state=self.query_state().state))
        self.action_executions += 1

        self.transition_function.update_transition(s, a, s_prime)
        self.n += 1
        self.prev_state = deepcopy(s)
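
Both the plan-network branch and the classifier branch above select from a weighted candidate list with the same cumulative-probability loop. A standalone sketch of that roulette-wheel selection, with illustrative names; items are (value, probability) pairs whose probabilities sum to roughly one:

from random import random

def sample_weighted(items):
    """Return the value whose cumulative probability first reaches a
    uniform draw, falling back to the first item as the loops above do."""
    selection = random()
    cumulative = 0.0
    chosen = items[0][0]
    for value, prob in items:
        cumulative += prob
        if cumulative >= selection:
            chosen = value
            break
    return chosen

# usage sketch
print(sample_weighted([('grasp', 0.5), ('place', 0.3), ('move_arm', 0.2)]))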
Example #12
    def learn_q(self, s_prime, alpha=0.1, action_list=None):
        # the default None is only for signature compatibility; callers are
        # expected to pass the candidate action set
        if action_list is None:
            action_list = []
        a_prime = None
        r_prime = reward(s_prime, amdp_id=self.amdp_id)

        noop = Action()
        noop.action_type = Action.NOOP

        if is_terminal(s_prime, amdp_id=self.amdp_id):
            sa_group = "{}/{}".format(self._state_idx(s_prime),
                                      self._action_idx(noop))
            if sa_group not in self.Q:
                self.Q.create_dataset(sa_group, data=[0.])
            self.Q[sa_group][0] = r_prime

        if self.s is not None:
            # Update the Q table
            sa_group = "{}/{}".format(self._state_idx(self.s),
                                      self._action_idx(self.a))
            s_prime_key = self._state_idx(s_prime)

            if sa_group in self.Q:
                Q_sa = self.Q[sa_group]
            else:
                Q_sa = self.Q.create_dataset(sa_group, data=[0.])

            # get best action and max Q value
            Q_sa_prime = float('-inf')  # running max over Q(s', a')
            actions = []
            action_list_extended = deepcopy(action_list)
            action_list_extended.append(noop)
            for act in action_list_extended:
                sa_prime_group = "{}/{}".format(self._state_idx(s_prime),
                                                self._action_idx(act))
                if sa_prime_group in self.Q:
                    q = self.Q[sa_prime_group][0]
                else:
                    q = 0.0
                if act.action_type == Action.NOOP and q == 0:
                    continue
                if q > Q_sa_prime:
                    Q_sa_prime = q
                    actions = [act]
                elif q == Q_sa_prime:
                    actions.append(act)

            if len(actions) > 1:
                a_prime = actions[randint(0, len(actions) - 1)]
            else:
                a_prime = actions[0]

            # one-step Q-learning update (discount factor 0.8)
            Q_sa[0] += alpha * (self.r + 0.8 * Q_sa_prime - Q_sa[0])

        self.s = deepcopy(s_prime)
        self.r = deepcopy(r_prime)

        if a_prime is None or random() < self.epsilon:
            if self.mode == 0:
                a_prime = action_list[randint(0, len(action_list) - 1)]
            else:
                return None

        self.a = deepcopy(a_prime)

        return a_prime
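
The dataset update above is the standard one-step Q-learning rule with a discount factor of 0.8. A dict-based sketch of the same update, decoupled from the HDF5-backed table; the names and toy states are illustrative:

from collections import defaultdict

GAMMA = 0.8  # discount factor from the update above

def q_update(Q, s, a, r, s_prime, actions, alpha=0.1):
    """Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))"""
    best_next = max(Q[(s_prime, a2)] for a2 in actions) if actions else 0.0
    Q[(s, a)] += alpha * (r + GAMMA * best_next - Q[(s, a)])

Q = defaultdict(float)
q_update(Q, s='s0', a='grasp', r=1.0, s_prime='s1', actions=['grasp', 'place'])
print(Q[('s0', 'grasp')])  # 0.1 after one update with r = 1.0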
Example #13
    def classify(self, req):
        """Classify the current state into an action, regressing position parameters where needed."""

        action = Action()

        # Convert state to feature vector
        features = DataUtils.naive_state_vector(
            req.state,
            self.state_positions,
            self.state_semantics,
            history_buffer=self.history_buffer)

        # Classify action
        if self.stochastic:
            probs = self.action_model.predict_proba(
                np.asarray(features).reshape(1, -1)).flatten().tolist()
            selection = random()
            cprob = 0
            action_label = 0
            for i in range(1, len(probs)):  # index 0 is never accumulated; label 0 remains the fallback
                cprob += probs[i]
                if cprob >= selection:
                    action_label = self.action_model.classes_[i]
                    break
        else:
            action_label = self.action_model.predict(
                np.asarray(features).reshape(1, -1))
        action_type = DataUtils.get_action_from_label(action_label)
        action_modifier = DataUtils.get_action_modifier_from_label(
            action_label)
        action.action_type = action_type
        if action_type in [Action.GRASP]:
            action.object = DataUtils.int_to_name(action_modifier)

        # Augment state with action
        features.extend([action_type, action_modifier])

        # Regress parameters where necessary
        if action_type in [Action.PLACE]:
            if self.semantic_place:
                if DataUtils.int_to_name(action_modifier) == 'Stack':
                    # Pick a random free point on top of the stack of drawers
                    points = []
                    if req.state.drawer_position.theta == 0 or req.state.drawer_position.theta == 180:
                        for x in range(int(req.state.drawer_position.x - 3),
                                       int(req.state.drawer_position.x + 4)):
                            for y in range(
                                    int(req.state.drawer_position.y - 2),
                                    int(req.state.drawer_position.y + 3)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z == 3:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 3))
                    else:
                        for x in range(int(req.state.drawer_position.x - 2),
                                       int(req.state.drawer_position.x + 3)):
                            for y in range(
                                    int(req.state.drawer_position.y - 3),
                                    int(req.state.drawer_position.y + 4)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z == 3:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 3))
                    if len(points) > 0:
                        action.position = points[randint(0, len(points) - 1)]
                    else:  # Regress parameters for table or unexpected place surfaces
                        target = self.place_model.predict(
                            np.asarray(features).reshape(1, -1))
                        # Convert coordinates to global frame
                        action.position = DataUtils.get_point_in_global_frame(
                            req.state,
                            Point(int(floor(target[0][0] + .5)),
                                  int(floor(target[0][1] + .5)), 0),
                            DataUtils.int_to_name(action_modifier))
                elif DataUtils.int_to_name(action_modifier) == 'Drawer':
                    # Pick a random free point in the drawer that's also not in the drawer stack footprint
                    points = []
                    if req.state.drawer_position.theta == 0:
                        for x in range(
                                int(req.state.drawer_position.x + 4),
                                int(req.state.drawer_position.x +
                                    req.state.drawer_opening + 3)):
                            for y in range(
                                    int(req.state.drawer_position.y - 1),
                                    int(req.state.drawer_position.y + 2)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    elif req.state.drawer_position.theta == 180:
                        for x in range(
                                int(req.state.drawer_position.x -
                                    req.state.drawer_opening - 2),
                                int(req.state.drawer_position.x - 3)):
                            for y in range(
                                    int(req.state.drawer_position.y - 1),
                                    int(req.state.drawer_position.y + 2)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    elif req.state.drawer_position.theta == 90:
                        for x in range(int(req.state.drawer_position.x - 1),
                                       int(req.state.drawer_position.x + 2)):
                            for y in range(
                                    int(req.state.drawer_position.y + 4),
                                    int(req.state.drawer_position.y +
                                        req.state.drawer_opening + 3)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    else:
                        for x in range(int(req.state.drawer_position.x - 1),
                                       int(req.state.drawer_position.x + 2)):
                            for y in range(
                                    int(req.state.drawer_position.y -
                                        req.state.drawer_opening - 2),
                                    int(req.state.drawer_position.y - 3)):
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z > 0:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                    if len(points) > 0:
                        action.position = points[randint(0, len(points) - 1)]
                    else:  # Regress parameters for table or unexpected place surfaces
                        target = self.place_model.predict(
                            np.asarray(features).reshape(1, -1))
                        # Convert coordinates to global frame
                        action.position = DataUtils.get_point_in_global_frame(
                            req.state,
                            Point(int(floor(target[0][0] + .5)),
                                  int(floor(target[0][1] + .5)), 0),
                            DataUtils.int_to_name(action_modifier))
                elif DataUtils.int_to_name(action_modifier) == 'Box':
                    # Special case: holding lid
                    if req.state.object_in_gripper.lower() == 'lid':
                        action.position = req.state.box_position
                    else:
                        # Pick a random free point in the box that's also not in the lid footprint
                        points = []
                        for x in range(int(req.state.box_position.x - 1),
                                       int(req.state.box_position.x + 2)):
                            for y in range(int(req.state.box_position.y - 1),
                                           int(req.state.box_position.y + 2)):
                                if (x >= req.state.lid_position.x - 2
                                        and x <= req.state.lid_position.x + 2
                                        and y >= req.state.lid_position.y - 2
                                        and y <= req.state.lid_position.y + 2):
                                    continue
                                clear = True
                                for obj in req.state.objects:
                                    if obj.position.x == x and obj.position.y == y and obj.position.z <= 1:
                                        clear = False
                                        break
                                if clear:
                                    points.append(Point(x, y, 2))
                        if len(points) > 0:
                            action.position = points[randint(0, len(points) - 1)]
                        else:  # Regress parameters for table or unexpected place surfaces
                            target = self.place_model.predict(
                                np.asarray(features).reshape(1, -1))
                            # Convert coordinates to global frame
                            action.position = DataUtils.get_point_in_global_frame(
                                req.state,
                                Point(int(floor(target[0][0] + .5)),
                                      int(floor(target[0][1] + .5)), 0),
                                DataUtils.int_to_name(action_modifier))
                elif DataUtils.int_to_name(action_modifier) == 'Lid':
                    # Pick a random free point on the lid
                    points = []
                    for x in range(int(req.state.lid_position.x - 2),
                                   int(req.state.lid_position.x + 3)):
                        for y in range(int(req.state.lid_position.y - 2),
                                       int(req.state.lid_position.y + 3)):
                            clear = True
                            for obj in req.state.objects:
                                if obj.position.x == x and obj.position.y == y and obj.position.z == req.state.lid_position.z:
                                    clear = False
                                    break
                            if clear:
                                points.append(Point(x, y, 2))
                    if len(points) > 0:
                        action.position = points[randint(0, len(points) - 1)]
                    else:  # Regress parameters for table or unexpected place surfaces
                        target = self.place_model.predict(
                            np.asarray(features).reshape(1, -1))
                        # Convert coordinates to global frame
                        action.position = DataUtils.get_point_in_global_frame(
                            req.state,
                            Point(int(floor(target[0][0] + .5)),
                                  int(floor(target[0][1] + .5)), 0),
                            DataUtils.int_to_name(action_modifier))
                else:  # Regress parameters for table or unexpected place surfaces
                    target = self.place_model.predict(
                        np.asarray(features).reshape(1, -1))
                    # Convert coordinates to global frame
                    action.position = DataUtils.get_point_in_global_frame(
                        req.state,
                        Point(int(floor(target[0][0] + .5)),
                              int(floor(target[0][1] + .5)), 0),
                        DataUtils.int_to_name(action_modifier))
            else:
                target = self.place_model.predict(
                    np.asarray(features).reshape(1, -1))
                # Convert coordinates to global frame
                action.position = DataUtils.get_point_in_global_frame(
                    req.state,
                    Point(int(floor(target[0][0] + .5)),
                          int(floor(target[0][1] + .5)), 0),
                    DataUtils.int_to_name(action_modifier))

        if action_type in [Action.MOVE_ARM]:
            target = self.move_model.predict(
                np.asarray(features).reshape(1, -1))
            # Convert coordinates to global frame
            action.position = DataUtils.get_point_in_global_frame(
                req.state,
                Point(int(floor(target[0][0] + .5)),
                      int(floor(target[0][1] + .5)), 0), 'Gripper')

        return action
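
The classifier labels above pack an action type and an optional object into a single 'type:object' string (e.g. '0:apple'), and the decode step splits it back apart. A minimal round-trip sketch; the helper names are illustrative and a tuple stands in for the ROS Action message:

def encode_label(action_type, obj=''):
    """Pack an action as 'type:object', or just 'type' when there is no object."""
    return '{}:{}'.format(action_type, obj) if obj else str(action_type)

def decode_label(label):
    """Split a label back into (action_type, object), as in classify() above."""
    parts = label.split(':')
    return int(parts[0]), (parts[1] if len(parts) > 1 else '')

assert decode_label(encode_label(0, 'apple')) == (0, 'apple')
assert decode_label(encode_label(4)) == (4, '')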
Example #14
    def initialize(self, transition_function):
        # initialize action list
        a = Action()

        if self.amdp_id == 3:
            # actions are overloaded here to use the same message type
            # if amdp_id is 3 (the highest-level abstract mdp), then the actions correspond to the amdp_id, i.e.:
            #   0 - open drawer
            #   1 - close drawer
            #   2 - put apple in drawer
            a.action_type = 0
            self.actions.append(deepcopy(a))
            a.action_type = 1
            self.actions.append(deepcopy(a))
            a.action_type = 2
            self.actions.append(deepcopy(a))
        elif self.amdp_id == 4:
            # actions are overloaded here to use the same message type
            # if amdp_id is 4, the actions correspond to the lower-level amdp_ids, i.e.:
            #   0 - open drawer
            #   1 - close drawer
            #   2 - put item in drawer
            # ground items are stored in the Action.object member
            a.action_type = 0
            self.actions.append(deepcopy(a))
            a.action_type = 1
            self.actions.append(deepcopy(a))
            a.action_type = 2
            a.object = 'apple'
            self.actions.append(deepcopy(a))
            a.object = 'banana'
            self.actions.append(deepcopy(a))
        elif self.amdp_id == 5:
            # actions are overloaded here to use the same message type
            # if amdp_id is 5, the actions correspond to the lower-level amdp_ids, i.e.:
            #   0 - open drawer
            #   1 - close drawer
            #   2 - put item in drawer
            # ground items are stored in the Action.object member
            a.action_type = 0
            self.actions.append(deepcopy(a))
            a.action_type = 1
            self.actions.append(deepcopy(a))
            a.action_type = 2
            a.object = 'apple'
            self.actions.append(deepcopy(a))
            a.object = 'banana'
            self.actions.append(deepcopy(a))
            a.object = 'carrot'
            self.actions.append(deepcopy(a))
        elif self.amdp_id == 9:
            # actions are overloaded here to use the same message type
            # the actions correspond to the amdp_id, i.e.:
            #   6 - open box
            #   7 - close box
            #   8 - put carrot in box
            # ground items are stored in the Action.object member
            a.action_type = 6
            self.actions.append(deepcopy(a))
            a.action_type = 7
            self.actions.append(deepcopy(a))
            a.action_type = 8
            a.object = 'carrot'
            self.actions.append(deepcopy(a))
        elif self.amdp_id == 10:
            # actions are overloaded here to use the same message type
            # the actions correspond to the amdp_id, i.e.:
            #   4 - put fruits in drawer high-level amdp
            #   9 - put vegetable in box high-level amdp
            a.action_type = 4
            self.actions.append(deepcopy(a))
            a.action_type = 9
            self.actions.append(deepcopy(a))
        elif self.amdp_id == 11:
            # actions are overloaded here to use the same message type
            # the actions correspond to the amdp_id, i.e.:
            #   6 - open box
            #   7 - close box
            #   8 - put item in box
            # ground items are stored in the Action.object member
            a.action_type = 6
            self.actions.append(deepcopy(a))
            a.action_type = 7
            self.actions.append(deepcopy(a))
            a.action_type = 8
            a.object = 'carrot'
            self.actions.append(deepcopy(a))
            a.object = 'daikon'
            self.actions.append(deepcopy(a))
        elif self.amdp_id == 12:
            # actions are overloaded here to use the same message type
            # the actions correspond to the amdp_id, i.e.:
            #   4 - put fruits in drawer high-level amdp
            #   11 - put vegetables in box high-level amdp
            a.action_type = 4
            self.actions.append(deepcopy(a))
            a.action_type = 11
            self.actions.append(deepcopy(a))
        else:
            a.action_type = Action.GRASP
            if self.amdp_id >= 0 and self.amdp_id <= 2:
                for o in self.grasp_objects_drawer:
                    a.object = o
                    self.actions.append(deepcopy(a))
            elif self.amdp_id >= 6 and self.amdp_id <= 8:
                for o in self.grasp_objects_box:
                    a.object = o
                    self.actions.append(deepcopy(a))
            if self.amdp_id == -2:
                a.object = 'banana'
                self.actions.append(deepcopy(a))
            elif self.amdp_id == -3:
                a.object = 'banana'
                self.actions.append(deepcopy(a))
                a.object = 'carrot'
                self.actions.append(deepcopy(a))

            a.action_type = Action.PLACE
            if self.amdp_id >= 0 and self.amdp_id <= 2:
                for o in self.place_objects_drawer:
                    a.object = o
                    self.actions.append(deepcopy(a))
            elif self.amdp_id >= 6 and self.amdp_id <= 8:
                for o in self.place_objects_box:
                    a.object = o
                    self.actions.append(deepcopy(a))

            a.action_type = Action.MOVE_ARM
            if self.amdp_id >= 0 and self.amdp_id <= 2:
                for o in self.move_objects_drawer:
                    a.object = o
                    self.actions.append(deepcopy(a))
            elif self.amdp_id >= 6 and self.amdp_id <= 8:
                for o in self.move_objects_box:
                    a.object = o
                    self.actions.append(deepcopy(a))
            if self.amdp_id == -2:
                a.object = 'banana'
                self.actions.append(deepcopy(a))
            elif self.amdp_id == -3:
                a.object = 'banana'
                self.actions.append(deepcopy(a))
                a.object = 'carrot'
                self.actions.append(deepcopy(a))

            a.object = ''
            a.action_type = Action.OPEN_GRIPPER
            self.actions.append(deepcopy(a))

            a.action_type = Action.CLOSE_GRIPPER
            self.actions.append(deepcopy(a))

            a.action_type = Action.RAISE_ARM
            self.actions.append(deepcopy(a))

            a.action_type = Action.LOWER_ARM
            self.actions.append(deepcopy(a))

            a.action_type = Action.RESET_ARM
            self.actions.append(deepcopy(a))

        # initialize transition function
        assert transition_function is not None, "A transition function must be provided"
        self.T = transition_function

        # initialize utilities for states in transition function
        self.init_utilities()
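
Every branch of initialize() builds its action set the same way: walk a list of (action_type, object) pairs and append a fresh message per pair. A table-driven sketch of that construction; FakeAction and make_actions are illustrative stand-ins, not part of the original code:

class FakeAction(object):
    """Stand-in for the ROS Action message, for illustration only."""
    def __init__(self, action_type=0, obj=''):
        self.action_type = action_type
        self.object = obj

def make_actions(pairs):
    """Build one action message per (action_type, object) pair."""
    return [FakeAction(t, o) for t, o in pairs]

# e.g. the amdp_id == 4 action set from above:
actions = make_actions([(0, ''), (1, ''), (2, 'apple'), (2, 'banana')])
print([(a.action_type, a.object) for a in actions])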