def __init__(self):
    self.allow_interventions = rospy.get_param('~allow_interventions', False)
    self.trials = rospy.get_param('~trials', 1)
    self.trial = 0
    self.successes = 0
    self.failures = 0
    self.timeouts = 0
    self.last_action = Action()
    self.last_action.action_type = Action.NOOP
    self.terminal_action_counts = {}

    if self.trials > 1:
        self.interventions = []
        self.action_counts = []
        self.temp_action_counts = [0, 0, 0, 0, 0, 0, 0, 0]
        self.successful_action_counts = []
        self.failed_action_counts = []
        for i in range(self.trials):
            self.interventions.append(0)
            self.action_counts.append([0, 0, 0, 0, 0, 0, 0, 0])
    else:
        self.interventions = 0
        self.action_counts = [0, 0, 0, 0, 0, 0, 0, 0]
    self.step_count = 0

    self.query_state = rospy.ServiceProxy('table_sim/query_state', QueryState)
    self.query_status = rospy.ServiceProxy('table_sim/query_status', QueryStatus)
    self.select_action = rospy.ServiceProxy('table_sim/select_action', SelectAction)
    self.execute = rospy.ServiceProxy('table_sim/execute_action', Execute)
    self.request_intervention = rospy.ServiceProxy('table_sim/request_intervention', RequestIntervention)
    self.reset = rospy.ServiceProxy('table_sim/reset_simulation', Empty)

    print 'Starting trial 1...'
def evaluate(self, eval_seed):
    simulator_api = self.simulator_api[self.simulators[None]]
    rospy.set_param(simulator_api['seed_param_name'], eval_seed)
    simulator_api['reset_sim']()

    num_steps = 0
    status = Status.IN_PROGRESS
    while status == Status.IN_PROGRESS:
        if num_steps > self.max_episode_length:
            status = Status.TIMEOUT
            break

        state = simulator_api['query_state']().state
        selected_action = simulator_api['select_action'](state, Action())
        action = selected_action.action
        next_state = simulator_api['execute'](action)
        status = simulator_api['query_status'](next_state.state).status.status_code

        self.total_actions += 1
        if selected_action.action_source == 1:
            self.actions_from_learned_policy += 1
        num_steps += 1
        # rospy.sleep(0.5)

    return status == Status.COMPLETED
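# A minimal usage sketch (not part of the node above): `evaluator` is assumed to be
# an instance of the class defining evaluate(), and `eval_seeds` any iterable of
# integer seeds. It only relies on evaluate(seed) returning True when the episode
# reaches Status.COMPLETED, as shown above.
def estimate_success_rate(evaluator, eval_seeds):
    """Run evaluate() once per seed and return the fraction of completed episodes."""
    seeds = list(eval_seeds)
    if not seeds:
        return 0.0
    successes = sum(1 for seed in seeds if evaluator.evaluate(seed))
    return float(successes) / len(seeds)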
def select_action(self, req):
    """Return an action generated from the plan network."""
    action = Action()
    print 'Planning...\n'
    plan = self.forward_planner.plan(req.state)
    print '\nPlan: '
    print str(plan)
    # NOTE: the loop below never exits, so the default (NOOP) action constructed
    # above is never returned; this handler currently only prints the computed plan.
    while True:
        rospy.sleep(1.0)
    return action
def run(self):
    for amdp_id, value_iterator in self.Us.iteritems():
        # Don't run value iteration for the top-level AMDPs
        if amdp_id in [4, 11, 12]:
            continue
        # print("Solving:", amdp_id)
        # value_iterator.init_utilities()
        # value_iterator.solve()
        # print("Saving:", amdp_id)
        # value_iterator.save()

    self.amdp_node.reinit_U()

    restart = 'y'
    while restart == 'y':
        state = self.query_state().state
        for i in range(100):
            selected_action = self.select_action(state, Action())
            action = selected_action.action
            print('Selected action: ' + str(action.action_type) + ', ' + str(action.object))
            print('(press e to execute, q to quit)')
            s = ''
            while (s != 'e' and s != 'q'):
                s = raw_input('(e/q) >> ')
            if s == 'q':
                break
            state = self.execute(action).state
        print('AMDP testing finished. Restart?')
        restart = raw_input('(y/n) >> ')
    print('Exiting.')
def select_action(self, req):
    """Return an action generated from the plan network."""
    action = Action()
    action_list = []

    # check if we are correctly at the current node based on action effects
    if self.current_node != 'start':
        if self.handle_intervention_action:
            self.prev_action = req.prev_action
            self.handle_intervention_action = False
        actual_node = self.network.generalize_action(
            PlanAction(self.prev_state, self.prev_action, req.state))
        if actual_node != self.current_node:
            # print 'Unexpected effects! Updating current node... (Note: this node may not be in the graph!)'
            # print '\n\n---------------------------------------'
            # print 'Current node: '
            # print str(self.current_node)
            # print '\n----------------------------------------'
            # print 'Actual node: '
            # print str(actual_node)
            # print '-----------------------------------------\n\n'
            if self.network.has_node(actual_node):
                self.current_node = actual_node
            else:
                # see if there are remaining actions available
                if len(self.remaining_actions) > 0:
                    # see if remaining actions have valid preconditions
                    valid_remaining_actions = []
                    norm = 0
                    for act in self.remaining_actions:
                        if act[0].check_preconditions(
                                req.state, act[1], act[2], self.network.object_to_cluster):
                            valid_remaining_actions.append(act)
                            norm += act[3]
                    self.remaining_actions = []
                    if len(valid_remaining_actions) > 0:
                        for act in valid_remaining_actions:
                            act[3] /= float(norm)
                        action_list = valid_remaining_actions
                    else:
                        self.current_node = self.network.find_suitable_node(req.state)
                else:
                    self.current_node = self.network.find_suitable_node(req.state)
        # else:
        #     print 'Expected effects match.'

    if self.current_node is None:
        action.action_type = Action.NOOP
        self.noop_count += 1
        self.prev_state = copy.deepcopy(req.state)
        self.intervention_requested = True
        return action
    elif len(action_list) == 0:
        action_list = self.network.get_successor_actions(self.current_node, req.state)

    # check if there were no successors
    if len(action_list) == 0:
        self.current_node = self.network.find_suitable_node(req.state)
        if self.current_node is None:
            action.action_type = Action.NOOP
            self.noop_count += 1
            self.prev_state = copy.deepcopy(req.state)
            self.intervention_requested = True
            return action
        action_list = self.network.get_successor_actions(self.current_node, req.state)

    # print '\n\nAction list: '
    # print str(action_list)

    if len(action_list) > 0:
        # print '\nAction list: '
        # for act in action_list:
        #     print str(act[0].action) + ', ' + str(act[1]) + ', ' + str(act[2]) + ', ' + str(act[3])
        # print '\n'
        selection = random()
        count = 0
        selected_action = action_list[0]
        for i in range(len(action_list)):
            count += action_list[i][3]
            if count >= selection:
                selected_action = action_list[i]
                break

        action.action_type = selected_action[0].action
        if action.action_type == Action.GRASP:
            action.object = selected_action[1]
            if len(action.object) > 0:
                action.object = action.object[0].upper() + action.object[1:]
        elif action.action_type == Action.PLACE or action.action_type == Action.MOVE_ARM:
            action.position = DataUtils.semantic_action_to_position(req.state, selected_action[2])

        self.prev_node = copy.deepcopy(self.current_node)
        self.current_node = copy.deepcopy(selected_action[0])
        self.prev_state = copy.deepcopy(req.state)
        self.prev_action = copy.deepcopy(action)
        self.remaining_actions = action_list
        norm = selected_action[3]
        self.remaining_actions.remove(selected_action)
    else:
        # print 'Still no actions!'
        action.action_type = Action.NOOP
        self.prev_state = copy.deepcopy(req.state)
        self.intervention_requested = True

    if action.action_type != Action.NOOP:
        # print 'Action:\n' + str(action.action_type) + ', ' + selected_action[1] + ', ' + selected_action[2]
        self.noop_count = 0
    else:
        self.noop_count += 1

    return action
def __init__(
        self,
        simulator_name='table_sim',
        transition_functions=None,
        value_tables=None,
        demo_mode=None,        # DemonstrationMode object. If None, RANDOM+CLASSIFIER
        baseline_mode=False,
        q_learning_mode=False,
        q_tables=None,
        complexity=1,          # complexity 0 represents 1I-1C environments, used for exploitation during training
        env_type=0,            # Used to specify box or drawer environments when complexity=0, ignored otherwise
        continuous=False       # flag to specify continuous mode (i.e. running on a physical platform such as Nimbus)
):
    self.continuous = continuous
    self.baseline_mode = baseline_mode      # flag for running without utilities at leaf action selection
    self.q_learning_mode = q_learning_mode  # use Q tables for leaf amdp action selection
    self.complexity = complexity
    self.env_type = env_type

    a_file_drawer = rospy.get_param(
        '~actions_drawer',
        rospkg.RosPack().get_path('task_sim') + '/src/task_sim/str/A_drawer.pkl')
    a_file_box = rospy.get_param(
        '~actions_box',
        rospkg.RosPack().get_path('task_sim') + '/src/task_sim/str/A_box.pkl')

    self.A = {}
    self.U = {}
    self.U_t = value_tables
    self.T = transition_functions or {}
    if self.q_learning_mode:
        self.Q = q_tables or {}

    self.A[0] = pickle.load(file(a_file_drawer))
    self.A[1] = self.A[0]
    self.A[2] = self.A[0]
    self.A[6] = pickle.load(file(a_file_box))
    self.A[7] = self.A[6]
    self.A[8] = self.A[6]

    self.A[4] = []
    a = Action()
    a.action_type = 0
    self.A[4].append(deepcopy(a))
    a.action_type = 1
    self.A[4].append(deepcopy(a))
    a.action_type = 2
    a.object = 'apple'
    self.A[4].append(deepcopy(a))
    a.object = 'banana'
    self.A[4].append(deepcopy(a))

    self.A[11] = []
    a = Action()
    a.action_type = 6
    self.A[11].append(deepcopy(a))
    a.action_type = 7
    self.A[11].append(deepcopy(a))
    a.action_type = 8
    a.object = 'carrot'
    self.A[11].append(deepcopy(a))
    a.object = 'daikon'
    self.A[11].append(deepcopy(a))

    a = Action()
    self.A[12] = []
    a.action_type = 4
    self.A[12].append(deepcopy(a))
    a.action_type = 11
    self.A[12].append(deepcopy(a))

    if value_tables is None:
        self.U[0] = pickle.load(file('U0.pkl'))
        self.U[1] = pickle.load(file('U1.pkl'))
        self.U[2] = pickle.load(file('U2.pkl'))
        self.U[4] = pickle.load(file('U4.pkl'))
        self.U[6] = pickle.load(file('U6.pkl'))
        self.U[7] = pickle.load(file('U7.pkl'))
        self.U[8] = pickle.load(file('U8.pkl'))
        self.U[11] = pickle.load(file('U11.pkl'))
        self.U[12] = pickle.load(file('U12.pkl'))

    if transition_functions is None:
        self.T[0] = AMDPTransitionsLearned(amdp_id=0)
        self.T[2] = AMDPTransitionsLearned(amdp_id=2)
        self.T[6] = AMDPTransitionsLearned(amdp_id=6)
        self.T[8] = AMDPTransitionsLearned(amdp_id=8)
        self.T[4] = AMDPTransitionsLearned(amdp_id=4)
        self.T[11] = AMDPTransitionsLearned(amdp_id=11)
        self.T[12] = AMDPTransitionsLearned(amdp_id=12)

    # demo config, loads modes, policies, and classifiers
    self.demo_mode = demo_mode or DemonstrationMode(
        DemonstrationMode.RANDOM | DemonstrationMode.CLASSIFIER)
    self.demo_configs = {}
    self.demo_configs[0] = self.demo_mode.configuration(amdp_id=0, container_env='task4')
    self.demo_configs[2] = self.demo_mode.configuration(amdp_id=2, container_env='task4')
    self.demo_configs[6] = self.demo_mode.configuration(amdp_id=6, container_env='task7')
    self.demo_configs[8] = self.demo_mode.configuration(amdp_id=8, container_env='task7')

    # load decision trees, shadow policies
    self.classifiers = {}
    self.classifiers_alternate = {}
    self.pis = {}
    self.action_sequences = {}
    for i in [0, 2, 6, 8]:
        if self.demo_mode.shadow:
            self.pis[i] = self.demo_configs[i].get('demo_policy')
        if self.demo_mode.classifier:
            self.classifiers[i] = self.demo_configs[i].get('action_bias')
            self.classifiers_alternate[i] = self.demo_configs[i].get('action_bias_alternate')
        if self.demo_mode.plan_network:
            self.action_sequences[i] = self.demo_configs[i].get('action_sequences')

    self.service = rospy.Service(simulator_name + '/select_action', SelectAction, self.select_action)
    self.status_service = rospy.Service(simulator_name + '/query_status', QueryStatus, self.query_status)
def select_action(self, req, debug=1): action = Action() action_list = [] oo_state = OOState(state=req.state, continuous=self.continuous) if self.complexity > 0: # TODO: this is commented out for drawer-only testing! # start at the top level s = AMDPState(amdp_id=12, state=oo_state) utilities = {} for a in self.A[12]: successors = self.T[t_id_map[12]].transition_function(s, a) u = 0 for i in range(len(successors)): p = successors[i][0] s_prime = successors[i][1] if s_prime in self.U[12]: u += p * self.U[12][s_prime] elif is_terminal(s_prime, amdp_id=12): u += p * reward(s_prime, amdp_id=12) utilities[a] = u # print '\n---' # for key in utilities: # print str(key) # print 'utility: ' + str(utilities[key]) # pick top action deterministically max_utility = -999999 for a in utilities.keys(): if utilities[a] > max_utility: max_utility = utilities[a] action_list = [] action_list.append(deepcopy(a)) elif utilities[a] == max_utility: action_list.append(deepcopy(a)) # select action # i = randint(0, len(action_list) - 1) i = 0 id = action_list[i].action_type #obj = action_list[i].object if debug > 0: print 'Top level action selection: ' + str(id) s = AMDPState(amdp_id=id, state=oo_state) # s = AMDPState(amdp_id=4, state=oo_state) # TODO: temporary, for drawer-only testing else: if self.env_type % 2 == 0: id = 4 else: id = 11 s = AMDPState(amdp_id=id, state=oo_state, ground_items=['apple', 'apple', 'apple', 'apple']) # TODO: debugging state print '\n\n-------------------------------------------------------------' print 'Mid-level AMDP state:' print str(s) print '-------------------------------------------------------------\n\n' utilities = {} for a in self.A[id]: successors = self.T[t_id_map[id]].transition_function(s, a) u = 0 for i in range(len(successors)): p = successors[i][0] s_prime = successors[i][1] if s_prime in self.U[id]: u += p * self.U[id][s_prime] elif is_terminal(s_prime, amdp_id=id): u += p * reward(s_prime, amdp_id=id) utilities[a] = u # print '\n---' # for key in utilities: # print str(key) # print 'utility: ' + str(utilities[key]) # pick top action deterministically max_utility = -999999 for a in utilities.keys(): if utilities[a] > max_utility: max_utility = utilities[a] action_list = [] action_list.append(deepcopy(a)) elif utilities[a] == max_utility: action_list.append(deepcopy(a)) # select action # i = randint(0, len(action_list) - 1) i = 0 id = action_list[i].action_type if self.complexity > 0: obj = action_list[i].object else: if action_list[i].object in [ 'apple', 'banana', 'carrot', 'daikon' ]: obj = 'apple' else: obj = action_list[i].object if debug > 0: print '\tMid level action selection: ' + str(id) + ', ' + str(obj) # solve lower level mdp for executable action action_list = [] s = AMDPState(amdp_id=id, state=oo_state, ground_items=[obj]) # TODO: debugging state print '\n\n-------------------------------------------------------------' print 'Low-level AMDP state:' print str(s) print '-------------------------------------------------------------\n\n' selected_from_utility = 1 if self.q_learning_mode: action = self.Q[id].select_action(s, action_list=self.A[id]) if action is None: selected_from_utility = 0 if self.demo_mode.classifier: action = Action() features = s.to_vector() probs = self.classifiers[t_id_map[id]].predict_proba( np.asarray(features).reshape(1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = '0:apple' for i in range(0, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.classifiers[ t_id_map[id]].classes_[i] 
break # Convert back to action result = action_label.split(':') action.action_type = int(result[0]) if len(result) > 1: action.object = result[1] else: action = self.A[id][randint(0, len(self.A[id]) - 1)] if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj elif self.baseline_mode: selected_from_utility = 0 if self.demo_mode.classifier: features = s.to_vector() probs = self.classifiers[t_id_map[id]].predict_proba( np.asarray(features).reshape(1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = '0:apple' for i in range(0, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.classifiers[ t_id_map[id]].classes_[i] break # Convert back to action result = action_label.split(':') action.action_type = int(result[0]) if len(result) > 1: action.object = result[1] if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj elif self.demo_mode.plan_network: current_node = self.action_sequences[ t_id_map[id]].find_suitable_node(req.state, ground_items=[obj]) if current_node is None: current_node = 'start' action_list = self.action_sequences[ t_id_map[id]].get_successor_actions(current_node, req.state, ground_items=[obj]) # select action stochastically if we're in the network, select randomly otherwise if len(action_list) == 0: # random action = self.A[id][randint(0, len(self.A[id]) - 1)] if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj else: selection = random() count = 0 selected_action = action_list[0] for i in range(len(action_list)): count += action_list[i][1] if count >= selection: selected_action = action_list[i] break action.action_type = selected_action[0].action_type action.object = selected_action[0].action_object if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj else: action = self.A[id][randint(0, len(self.A[id]) - 1)] if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj else: utilities = {} for a in self.A[id]: successors = self.T[t_id_map[id]].transition_function(s, a) u = 0 for i in range(len(successors)): p = successors[i][0] s_prime = successors[i][1] if s_prime in self.U[id]: u += p * self.U[id][s_prime] elif is_terminal(s_prime, amdp_id=id): u += p * reward(s_prime, amdp_id=id) utilities[a] = u # print '\n---' # for key in utilities: # print str(key) # print 'utility: ' + str(utilities[key]) # pick top action deterministically max_utility = -999999 for a in utilities.keys(): if utilities[a] > max_utility: max_utility = utilities[a] action_list = [] action = deepcopy(a) if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj action_list.append(deepcopy(action)) elif utilities[a] == max_utility: action = deepcopy(a) if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj action_list.append(deepcopy(action)) if debug > 1: print 'Action: ', a.action_type, ':', a.object, ', Utility: ', utilities[ a] if max_utility != 0 and max_utility > 0: # there is a successor state is in the utility table i = randint(0, len(action_list) - 1) # i = 0 action = action_list[i] if debug > 0: print('Action selected from utilities') else: # we need to select an action a 
different way selected_from_utility = 0 if self.demo_mode.plan_network and not self.demo_mode.classifier: current_node = self.action_sequences[ t_id_map[id]].find_suitable_node(req.state, ground_items=[obj]) if current_node is None: current_node = 'start' action_list = self.action_sequences[ t_id_map[id]].get_successor_actions(current_node, req.state, ground_items=[obj]) # select action stochastically if we're in the network, select randomly otherwise if len(action_list) == 0: # random action = self.A[id][randint(0, len(self.A[id]) - 1)] if action.object == 'apple': if obj not in items: action.object = items[randint( 0, len(items) - 1)] else: action.object = obj else: selection = random() count = 0 selected_action = action_list[0] for i in range(len(action_list)): count += action_list[i][1] if count >= selection: selected_action = action_list[i] break action.action_type = selected_action[0].action_type action.object = selected_action[0].action_object if action.object == 'apple': if obj not in items: action.object = items[randint( 0, len(items) - 1)] else: action.object = obj elif self.demo_mode.plan_network and self.demo_mode.classifier: # 50/50 tradeoff between plan network and classifier use_plan_network = random() < 0.5 use_classifier = not use_plan_network if use_plan_network: current_node = self.action_sequences[ t_id_map[id]].find_suitable_node( req.state, ground_items=[obj]) if current_node is None: current_node = 'start' action_list = self.action_sequences[ t_id_map[id]].get_successor_actions( current_node, req.state, ground_items=[obj]) # select action stochastically if we're in the network, select with classifier otherwise if len(action_list) == 0: use_classifier = True else: selection = random() count = 0 selected_action = action_list[0] for i in range(len(action_list)): count += action_list[i][1] if count >= selection: selected_action = action_list[i] break action.action_type = selected_action[0].action_type action.object = selected_action[0].action_object if action.object == 'apple': if obj not in items: action.object = items[randint( 0, len(items) - 1)] else: action.object = obj if debug > 0: print('Action selected from plan network') if use_classifier: features = s.to_vector() probs = self.classifiers[t_id_map[id]].predict_proba( np.asarray(features).reshape( 1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = '0:apple' for i in range(0, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.classifiers[ t_id_map[id]].classes_[i] break # Convert back to action result = action_label.split(':') action.action_type = int(result[0]) if len(result) > 1: action.object = result[1] if action.object == 'apple': if obj not in items: action.object = items[randint( 0, len(items) - 1)] else: action.object = obj if debug > 0: print('Action selected from classifier') elif self.demo_mode.classifier: features = s.to_vector() # if random() < 0.5: probs = self.classifiers[t_id_map[id]].predict_proba( np.asarray(features).reshape(1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = '0:apple' for i in range(0, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.classifiers[ t_id_map[id]].classes_[i] break # else: # probs = self.classifiers[t_id_map[id]].predict_proba(np.asarray(features).reshape(1, -1)).flatten().tolist() # selection = random() # cprob = 0 # action_label = '0:apple' # for i in range(0, len(probs)): # cprob += probs[i] # if cprob >= selection: # action_label = 
self.classifiers[t_id_map[id]].classes_[i] # break # Convert back to action result = action_label.split(':') action.action_type = int(result[0]) if len(result) > 1: action.object = result[1] if action.object == 'apple': if obj not in items: action.object = items[randint( 0, len(items) - 1)] else: action.object = obj if debug > 0: print '***** Action selected from decision tree. *****' # random action # if self.demo_mode.random: else: action = self.A[id][randint(0, len(self.A[id]) - 1)] if action.object == 'apple': if obj not in items: action.object = items[randint(0, len(items) - 1)] else: action.object = obj if debug > 0: print '\t\tLow level action selection: ' + str( action.action_type) + ', ' + str(action.object) if action.action_type == Action.PLACE: if not self.continuous: action.position = DataUtils.semantic_action_to_position( req.state, action.object) action.object = '' elif action.action_type == Action.MOVE_ARM: if not self.continuous: if action.object == 'l': action.position.x = req.state.gripper_position.x - 10 action.position.y = req.state.gripper_position.y elif action.object == 'fl': action.position.x = req.state.gripper_position.x - 10 action.position.y = req.state.gripper_position.y - 5 elif action.object == 'f': action.position.x = req.state.gripper_position.x action.position.y = req.state.gripper_position.y - 5 elif action.object == 'fr': action.position.x = req.state.gripper_position.x + 10 action.position.y = req.state.gripper_position.y - 5 elif action.object == 'r': action.position.x = req.state.gripper_position.x + 10 action.position.y = req.state.gripper_position.y elif action.object == 'br': action.position.x = req.state.gripper_position.x + 10 action.position.y = req.state.gripper_position.y + 5 elif action.object == 'b': action.position.x = req.state.gripper_position.x action.position.y = req.state.gripper_position.y + 5 elif action.object == 'bl': action.position.x = req.state.gripper_position.x - 10 action.position.y = req.state.gripper_position.y + 5 else: action.position = DataUtils.semantic_action_to_position( req.state, action.object) action.object = '' elif action.action_type != Action.GRASP: action.object = '' # print '\n\n-------------------' # print 'Selected action: ' # print str(action) return action, selected_from_utility
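# The classifier branches in select_action above repeatedly convert a predicted
# label of the form '<action_type>:<object>' back into an Action message. A
# hypothetical helper that factors out that conversion (a sketch, not part of
# the node); it assumes only the label format used by the classifiers above.
def label_to_action(action_label):
    """Parse an '<action_type>[:<object>]' label into an Action message."""
    action = Action()
    result = action_label.split(':')
    action.action_type = int(result[0])
    if len(result) > 1:
        action.object = result[1]
    return action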
def initialize(self): # initialize state list # print 'Enumerating states (started at: ' + str(datetime.datetime.now()) + ")" s = RelationState() s.relations['apple_right_of_drawer'] = True s.relations['apple_in_front_of_drawer'] = True s.relations['apple_below_drawer'] = True s.relations['apple_right_of_gripper'] = True s.relations['apple_behind_gripper'] = True s.relations['apple_below_gripper'] = True s.relations['gripper_in_front_of_drawer'] = True s.relations['gripper_open'] = True s.relations['drawer_closing_stack'] = True s.gripper_holding = '' self.U[deepcopy(s)] = 0.0 # for i in range(3): # s.relations['apple_left_of_drawer'] = False # s.relations['apple_right_of_drawer'] = False # if i == 1: # s.relations['apple_left_of_drawer'] = True # elif i == 2: # s.relations['apple_right_of_drawer'] = True # # for i2 in range(3): # s.relations['apple_in_front_of_drawer'] = False # s.relations['apple_behind_drawer'] = False # if i2 == 1: # s.relations['apple_in_front_of_drawer'] = True # elif i2 == 2: # s.relations['apple_behind_drawer'] = True # # for i3 in range(3): # s.relations['apple_above_drawer'] = False # s.relations['apple_below_drawer'] = False # if i3 == 1: # s.relations['apple_above_drawer'] = True # elif i3 == 2: # s.relations['apple_below_drawer'] = True # # for i4 in range(3): # s.relations['apple_left_of_gripper'] = False # s.relations['apple_right_of_gripper'] = False # if i4 == 1: # s.relations['apple_left_of_gripper'] = True # elif i4 == 2: # s.relations['apple_right_of_gripper'] = True # # for i5 in range(3): # s.relations['apple_in_front_of_gripper'] = False # s.relations['apple_behind_gripper'] = False # if i5 == 1: # s.relations['apple_in_front_of_gripper'] = True # elif i5 == 2: # s.relations['apple_behind_gripper'] = True # # for i6 in range(3): # s.relations['apple_above_gripper'] = False # s.relations['apple_below_gripper'] = False # if i6 == 1: # s.relations['apple_above_gripper'] = True # elif i6 == 2: # s.relations['apple_below_gripper'] = True # # for i7 in range(3): # s.relations['gripper_left_of_drawer'] = False # s.relations['gripper_right_of_drawer'] = False # if i7 == 1: # s.relations['gripper_left_of_drawer'] = True # elif i7 == 2: # s.relations['gripper_right_of_drawer'] = True # # for i8 in range(3): # s.relations['gripper_in_front_of_drawer'] = False # s.relations['gripper_behind_drawer'] = False # if i8 == 1: # s.relations['gripper_in_front_of_drawer'] = True # elif i8 == 2: # s.relations['gripper_behind_drawer'] = True # # for i9 in range(3): # s.relations['gripper_above_drawer'] = False # s.relations['gripper_below_drawer'] = False # if i9 == 1: # s.relations['gripper_above_drawer'] = True # elif i9 == 2: # s.relations['gripper_below_drawer'] = True # # for i10 in range(2): # s.relations['apple_touching_drawer'] = False # if i10 == 1: # s.relations['apple_touching_drawer'] = True # # for i11 in range(2): # s.relations['apple_touching_stack'] = False # if i11 == 1: # s.relations['apple_touching_stack'] = True # # for i12 in range(2): # s.relations['gripper_touching_drawer'] = False # if i12 == 1: # s.relations['gripper_touching_drawer'] = True # # for i13 in range(2): # s.relations['gripper_touching_stack'] = False # if i13 == 1: # s.relations['gripper_touching_stack'] = True # # for i14 in range(2): # s.relations['drawer_closing_stack'] = False # if i14 == 1: # s.relations['drawer_closing_stack'] = True # # for i15 in range(2): # s.relations['gripper_open'] = False # if i15 == 1: # s.relations['gripper_open'] = True # # s.gripper_holding = '' # 
self.U[deepcopy(s)] = 0.0 # # if not (s.relations['apple_left_of_gripper'] or # s.relations['apple_right_of_gripper'] or # s.relations['apple_in_front_of_gripper'] or # s.relations['apple_behind_gripper'] or # s.relations['apple_above_gripper'] or # s.relations['apple_below_gripper']) \ # and not s.relations['gripper_open']: # s.gripper_holding = 'apple' # self.U[deepcopy(s)] = 0.0 # elif not (s.relations['gripper_above_drawer'] or # s.relations['gripper_below_drawer'] or # s.relations['gripper_in_front_of_drawer'] # or s.relations['gripper_behind_drawer'] or # s.relations['gripper_left_of_drawer']) \ # and s.relations['gripper_right_of_drawer']: # s.gripper_holding = 'drawer' # self.U[deepcopy(s)] = 0.0 # # print 'Finished enumerating states (finished at: ' + str(datetime.datetime.now()) + ")" # initialize action list a = Action() a.action_type = Action.GRASP for o in self.grasp_objects: a.object = o self.actions.append(deepcopy(a)) a.action_type = Action.PLACE for o in self.place_objects: a.object = o self.actions.append(deepcopy(a)) a.action_type = Action.MOVE_ARM for o in self.move_objects: a.object = o self.actions.append(deepcopy(a)) a.action_type = Action.OPEN_GRIPPER self.actions.append(deepcopy(a)) a.action_type = Action.CLOSE_GRIPPER self.actions.append(deepcopy(a)) a.action_type = Action.RAISE_ARM self.actions.append(deepcopy(a)) a.action_type = Action.LOWER_ARM self.actions.append(deepcopy(a)) a.action_type = Action.RESET_ARM self.actions.append(deepcopy(a))
def generate_action(self, req): """Return binary classification of an ordered grasp pair feature vector.""" action = Action() action.action_type = randint(0, 7) if action.action_type == Action.GRASP: object = randint(5, 11) if object == 5: action.object = 'Drawer' else: action.object = DataUtils.int_to_name(object) if action.action_type == Action.PLACE: if self.semantic_place: action_modifier = randint(1, 5) if action_modifier == 1: action_modifier = 0 elif action_modifier == 4: action_modifier = 6 if DataUtils.int_to_name(action_modifier) == 'Stack': # Pick a random free point on top of the stack of drawers points = [] if req.state.drawer_position.theta == 0 or req.state.drawer_position.theta == 180: for x in range(int(req.state.drawer_position.x - 3), int(req.state.drawer_position.x + 4)): for y in range( int(req.state.drawer_position.y - 2), int(req.state.drawer_position.y + 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z == 3: clear = False break if clear: points.append(Point(x, y, 3)) else: for x in range(int(req.state.drawer_position.x - 2), int(req.state.drawer_position.x + 3)): for y in range( int(req.state.drawer_position.y - 3), int(req.state.drawer_position.y + 4)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z == 3: clear = False break if clear: points.append(Point(x, y, 3)) if len(points) > 0: action.position = points[randint(0, len(points) - 1)] else: # Pick a random point on the table action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 elif DataUtils.int_to_name(action_modifier) == 'Drawer': # Pick a random free point in the drawer that's also not in the drawer stack footprint points = [] if req.state.drawer_position.theta == 0: for x in range( int(req.state.drawer_position.x + 4), int(req.state.drawer_position.x + req.state.drawer_opening + 3)): for y in range( int(req.state.drawer_position.y - 1), int(req.state.drawer_position.y + 2)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) elif req.state.drawer_position.theta == 180: for x in range( int(req.state.drawer_position.x - req.state.drawer_opening - 2), int(req.state.drawer_position.x - 3)): for y in range( int(req.state.drawer_position.y - 1), int(req.state.drawer_position.y + 2)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) elif req.state.drawer_position.theta == 90: for x in range(int(req.state.drawer_position.x - 1), int(req.state.drawer_position.x + 2)): for y in range( int(req.state.drawer_position.y + 4), int(req.state.drawer_position.y + req.state.drawer_opening + 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) else: for x in range(int(req.state.drawer_position.x - 1), int(req.state.drawer_position.x + 2)): for y in range( int(req.state.drawer_position.y - req.state.drawer_opening - 2), int(req.state.drawer_position.y - 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) if len(points) > 0: action.position = points[randint(0, len(points) - 
1)] else: # Pick a random point on the table action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 elif DataUtils.int_to_name(action_modifier) == 'Box': # Special case: holding lid if req.state.object_in_gripper.lower() == 'lid': action.position = req.state.box_position else: # Pick a random free point in the box that's also not in the lid footprint points = [] for x in range(int(req.state.box_position.x - 1), int(req.state.box_position.x + 2)): for y in range(int(req.state.box_position.y - 1), int(req.state.box_position.y + 2)): if (x >= req.state.lid_position.x - 2 and x <= req.state.lid_position.x + 2 and y >= req.state.lid_position.y - 2 and y <= req.state.lid_position.y + 2): continue clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z <= 1: clear = False break if clear: points.append(Point(x, y, 2)) if len(points) > 0: action.position = points[randint( 0, len(points) - 1)] else: # Pick a random point on the table action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 elif DataUtils.int_to_name(action_modifier) == 'Lid': # Pick a random free point on the lid points = [] for x in range(int(req.state.lid_position.x - 2), int(req.state.lid_position.x + 3)): for y in range(int(req.state.lid_position.y - 2), int(req.state.lid_position.y + 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z == req.state.lid_position.z: clear = False break if clear: points.append(Point(x, y, 2)) if len(points) > 0: action.position = points[randint(0, len(points) - 1)] else: # Pick a random point on the table action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 else: # Pick a random point on the table action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 else: # Pick a random point on the table action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 if action.action_type == Action.MOVE_ARM: action.position.x = randint(0, 40) action.position.y = randint(0, 15) action.position.z = 0 return action
def __init__(
        self,
        amdp_id=2,                   # Correlates to the id's in amdp_state
        container_env='task4',       # Container in the environment
        simulator_node='table_sim',  # The name of the table_sim environment
        transition_function=None,    # If the transitions are init elsewhere
        demo_mode=None,              # DemonstrationMode object. If None, RANDOM+CLASSIFIER+SHADOW
        demo_config=None,            # Config from demonstrations. If None, use the default mode
        max_episode_length=100,      # Max. length of an episode
        exploit_policy=False         # Exploit learned policy using AMDP node to tradeoff with exploration
):
    self.demo_mode = demo_mode or DemonstrationMode(
        DemonstrationMode.RANDOM | DemonstrationMode.CLASSIFIER | DemonstrationMode.SHADOW)

    # data_file = rospy.get_param('~data', 'state-action_2018-04-20.pkl')
    # sa_pairs = pickle.load(file(data_file))

    # parameters for controlling exploration.
    # TODO: fetch through the demo mode
    self.alpha = 0.5    # directly following demonstrations vs. random exploration
    self.epsilon = 0.5  # random exploration vs. general policy guided exploration
    self.exploit_epsilon = 1.0
    self.epoch = 0
    self.successes = 0
    self.action_executions = 0

    # Read demo data and config
    self.container_env = rospy.get_param('~container_env', container_env)
    self.amdp_id = rospy.get_param('~amdp_id', amdp_id)
    self.demo_config = demo_config or self.demo_mode.configuration(
        amdp_id=self.amdp_id, container_env=self.container_env)

    # Set the transition function
    self.transition_function = transition_function or AMDPTransitionsLearned(
        amdp_id=self.amdp_id)

    # read action list
    if self.amdp_id >= 0 and self.amdp_id <= 2:
        a_file = rospy.get_param(
            '~actions',
            rospkg.RosPack().get_path('task_sim') + '/src/task_sim/str/A_drawer.pkl')
    else:
        a_file = rospy.get_param(
            '~actions',
            rospkg.RosPack().get_path('task_sim') + '/src/task_sim/str/A_box.pkl')
    self.A = pickle.load(file(a_file))

    if self.amdp_id == -2:
        a = Action()
        a.action_type = 0
        a.object = 'banana'
        self.A.append(deepcopy(a))
        a.action_type = 4
        self.A.append(deepcopy(a))
    elif self.amdp_id == -3:
        a = Action()
        a.action_type = 0
        a.object = 'banana'
        self.A.append(deepcopy(a))
        a.action_type = 4
        self.A.append(deepcopy(a))
        a.object = 'carrot'
        self.A.append(deepcopy(a))
        a.action_type = 0
        self.A.append(deepcopy(a))

    if self.amdp_id >= 6 and self.amdp_id <= 8:
        a = Action()
        a.action_type = 0
        a.object = 'lid'
        self.A.append(deepcopy(a))
        a.action_type = 4
        self.A.append(deepcopy(a))
        a.object = 'box'
        self.A.append(deepcopy(a))
        a.action_type = 1
        self.A.append(deepcopy(a))
        a.object = 'lid'
        self.A.append(deepcopy(a))

    # fill in the policy directly from demonstrations (if demo_mode calls for it)
    if self.demo_mode.shadow:
        self.pi = self.demo_config.get('demo_policy')

    # load weak classifier to bias random exploration (if demo_mode calls for it)
    if self.demo_mode.classifier:
        self.action_bias = self.demo_config.get('action_bias')
        self.action_bias_alternate = self.demo_config.get('action_bias_alternate')

    # load plan network to bias random exploration (if demo_mode calls for it)
    if self.demo_mode.plan_network:
        self.action_sequences = self.demo_config.get('action_sequences')

    # Setup the services
    self.query_state = rospy.ServiceProxy(simulator_node + '/query_state', QueryState)
    self.execute_action = rospy.ServiceProxy(simulator_node + '/execute_action', Execute)
    self.reset_sim = rospy.ServiceProxy(simulator_node + '/reset_simulation', Empty)

    self.n = 0  # number of executions
    self.prev_state = None
    self.timeout = 0
    self.max_episode_length = max_episode_length

    if self.demo_mode.plan_network:
        self.current_node = 'start'
        self.prev_state_msg = None
        self.prev_action = None

    self.exploit_policy = exploit_policy
    if self.exploit_policy:
        # (fixed a stray trailing comma that made query_status a tuple)
        self.query_status = rospy.ServiceProxy(
            simulator_node + '/query_status', QueryStatus)
        self.select_action = rospy.ServiceProxy(
            simulator_node + '/select_action', SelectAction)
def run(self): state_msg = self.query_state().state s = AMDPState(amdp_id=self.amdp_id, state=OOState(state=state_msg)) self.timeout += 1 goal_reached = goal_check(state_msg, self.amdp_id) if self.timeout > self.max_episode_length or goal_reached: self.timeout = 0 # self.reset_sim() self.epoch += 1 if goal_reached: self.successes += 1 if self.demo_mode.plan_network: self.current_node = 'start' self.prev_state_msg = None self.prev_action = None return exploit_check = random() if self.exploit_policy and exploit_check > self.exploit_epsilon: a = self.select_action(state_msg, Action()).action else: # plan network exploration, behavior implemented individually to stop conditionals from getting crazy if self.demo_mode.plan_network: # determine the current node in the plan network if self.prev_state_msg is None or self.prev_action is None: self.current_node = 'start' else: self.current_node = AMDPPlanAction(self.prev_state_msg, self.prev_action, state_msg, self.amdp_id) # select action a = Action() if self.demo_mode.classifier: if random() < self.alpha: action_list = [] if self.action_sequences.has_node(self.current_node): action_list = self.action_sequences.get_successor_actions( self.current_node, state_msg) else: self.current_node = self.action_sequences.find_suitable_node( state_msg) if self.current_node is not None: action_list = self.action_sequences.get_successor_actions( self.current_node, state_msg) # select action stochastically if we're in the network, select randomly otherwise if len(action_list) == 0: a = self.A[randint(0, len(self.A) - 1)] else: selection = random() count = 0 selected_action = action_list[0] for i in range(len(action_list)): count += action_list[i][1] if count >= selection: selected_action = action_list[i] break a.action_type = selected_action[0].action_type a.object = selected_action[0].action_object else: if self.demo_mode.classifier: if self.demo_mode.random and random( ) <= self.epsilon: a = self.A[randint(0, len(self.A) - 1)] else: features = s.to_vector() # Classify action probs = self.action_bias.predict_proba( np.asarray(features).reshape( 1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = '0:apple' for i in range(0, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.action_bias.classes_[ i] break # Convert back to action a = Action() result = action_label.split(':') a.action_type = int(result[0]) if len(result) > 1: a.object = result[1] else: a = self.A[randint(0, len(self.A) - 1)] else: # select from the plan network, with a chance of random exploration, and use random exploration when # off of the network if random() < self.alpha: action_list = [] if self.action_sequences.has_node(self.current_node): action_list = self.action_sequences.get_successor_actions( self.current_node, state_msg) else: self.current_node = self.action_sequences.find_suitable_node( state_msg) if self.current_node is not None: action_list = self.action_sequences.get_successor_actions( self.current_node, state_msg) # select action stochastically if we're in the network, select randomly otherwise if len(action_list) == 0: a = self.A[randint(0, len(self.A) - 1)] else: selection = random() count = 0 selected_action = action_list[0] for i in range(len(action_list)): count += action_list[i][1] if count >= selection: selected_action = action_list[i] break a.action_type = selected_action[0].action_type a.object = selected_action[0].action_object else: a = self.A[randint(0, len(self.A) - 1)] self.prev_state_msg = state_msg # store state for the 
next iteration self.prev_action = action_to_sim(deepcopy(a), state_msg) else: if self.demo_mode.shadow and s in self.pi: if random() < self.alpha: a = self.pi[s].select_action() else: a = self.A[randint(0, len(self.A) - 1)] else: if self.demo_mode.classifier: # if random() < self.alpha: if self.demo_mode.random and random() <= self.epsilon: a = self.A[randint(0, len(self.A) - 1)] else: features = s.to_vector() # Classify action probs = self.action_bias.predict_proba( np.asarray(features).reshape( 1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = '0:apple' for i in range(0, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.action_bias.classes_[i] break # Convert back to action a = Action() result = action_label.split(':') a.action_type = int(result[0]) if len(result) > 1: a.object = result[1] # else: # if self.demo_mode.random and random() <= self.epsilon: # a = self.A[randint(0, len(self.A) - 1)] # else: # features = s.to_vector() # # # Classify action # probs = self.action_bias.predict_proba(np.asarray(features).reshape(1, -1)).flatten().tolist() # selection = random() # cprob = 0 # action_label = '0:apple' # for i in range(0, len(probs)): # cprob += probs[i] # if cprob >= selection: # action_label = self.action_bias.classes_[i] # break # # Convert back to action # a = Action() # result = action_label.split(':') # a.action_type = int(result[0]) # if len(result) > 1: # a.object = result[1] else: a = self.A[randint(0, len(self.A) - 1)] self.execute_action(action_to_sim(deepcopy(a), state_msg)) s_prime = AMDPState(amdp_id=self.amdp_id, state=OOState(state=self.query_state().state)) self.action_executions += 1 self.transition_function.update_transition(s, a, s_prime) self.n += 1 self.prev_state = deepcopy(s)
def learn_q(self, s_prime, alpha=0.1, action_list=None):
    # action_list holds the candidate actions for the next state; callers are
    # expected to pass a non-empty list (the None default is only a placeholder).
    a_prime = None
    r_prime = reward(s_prime, amdp_id=self.amdp_id)

    noop = Action()
    noop.action_type = Action.NOOP

    if is_terminal(s_prime, amdp_id=self.amdp_id):
        sa_group = "{}/{}".format(self._state_idx(s_prime), self._action_idx(noop))
        if sa_group not in self.Q:
            self.Q.create_dataset(sa_group, data=[0.])
        self.Q[sa_group][0] = r_prime

    if self.s is not None:
        # Update the Q table
        sa_group = "{}/{}".format(self._state_idx(self.s), self._action_idx(self.a))
        s_prime_key = self._state_idx(s_prime)
        if sa_group in self.Q:
            Q_sa = self.Q[sa_group]
        else:
            Q_sa = self.Q.create_dataset(sa_group, data=[0.])

        # get best action and max Q value
        Q_sa_prime = -9999999
        actions = []
        action_list_extended = deepcopy(action_list)
        action_list_extended.append(noop)
        for act in action_list_extended:
            sa_prime_group = "{}/{}".format(self._state_idx(s_prime), self._action_idx(act))
            if sa_prime_group in self.Q:
                q = self.Q[sa_prime_group][0]
            else:
                q = 0.0
            if act.action_type == Action.NOOP and q == 0:
                continue
            if q > Q_sa_prime:
                Q_sa_prime = q
                actions = [act]
            elif q == Q_sa_prime:
                actions.append(act)

        if len(actions) > 1:
            a_prime = actions[randint(0, len(actions) - 1)]
        else:
            a_prime = actions[0]

        Q_sa[0] += alpha * (self.r + 0.8 * Q_sa_prime - Q_sa[0])

    self.s = deepcopy(s_prime)
    self.r = deepcopy(r_prime)

    if a_prime is None or random() < self.epsilon:
        if self.mode == 0:
            a_prime = action_list[randint(0, len(action_list) - 1)]
        else:
            return None

    self.a = deepcopy(a_prime)
    return a_prime
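# For reference, a minimal standalone sketch of the tabular Q-learning backup that
# learn_q performs above (the discount factor is hard-coded to 0.8 there). The
# dict-based q_table keyed by (state, action) pairs is an assumption for
# illustration only, not the HDF5-backed table used by the class.
def q_update(q_table, s, a, r, s_prime, candidate_actions, alpha=0.1, gamma=0.8):
    """One Q-learning backup: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
    best_next = max(q_table.get((s_prime, a2), 0.0) for a2 in candidate_actions)
    q_sa = q_table.get((s, a), 0.0)
    q_table[(s, a)] = q_sa + alpha * (r + gamma * best_next - q_sa)
    return q_table[(s, a)]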
def classify(self, req): """Return binary classification of an ordered grasp pair feature vector.""" action = Action() # Convert state to feature vector features = DataUtils.naive_state_vector( req.state, self.state_positions, self.state_semantics, history_buffer=self.history_buffer) # Classify action if self.stochastic: probs = self.action_model.predict_proba( np.asarray(features).reshape(1, -1)).flatten().tolist() selection = random() cprob = 0 action_label = 0 for i in range(1, len(probs)): cprob += probs[i] if cprob >= selection: action_label = self.action_model.classes_[i] break else: action_label = self.action_model.predict( np.asarray(features).reshape(1, -1)) action_type = DataUtils.get_action_from_label(action_label) action_modifier = DataUtils.get_action_modifier_from_label( action_label) action.action_type = action_type if action_type in [Action.GRASP]: action.object = DataUtils.int_to_name(action_modifier) # Augment state with action features.extend([action_type, action_modifier]) # Regress parameters where necessary if action_type in [Action.PLACE]: if self.semantic_place: if DataUtils.int_to_name(action_modifier) == 'Stack': # Pick a random free point on top of the stack of drawers points = [] if req.state.drawer_position.theta == 0 or req.state.drawer_position.theta == 180: for x in range(int(req.state.drawer_position.x - 3), int(req.state.drawer_position.x + 4)): for y in range( int(req.state.drawer_position.y - 2), int(req.state.drawer_position.y + 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z == 3: clear = False break if clear: points.append(Point(x, y, 3)) else: for x in range(int(req.state.drawer_position.x - 2), int(req.state.drawer_position.x + 3)): for y in range( int(req.state.drawer_position.y - 3), int(req.state.drawer_position.y + 4)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z == 3: clear = False break if clear: points.append(Point(x, y, 3)) if len(points) > 0: action.position = points[randint(0, len(points) - 1)] else: # Regress parameters for table or unexpected place surfaces target = self.place_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), DataUtils.int_to_name(action_modifier)) elif DataUtils.int_to_name(action_modifier) == 'Drawer': # Pick a random free point in the drawer that's also not in the drawer stack footprint points = [] if req.state.drawer_position.theta == 0: for x in range( int(req.state.drawer_position.x + 4), int(req.state.drawer_position.x + req.state.drawer_opening + 3)): for y in range( int(req.state.drawer_position.y - 1), int(req.state.drawer_position.y + 2)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) elif req.state.drawer_position.theta == 180: for x in range( int(req.state.drawer_position.x - req.state.drawer_opening - 2), int(req.state.drawer_position.x - 3)): for y in range( int(req.state.drawer_position.y - 1), int(req.state.drawer_position.y + 2)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) elif req.state.drawer_position.theta == 90: for x in 
range(int(req.state.drawer_position.x - 1), int(req.state.drawer_position.x + 2)): for y in range( int(req.state.drawer_position.y + 4), int(req.state.drawer_position.y + req.state.drawer_opening + 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) else: for x in range(int(req.state.drawer_position.x - 1), int(req.state.drawer_position.x + 2)): for y in range( int(req.state.drawer_position.y - req.state.drawer_opening - 2), int(req.state.drawer_position.y - 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z > 0: clear = False break if clear: points.append(Point(x, y, 2)) if len(points) > 0: action.position = points[randint(0, len(points) - 1)] else: # Regress parameters for table or unexpected place surfaces target = self.place_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), DataUtils.int_to_name(action_modifier)) elif DataUtils.int_to_name(action_modifier) == 'Box': # Special case: holding lid if req.state.object_in_gripper.lower() == 'lid': action.position = req.state.box_position else: # Pick a random free point in the box that's also not in the lid footprint points = [] for x in range(int(req.state.box_position.x - 1), int(req.state.box_position.x + 2)): for y in range(int(req.state.box_position.y - 1), int(req.state.box_position.y + 2)): if (x >= req.state.lid_position.x - 2 and x <= req.state.lid_position.x + 2 and y >= req.state.lid_position.y - 2 and y <= req.state.lid_position.y + 2): continue clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z <= 1: clear = False break if clear: points.append(Point(x, y, 2)) if len(points) > 0: action.position = points[randint( 0, len(points) - 1)] else: # Regress parameters for table or unexpected place surfaces target = self.place_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), DataUtils.int_to_name(action_modifier)) elif DataUtils.int_to_name(action_modifier) == 'Lid': # Pick a random free point on the lid points = [] for x in range(int(req.state.lid_position.x - 2), int(req.state.lid_position.x + 3)): for y in range(int(req.state.lid_position.y - 2), int(req.state.lid_position.y + 3)): clear = True for obj in req.state.objects: if obj.position.x == x and obj.position.y == y and obj.position.z == req.state.lid_position.z: clear = False break if clear: points.append(Point(x, y, 2)) if len(points) > 0: action.position = points[randint(0, len(points) - 1)] else: # Regress parameters for table or unexpected place surfaces target = self.place_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), DataUtils.int_to_name(action_modifier)) else: # Regress parameters for table or unexpected place surfaces target = self.place_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, 
Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), DataUtils.int_to_name(action_modifier)) else: target = self.place_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), DataUtils.int_to_name(action_modifier)) if action_type in [Action.MOVE_ARM]: target = self.move_model.predict( np.asarray(features).reshape(1, -1)) # Convert coordinates to global frame action.position = DataUtils.get_point_in_global_frame( req.state, Point(int(floor(target[0][0] + .5)), int(floor(target[0][1] + .5)), 0), 'Gripper') return action
def initialize(self, transition_function): # initialize action list a = Action() if self.amdp_id == 3: # actions are overloaded here to use the same message type # if amdp_id is 3 (the highest-level abstract mdp), then the actions correspond to the amdp_id, i.e.: # 0 - open drawer # 1 - close drawer # 2 - put apple in drawer a.action_type = 0 self.actions.append(deepcopy(a)) a.action_type = 1 self.actions.append(deepcopy(a)) a.action_type = 2 self.actions.append(deepcopy(a)) elif self.amdp_id == 4: # actions are overloaded here to use the same message type # if amdp_id is 3 (the highest-level abstract mdp), then the actions correspond to the amdp_id, i.e.: # 0 - open drawer # 1 - close drawer # 2 - put apple in drawer # ground items are stored in the Action.object member a.action_type = 0 self.actions.append(deepcopy(a)) a.action_type = 1 self.actions.append(deepcopy(a)) a.action_type = 2 a.object = 'apple' self.actions.append(deepcopy(a)) a.object = 'banana' self.actions.append(deepcopy(a)) elif self.amdp_id == 5: # actions are overloaded here to use the same message type # if amdp_id is 3 (the highest-level abstract mdp), then the actions correspond to the amdp_id, i.e.: # 0 - open drawer # 1 - close drawer # 2 - put apple in drawer # ground items are stored in the Action.object member a.action_type = 0 self.actions.append(deepcopy(a)) a.action_type = 1 self.actions.append(deepcopy(a)) a.action_type = 2 a.object = 'apple' self.actions.append(deepcopy(a)) a.object = 'banana' self.actions.append(deepcopy(a)) a.object = 'carrot' self.actions.append(deepcopy(a)) elif self.amdp_id == 9: # actions are overloaded here to use the same message type # the actions correspond to the amdp_id, i.e.: # 6 - open box # 7 - close box # 8 - put carrot in box # ground items are stored in the Action.object member a.action_type = 6 self.actions.append(deepcopy(a)) a.action_type = 7 self.actions.append(deepcopy(a)) a.action_type = 8 a.object = 'carrot' self.actions.append(deepcopy(a)) elif self.amdp_id == 10: # actions are overloaded here to use the same message type # the actions correspond to the amdp_id, i.e.: # 4 - put fruits in drawer high-level amdp # 9 - put vegetable in box high-level amdp a.action_type = 4 self.actions.append(deepcopy(a)) a.action_type = 9 self.actions.append(deepcopy(a)) elif self.amdp_id == 11: # actions are overloaded here to use the same message type # the actions correspond to the amdp_id, i.e.: # 6 - open box # 7 - close box # 8 - put item in box # ground items are stored in the Action.object member a.action_type = 6 self.actions.append(deepcopy(a)) a.action_type = 7 self.actions.append(deepcopy(a)) a.action_type = 8 a.object = 'carrot' self.actions.append(deepcopy(a)) a.object = 'daikon' self.actions.append(deepcopy(a)) elif self.amdp_id == 12: # actions are overloaded here to use the same message type # the actions correspond to the amdp_id, i.e.: # 4 - put fruits in drawer high-level amdp # 11 - put vegetables in box high-level amdp a.action_type = 4 self.actions.append(deepcopy(a)) a.action_type = 11 self.actions.append(deepcopy(a)) else: a.action_type = Action.GRASP if self.amdp_id >= 0 and self.amdp_id <= 2: for o in self.grasp_objects_drawer: a.object = o self.actions.append(deepcopy(a)) elif self.amdp_id >= 6 and self.amdp_id <= 8: for o in self.grasp_objects_box: a.object = o self.actions.append(deepcopy(a)) if self.amdp_id == -2: a.object = 'banana' self.actions.append(deepcopy(a)) elif self.amdp_id == -3: a.object = 'banana' self.actions.append(deepcopy(a)) a.object = 
'carrot' self.actions.append(deepcopy(a)) a.action_type = Action.PLACE if self.amdp_id >= 0 and self.amdp_id <= 2: for o in self.place_objects_drawer: a.object = o self.actions.append(deepcopy(a)) elif self.amdp_id >= 6 and self.amdp_id <= 8: for o in self.place_objects_box: a.object = o self.actions.append(deepcopy(a)) a.action_type = Action.MOVE_ARM if self.amdp_id >= 0 and self.amdp_id <= 2: for o in self.move_objects_drawer: a.object = o self.actions.append(deepcopy(a)) elif self.amdp_id >= 6 and self.amdp_id <= 8: for o in self.move_objects_box: a.object = o self.actions.append(deepcopy(a)) if self.amdp_id == -2: a.object = 'banana' self.actions.append(deepcopy(a)) elif self.amdp_id == -3: a.object = 'banana' self.actions.append(deepcopy(a)) a.object = 'carrot' self.actions.append(deepcopy(a)) a.object = '' a.action_type = Action.OPEN_GRIPPER self.actions.append(deepcopy(a)) a.action_type = Action.CLOSE_GRIPPER self.actions.append(deepcopy(a)) a.action_type = Action.RAISE_ARM self.actions.append(deepcopy(a)) a.action_type = Action.LOWER_ARM self.actions.append(deepcopy(a)) a.action_type = Action.RESET_ARM self.actions.append(deepcopy(a)) # initialize transition function assert transition_function is not None, "Unknown transition function!" self.T = transition_function # initialize utilities for states in transition function self.init_utilities()