import sys
import copy
from collections import OrderedDict

import numpy as np
import torch
import matplotlib.pyplot as plt

import rospy
import vrep
from std_msgs.msg import String, Int8, Int16
from geometry_msgs.msg import Vector3

# NOTE: the following project-local names are assumed to be importable from
# elsewhere in this repository (module paths not shown in this file):
# DoubleQ, doubleQPars, Task, HierarchicalController, HierarchyTask,
# Network, Analysis, action_map, and the dist() helper sketched below.


class Agent_Manager(object):
    def __init__(self, robot_id):
        self.robot_id = robot_id
        # for now...will figure out how to do this later
        self.cmd_vel_topic = "/action#" + str(self.robot_id)
        self.policy = DoubleQ(doubleQPars, name='Dummy', task=Task(),
                              load_path='/home/jimmy/Documents/Research/AN_Bridging/results/'
                                        'policy_comparison_results/all_final/hierarchical_q_policy2.txt')
        self.controller = HierarchicalController()
        rospy.Subscriber("/robotState" + str(self.robot_id), String, self.receive_state_info, queue_size=1)
        rospy.Subscriber("/shutdown" + str(self.robot_id), Int16, self.shutdown, queue_size=1)
        rospy.Subscriber('/finished', Int8, self.receive_restart, queue_size=1)
        self.pub = rospy.Publisher("/robotAction" + str(self.robot_id), Vector3, queue_size=1)
        self.pub_finish = rospy.Publisher("/robot_finished", Int16, queue_size=1)
        self.period = 50  # act on every 50th state message
        self.counter = 0
        self.shut_down = False
        self.sleep_duration = 5
        self.box_distance_finished_condition = .4     # this was not the parameter used in training
        self.marker_distance_finished_condition = .4  # this was the parameter used in training
        print(' Robot number:', robot_id, ' is ready!')
        rospy.wait_for_message('/map_is_ready', Int16)  # just wait for the message indicator
        self.publish_finished_to_map()
        self.waiting_for_target = False
        rospy.spin()
        return

    def receive_state_info(self, msg):
        state = np.array(vrep.simxUnpackFloats(msg.data))
        if self.shut_down:
            cmd = Vector3()
            cmd.x = 1
            cmd.y = 1
            self.pub.publish(cmd)
        elif self.finished(state):
            cmd = Vector3()
            if (not self.is_not_pushing_box(state)) and state[7] < -.25 and dist(state[5:7], np.zeros(2)) < 1:
                # hole
                cmd.x = -2
                cmd.y = -2
            else:
                cmd.x = 0
                cmd.y = 0
            self.pub.publish(cmd)
        if self.counter == 0 and not self.shut_down:
            if self.finished(state):
                if not self.waiting_for_target:
                    rospy.sleep(1)
                    self.publish_finished_to_map()
                    self.waiting_for_target = True
                    rospy.wait_for_message("/target" + str(self.robot_id), Vector3)
                    self.waiting_for_target = False
            else:
                self.controller.goal = state.ravel()[:2]
                if self.is_not_pushing_box(state):
                    # align yourself; otherwise travel towards the node
                    action_index = 0 if abs(state[6]) > .5 else 1
                else:
                    if any(np.isnan(state)):
                        print(state)
                    assert not any(np.isnan(state))
                    action_index = self.policy.get_action(state, testing_time=True, probabilistic=True)
                action_name = action_map[action_index]
                adjusted_state_for_controls = self.controller.feature_2_task_state(state)
                left_right_frequencies = self.controller.getPrimitive(adjusted_state_for_controls, action_name)
                cmd = Vector3()
                cmd.x = left_right_frequencies[0]
                cmd.y = left_right_frequencies[1]
                self.pub.publish(cmd)
        self.counter = (self.counter + 1) % self.period
        return

    def is_not_pushing_box(self, state):
        # Check the V-REP sim for reference on why this is true. For convenience.
        return all([state[i] == -1 for i in range(5)])

    def finished(self, state):
        flat = state.flatten()
        if self.is_not_pushing_box(state):
            if flat[7] < -.25:
                return dist(state[5:7], np.zeros(2)) < .4
            return dist(state[5:8], np.zeros(3)) < .3
        if flat[7] < -.2:  # hole
            return flat[2] < -.15  # and dist(state[:2], state[5:7]) < self.box_distance_finished_condition
        return dist(state[:3], state[5:8]) < self.box_distance_finished_condition

    def publish_finished_to_map(self):
        self.pub_finish.publish(Int16(self.robot_id))
        return

    def shutdown(self, msg):
        protocol = msg.data
        if protocol == 1:  # shut down completely
            self.shut_down = True
        else:
            self.shut_down = False

    def receive_restart(self, msg):
        self.shut_down = False
        rospy.wait_for_message('/starting', Int16)
        self.publish_finished_to_map()
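
# A minimal sketch of the dist() helper used throughout this file, assuming it
# computes the Euclidean distance between two points; the repository's own
# helper (not shown here) should be preferred if it is importable.
def dist(a, b):
    # Euclidean distance between two equal-length point vectors.
    return np.linalg.norm(np.asarray(a) - np.asarray(b))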
class Planner(object):
    def __init__(self, network_parameters, name):
        og_params = copy.deepcopy(network_parameters)
        # Renamed from self.fail: the original attribute name was overwritten
        # by the failure dataset loaded from Analysis.get_data() below.
        self.restart_pub = rospy.Publisher("/restart", Int8, queue_size=1)
        self.agents = network_parameters['agents']
        self.net_params = network_parameters['valPars']
        self.train_params = network_parameters['valTrain']
        self.pubs = OrderedDict()
        for key in self.agents.keys():
            bot = self.agents[key]
            self.pubs[key] = rospy.Publisher(bot['pub'], Vector3, queue_size=1)
        self.name = name
        rospy.Subscriber(self.agents[self.name]['sub'], String, self.receiveState, queue_size=1)
        self.changePoint = rospy.Publisher('/phaseChange', Int8, queue_size=1)
        self.action_map = {0: 'APPROACH', 1: 'ANGLE_TOWARDS', 2: 'PUSH_IN', 3: 'ALIGN_Y',
                           4: 'PUSH_LEFT', 5: 'PUSH_RIGHT', 6: 'MOVE_BACK', 7: 'ANGLE_TOWARDS_GOAL'}
        self.goalSphere = None  # (point, radius)
        self.prevPrim = None
        self.reset_sequence = True
        self.max_num_actions = 5
        self.max_num_rollouts = 1
        self.MPC = False
        self.GP = False
        self.mode = 'Analysis'  # alternative: 'Plan'
        self.box_policy = DoubleQ(network_parameters, self.name, HierarchyTask(),
                                  load_path='/home/austinnguyen517/Documents/Research/BML/MultiRobot/'
                                            'AN_Bridging/model_training_data/Hybrid/Combination_Gaussian/'
                                            'hierarchical_q_policy2.txt')
        # self.model = Network(self.vPars, self.vTrain)
        self.controller = HierarchyTask()
        self.analysis = Analysis(self.mode)
        (self.success, self.success_prime, self.fail, self.fail_prime,
         self.id_to_primitive, self.primitive_to_id) = self.analysis.get_data()
        success_start, fail_start = self.analysis.get_start_data()
        if self.mode == 'Analysis':
            self.analysis.analyze_requested_data()
            sys.exit(0)
        self.train_model()
        rospy.spin()  # replaces the original busy-wait loop (while True: x = 1 + 1)

    def initAgent(self, agent):
        pass

    def train_model(self):
        # NOTE: requires self.model, which is commented out in __init__;
        # re-enable it before calling this method.
        states = np.vstack((self.success, self.fail))
        results = np.vstack((self.success_prime, self.fail_prime))
        targets = results - states
        losses = []
        for i in range(500):
            batch = np.random.choice(states.shape[0], size=512)
            s = states[batch, :]
            s_delta_tar = targets[batch, :]
            delta = self.model(torch.FloatTensor(s))
            loss = self.model.get_loss(delta, s_delta_tar)
            self.model.optimizer.zero_grad()
            loss.backward()
            self.model.optimizer.step()
            losses.append(loss.item())  # .item() so the plot gets plain floats
        plt.plot(range(len(losses)), losses)
        plt.show()

    def concatenate_identifier(self, s):
        return np.hstack((s, np.repeat(1, s.shape[0]).reshape(-1, 1)))

    def sendActionForPlan(self, states, phase):
        s = states['feature']
        if dist(s[:3], s[5:8]) < .3:
            change = Int8()
            change.data = 1
            self.changePoint.publish(change)
        action_index = self.box_policy.get_action(self.concatenate_identifier(s))
        self.controller.goal = s.ravel()[:2]
        action = self.controller.getPrimitive(self.controller.feature_2_task_state(s.ravel()),
                                              self.action_map[action_index])
        # The action message must be a Vector3 to match the publishers above
        # (the original reused the Int8 phase-change message here).
        msg = Vector3()
        msg.x, msg.y = (action[0], action[1])
        self.pubs[self.name].publish(msg)
        return

    def stop_moving(self):
        # NOTE: self.primitive_tasks is assumed to be populated elsewhere;
        # it is never assigned in this file.
        dummy_task = self.primitive_tasks['slope_push']
        dummy_task.pubs = self.pubs
        dummy_task.name = self.name
        dummy_task.stop_moving()

    def split_state(self, s):
        states = {}
        states['slope'] = self.primitive_tasks['slope_push'].feature_joint_2_joint(
            np.hstack((s[:9], s[10:19], s[20:22])))
        states['feature'] = np.hstack((s[:5], s[6:10]))
        states['feature_next'] = np.hstack((s[:5], s[22:26]))
        states['flat'] = self.primitive_tasks['cross'].feature_2_task_state(states['feature'])
        return states

    def receiveState(self, msg):
        if self.mode == 'Plan':
            floats = vrep.simxUnpackFloats(msg.data)
            floats = np.array(floats).ravel()
            phase = floats[-1]
            states = self.split_state(floats[:-1])
            self.sendActionForPlan(states, phase)
        return

    def
restartProtocol(self, restart):
        if restart == 1:
            msg = Int8()
            msg.data = 1
            self.curr_rollout = []
            self.restart_pub.publish(msg)  # renamed publisher; see __init__

    ######### POST TRAINING #########
    def postTraining(self):
        return
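
# A minimal sketch of a launch entry point, not present in the original file.
# Assumptions: the node name 'agent_manager' and the '~robot_id' parameter are
# illustrative only; this repository's actual launch files are not shown here.
if __name__ == '__main__':
    rospy.init_node('agent_manager', anonymous=True)
    robot_id = rospy.get_param('~robot_id', 0)  # assumed private parameter
    Agent_Manager(robot_id)  # blocks in rospy.spin() inside __init__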