def main():
    """
    Start the 'issue_com' node, send one test trial command, then spin.

    Publishers and subscribers are created first, a single-timestep
    TrialCommand is assembled, and after one 1 Hz rate period an Empty
    message and the trial command are published.
    """
    rospy.init_node('issue_com')

    # Topic wiring: two publishers, two subscribers.
    trial_pub = rospy.Publisher(TRIAL_COM_TOPIC, TrialCommand, queue_size=10)
    empty_pub = rospy.Publisher(TEST_TOPIC, Empty, queue_size=10)
    command_sub = rospy.Subscriber(POS_COM_TOPIC, TrialCommand, listen)
    result_sub = rospy.Subscriber(RESULT_TOPIC, SampleResult, listen_report)

    # Assemble the trial command.
    horizon = 1
    command = TrialCommand()
    command.controller = get_lin_gauss_test(T=horizon)
    command.T = horizon
    command.frequency = 20.0
    # NOTE: ordering of datatypes in state is determined by the order here
    datatypes = [JOINT_ANGLES, JOINT_VELOCITIES]
    command.state_datatypes = datatypes
    # The observation reuses the very same list object as the state.
    command.obs_datatypes = datatypes
    command.ee_points = EE_SITES.reshape(EE_SITES.size).tolist()

    # Wait one rate period before publishing.
    rospy.Rate(1).sleep()
    empty_pub.publish(Empty())
    trial_pub.publish(command)

    rospy.spin()
def sample(self, policy, condition, reset=True, verbose=True, save=True,
           noisy=True):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        reset: Whether or not to call self.reset(condition) before the
            trial.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # TfPolicy is None when the tf import was unavailable.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    if reset:
        self.reset(condition)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial.
    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    if save:
        self._samples[condition].append(sample)
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # TfPolicy is None when the tf import was unavailable.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    self.reset(condition)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial.
    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    if save:
        self._samples[condition].append(sample)
    return sample
def sample(self, policy, condition, reset=True, verbose=True, save=True,
           noisy=True):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        reset: Whether or not to call self.reset(condition) before the
            trial.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    if reset:
        self.reset(condition)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    # ee_points and ee_points_tgt are unnecessary for the mobile robot.
    trial_command.ee_points = []
    trial_command.ee_points_tgt = []
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['obs_include']

    # Execute trial.
    sample_msg = self._trial_service.publish_and_wait(
        trial_command, timeout=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    if save:
        self._samples[condition].append(sample)
    return sample
def execute(self, policy):
    """
    Execute a policy and collect a sample.

    Unlike sample(), no reset is performed, noise is always generated, the
    result is not stored in self._samples, and the end-effector target is
    always the one configured for condition 0.
    Args:
        policy: A Policy object.
    Returns:
        sample: A Sample object.
    """
    # TfPolicy is None when the tf import was unavailable.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    # Generate noise.
    noise = generate_noise(self.T, self.dU, self._hyperparams)

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    # TODO(Brook): try taking the target from
    # self._tgt_subscribe.subscribe_and_wait() instead of condition 0.
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][0].tolist()
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial.
    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    # Hand the converted sample back to the caller, as the docstring promises.
    return msg_to_sample(sample_msg, self)
def execute(self, policy):
    """
    Execute a policy and collect a sample.

    Unlike sample(), no reset is performed, noise is always generated, the
    result is not stored in self._samples, and the end-effector target is
    always the one configured for condition 0.
    Args:
        policy: A Policy object.
    Returns:
        sample: A Sample object.
    """
    # TfPolicy is None when the tf import was unavailable.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    # Generate noise.
    noise = generate_noise(self.T, self.dU, self._hyperparams)

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    # TODO(Brook): try taking the target from
    # self._tgt_subscribe.subscribe_and_wait() instead of condition 0.
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][0].tolist()
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial.
    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    # Hand the converted sample back to the caller, as the docstring promises.
    return msg_to_sample(sample_msg, self)
def sample(self, policy, condition, verbose=True, save=True):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
    Returns:
        sample: A Sample object.
    """
    self.reset(condition)

    # Generate noise.
    noise = generate_noise(self.T, self.dU, self._hyperparams)

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial.
    sample_msg = self._trial_service.publish_and_wait(
        trial_command, timeout=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    if save:
        self._samples[condition].append(sample)
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.

    The trial length self.T is taken from self.cur_T[condition], and the
    sample is annotated with reference-trajectory data for that condition.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # There are different trajectories based on if reset or not.
    if self.reset_time:
        trajectories = self.reset_trajectories
    else:
        trajectories = self.trajectories

    if condition not in trajectories:  # Not initialized yet.
        if self.reset_time:
            self.init_reset_traj(condition, policy)
        else:
            self.compute_reference_trajectory(condition, policy)

    self.samples_taken[condition] += 1  # Increment number of samples taken.

    # Every so many samples, take more of the trajectory, unless we have
    # reached the end or we are using varying T.
    if self.samples_taken[condition] % (self.num_samples * self.iter_per_seg) == 0 \
            and self.iter_count != 0 \
            and (self.T != self.final_T or self.varying_T):
        self.update_T_then_policy(policy, condition)

    # Make sure the T is correct for the condition we are on.
    self.T = self.cur_T[condition]

    ref_traj_info = trajectories[condition]
    if self.T == self.final_T:
        # We have gotten to the whole trajectory: use it as is.
        ref_traj = self.trajectories[condition]['ee']
    else:
        # Otherwise truncate and pad with copies of the last kept point.
        ref_traj = self.trajectories[condition]['ee'][:self.T - self.padding]
        ref_traj.extend([
            self.trajectories[condition]['ee'][self.T - self.padding - 1]
        ] * self.padding)
    print('The length of the trajectory we are currently using is ' + str(self.T))

    # Formatted so the output is the same under Python 2 and Python 3.
    print('Sampling, condition %s' % (condition,))
    self.reset(condition)

    # Added from agent_ros.py of the public gps codebase.
    if TfPolicy is not None:  # user has tf installed.
        if isinstance(policy, TfPolicy):
            print('well this got called')
            self._init_tf(policy.dU)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Execute trial.
    trial_command = TrialCommand()
    trial_command.controller = policy_to_msg(policy, noise)

    if self.samples_taken[condition] % self.num_samples == 0 \
            and self.samples_taken[condition] != 0:
        self.iter_count += 1  # This is the full count.
        self.pickle_self()
        # Dump the noise-free controller message for this iteration.
        with open('iter' + str(self.iter_count) + '_cond' + str(condition) + '.txt', 'w') as f:
            the_noise = np.zeros((self.T, self.dU))
            f.write(str(policy_to_msg(policy, the_noise)))

    trial_command.T = self.T
    trial_command.id = self._get_next_seq_id()
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    # The target is the final point of the current reference trajectory.
    trial_command.ee_points_tgt = ref_traj[-1]
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    # Reference points expressed relative to the final reference point.
    sample.set('target_traj_ee_points',
               [points - ref_traj[-1] for points in ref_traj])
    sample.set(REF_OFFSETS, ref_traj_info['offsets'][:self.T])
    sample.set(REF_TRAJ,
               np.array([ref_traj_info['offsets'].flatten()] * self.T))

    if save:
        self._samples[condition].append(sample)
    if self.varying_T:  # Only kept when varying T is in use.
        self.saved_samples[condition].append(sample)
    self.reset(condition)  # Reset again after the trial.
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True,
           use_TfController=False, first_itr=False, timeout=None,
           reset=True, rnd=None):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        use_TfController: Whether to use the synchronous TfController.
        first_itr: Unused in this method.
        timeout: If not None, sent as the trial length (trial_command.T)
            instead of self.T.
        reset: Whether or not to call self.reset before the trial.
        rnd: Forwarded to self.reset as its rnd keyword argument.
    Returns:
        sample: A Sample object.
    """
    if use_TfController:
        # Record the per-trial state on the agent.
        self._init_tf(policy, policy.dU)
        self.use_tf = True
        self.cur_timestep = 0
        self.sample_save = save
        self.active = True
        self.policy = policy

    if reset:
        self.reset(condition, rnd=rnd)
    self.condition = condition

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
        self.noise = noise
    else:
        noise = np.zeros((self.T, self.dU))
        self.noise = None

    # Fill in trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = \
        policy_to_msg(policy, noise, use_TfController=use_TfController)
    if timeout is not None:
        trial_command.T = timeout
    else:
        trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][self.condition].tolist()
    # The observation uses the same datatype list as the state.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial. The wait budget is the trial length plus the
    # configured trial timeout.
    sample_msg = self._trial_service.publish_and_wait(
        trial_command,
        timeout=(trial_command.T + self._hyperparams['trial_timeout']))
    if self.vision_enabled:
        sample_msg = self.add_rgb_stream_to_sample(sample_msg)

    sample = msg_to_sample(sample_msg, self)
    if save:
        self._samples[condition].append(sample)
    self.active = False
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # TfPolicy is None when the tf import was unavailable.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    # Reset the agent for this particular experiment condition.
    self.reset(condition)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Build the trial command (a ROS message). The sequence id is requested
    # exactly once so that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T  # Trajectory length.
    trial_command.frequency = self._hyperparams['frequency']
    # End-effector point offsets, flattened to a plain list.
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    # Desired end-effector points for this condition.
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    # Data types to include in the state; the observation uses the same list.
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial.
    if self.use_tf is False:
        # Publish the command and block until the response arrives.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        # Publish without waiting, then let run_trial_tf drive the trial
        # and produce the sample message.
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    if save:
        self._samples[condition].append(sample)
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples. Only
            honored on the non-TF path.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # TfPolicy is None when the tf import was unavailable.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    self.reset(condition)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Build the trial command. The sequence id is requested exactly once so
    # that each trial consumes a single id.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['obs_include']

    # The TfPolicy-is-None guard keeps isinstance() from raising TypeError
    # when the tf import was unavailable.
    if (self.use_tf is False or TfPolicy is None
            or not isinstance(policy, TfPolicy)):
        print('Not using TF controller')
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample

    # TF path: the trial manager runs the policy while the trial service
    # receives the resulting sample message.
    self.trial_manager.prep(policy, condition)
    self._trial_service.publish(trial_command, wait=True)
    self.trial_manager.run(self._hyperparams['trial_timeout'])
    while self._trial_service._waiting:
        print('Waiting for sample to come in')
        rospy.sleep(1.0)
    sample_msg = self._trial_service._subscriber_msg

    sample = msg_to_sample(sample_msg, self)
    sample.set(NOISE, noise)
    sample.set(TIMESTEP, np.arange(self.T).reshape((self.T, 1)))
    # NOTE(review): `save` is not honored on this path; the sample is never
    # appended to self._samples here. Confirm whether that is intended.
    return sample