Example #1
0
    def sample(self,
               policy,
               condition,
               reset=True,
               verbose=True,
               save=True,
               noisy=True):
        """
        Optionally reset, then execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            reset: Whether to call self.reset(condition) before the trial.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless TensorFlow is installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        if reset:
            self.reset(condition)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command.
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # Draw exactly one sequence id per trial; the previous code drew two
        # and silently discarded the first.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute the trial; only the way the result message is obtained
        # differs between the two modes.
        timeout = self._hyperparams['trial_timeout']
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=timeout)
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, time_to_run=timeout)

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Example #2
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless TensorFlow is installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        self.reset(condition)

        # Generate noise (all zeros when sampling noise-free).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Assemble the trial command.
        hyperparams = self._hyperparams
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # One sequence id per trial: the earlier duplicate call only wasted
        # an id, since its value was overwritten before use.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = hyperparams['frequency']
        ee_points = hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = hyperparams['state_include']
        trial_command.obs_datatypes = hyperparams['state_include']

        # Execute the trial.
        timeout = hyperparams['trial_timeout']
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=timeout
            )
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, time_to_run=timeout)

        # Shared post-processing for both execution modes.
        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Example #3
0
    def execute(self, policy):
        """
        Execute a policy (without resetting first) and collect a sample.

        Noise is always generated, and the end-effector target of
        condition 0 is used regardless of the current setup.

        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless TensorFlow is installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial command.
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # Draw a single sequence id; the earlier duplicate call was discarded.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): obtain the target from self._tgt_subscribe instead of
        # the hard-coded condition 0 entry.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][0].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute the trial.
        timeout = self._hyperparams['trial_timeout']
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=timeout)
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, time_to_run=timeout)

        # The sample used to be built and then dropped; return it as the
        # docstring promises.
        return msg_to_sample(sample_msg, self)
Example #4
0
    def execute(self, policy):
        """
        Run a policy from the current state and collect a sample.

        No reset is performed. Noise is always applied, and the
        end-effector target is taken from condition 0.

        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless TensorFlow is installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Assemble the trial command.
        hyperparams = self._hyperparams
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # Only one sequence id is consumed per trial now; the first of the
        # two original calls was overwritten before it was ever used.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = hyperparams['frequency']
        ee_points = hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): try reading the target via self._tgt_subscribe rather
        # than always using the target of condition 0.
        trial_command.ee_points_tgt = hyperparams['ee_points_tgt'][0].tolist()
        trial_command.state_datatypes = hyperparams['state_include']
        trial_command.obs_datatypes = hyperparams['state_include']

        # Execute the trial.
        timeout = hyperparams['trial_timeout']
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=timeout
            )
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, time_to_run=timeout)

        # Hand the sample back to the caller (previously it was discarded).
        sample = msg_to_sample(sample_msg, self)
        return sample
Example #5
0
 def get_data(self):
     """
     Ask the data service for the latest data/sensor readings.

     Returns:
         The entire sample report (all available data), converted to a
         sample with msg_to_sample.
     """
     data_request = DataRequest()
     data_request.id = self._get_next_seq_id()
     data_request.arm = TRIAL_ARM
     data_request.stamp = rospy.get_rostime()
     # TODO - Make IDs match, assert that they match elsewhere here.
     reply = self._data_service.publish_and_wait(data_request)
     return msg_to_sample(reply, self)
Example #6
0
 def get_data(self, arm=TRIAL_ARM):
     """
     Ask the data service for the latest data/sensor readings of one arm.

     Args:
         arm: TRIAL_ARM or AUXILIARY_ARM.
     Returns:
         The entire sample report (all available data), converted to a
         sample with msg_to_sample.
     """
     data_request = DataRequest()
     data_request.id = self._get_next_seq_id()
     data_request.arm = arm
     data_request.stamp = rospy.get_rostime()
     # TODO - Make IDs match, assert that they match elsewhere here.
     reply = self._data_service.publish_and_wait(data_request)
     return msg_to_sample(reply, self)
Example #7
0
    def sample(self,
               policy,
               condition,
               reset=True,
               verbose=True,
               save=True,
               noisy=True):
        """
        Optionally reset, then execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            reset: Whether to call self.reset(condition) before the trial.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if reset:
            self.reset(condition)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command.
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # Draw exactly one sequence id per trial; the previous code drew two
        # and overwrote the first.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        # ee_points and ee_points_tgt are unnecessary for the mobile robot.
        trial_command.ee_points = []
        trial_command.ee_points_tgt = []
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['obs_include']

        # Execute the trial and wait for the result message.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Example #8
0
    def sample(self, policy, condition, verbose=True, save=True):
        """
        Reset and execute a policy and collect a sample.

        Noise is always generated for the trial.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
        Returns:
            sample: A Sample object.
        """
        self.reset(condition)

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial command.
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # A single sequence id per trial; the original requested two and
        # threw the first one away.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute the trial and wait for the result message.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout']
        )

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Example #9
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Besides running the trial this method also maintains per-condition
        bookkeeping:
          * lazily initializes the trajectory data for `condition`,
          * increments self.samples_taken[condition],
          * may call self.update_T_then_policy(policy, condition),
          * sets self.T from self.cur_T[condition],
          * every self.num_samples samples, increments self.iter_count,
            calls self.pickle_self() and writes the noise-free policy
            message to 'iter<iter_count>_cond<condition>.txt' (a relative
            path),
          * resets the agent both before and after the trial.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object, additionally carrying
                'target_traj_ee_points', REF_OFFSETS and REF_TRAJ.
        """
        # There are different trajectories based on if reset or not
        if self.reset_time:
            trajectories = self.reset_trajectories
        else:
            trajectories = self.trajectories

        if condition not in trajectories:  # If this hasn't been initialized yet
            if self.reset_time:
                self.init_reset_traj(condition, policy)
            else:
                self.compute_reference_trajectory(condition, policy)

        self.samples_taken[condition] += 1  # Increment number of samples taken

        # Every so many samples (num_samples * iter_per_seg), take more of the
        # trajectory, unless we have reached the end (T == final_T) and are
        # not using varying T. Skipped while iter_count is still 0.
        if self.samples_taken[condition] % (self.num_samples * self.iter_per_seg) == 0 and self.iter_count != 0 \
        and (self.T != self.final_T or self.varying_T):
            self.update_T_then_policy(policy, condition)

        self.T = self.cur_T[
            condition]  # Make sure the T is correct for the condition we are on

        ref_traj_info = trajectories[condition]
        # Length of the trajectory
        # NOTE(review): traj_length is never read below.
        traj_length = len(self.full_ref_ee[condition])
        # This is how long the current trajectory we're using is - self.T
        # NOTE(review): ref_traj is always read from self.trajectories, even
        # when `trajectories` above refers to self.reset_trajectories --
        # confirm this is intended.
        if self.T == self.final_T:  # If we have gotten to the whole trajectory
            ref_traj = self.trajectories[condition][
                'ee']  # Current reference trajectory
        else:  # Otherwise pad the reference trajectory as well
            # Take the first (T - padding) entries, then repeat the last of
            # those entries `padding` times.
            ref_traj = self.trajectories[condition]['ee'][:self.T -
                                                          self.padding]
            ref_traj.extend([
                self.trajectories[condition]['ee'][self.T - self.padding - 1]
            ] * self.padding)

        print('The length of the trajectory we are currently using is ' +
              str(self.T))
        print 'Sampling, condition', condition
        self.reset(condition)

        # Added from agent_ros.py of the public gps codebase.
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                print('well this got called')
                self._init_tf(policy.dU)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        #trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)

        # At every multiple of num_samples: count an iteration, pickle the
        # agent and dump the policy message (built with zero noise) to a
        # text file.
        if self.samples_taken[
                condition] % self.num_samples == 0 and self.samples_taken[
                    condition] != 0:
            self.iter_count += 1  # This is the full count
            self.pickle_self()  # Pickle self (see pickle_self)
            with open(
                    'iter' + str(self.iter_count) + '_cond' + str(condition) +
                    '.txt', 'w') as f:
                the_noise = np.zeros((self.T, self.dU))
                f.write(str(policy_to_msg(policy, the_noise)))

        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # The target is the final point of the (possibly padded) reference.
        trial_command.ee_points_tgt = ref_traj[-1]
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        # Reference points expressed relative to the final reference point.
        # NOTE(review): the subtraction assumes the entries of ref_traj
        # support elementwise `-` (e.g. numpy arrays) -- confirm.
        sample.set('target_traj_ee_points',
                   [points - ref_traj[-1] for points in ref_traj])

        sample.set(REF_OFFSETS, ref_traj_info['offsets'][:self.T])
        # The flattened offsets, repeated once per timestep.
        sample.set(REF_TRAJ,
                   np.array([ref_traj_info['offsets'].flatten()] * self.T))

        if save:
            self._samples[condition].append(sample)
        if self.varying_T:  # Only save this if you are gonna use varying T
            self.saved_samples[condition].append(
                sample)  # Save it here too just in case
        self.reset(condition)  # Reset once more after the trial
        return sample
Example #10
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless TensorFlow is installed (try import).
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # Reset the agent for a particular experiment condition.
        self.reset(condition)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command (a ROS message).
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)  # ControllerParams
        trial_command.T = self.T  # Trajectory length
        # Draw exactly one sequence id per trial; the previous code drew two
        # and overwrote the first.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']  # Controller frequency

        # 3*n_points array containing offsets.
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # 3*n_points array containing the desired ee_points for this trial.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()

        # Which data types to include in the state and in the observation.
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        timeout = self._hyperparams['trial_timeout']
        if self.use_tf is False:
            # Local policy: publish the command and wait for the response.
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=timeout
            )
        else:
            self._trial_service.publish(trial_command)
            # Run an async controller from a policy. The async controller
            # receives observations from ROS subscribers and then uses them
            # to publish actions.
            sample_msg = self.run_trial_tf(policy, time_to_run=timeout)

        # Shared post-processing for both execution modes.
        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Example #11
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Non-TF policies are run by publishing the trial command and waiting
        for the result. TF policies are run through self.trial_manager, and
        the resulting sample is additionally annotated with NOISE and
        TIMESTEP.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless TensorFlow is installed.
        is_tf_policy = TfPolicy is not None and isinstance(policy, TfPolicy)
        if is_tf_policy:
            self._init_tf(policy.dU)

        self.reset(condition)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command.
        trial_command = TrialCommand()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        # Draw a single sequence id; the earlier duplicate call was discarded.
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['obs_include']

        # Guarding on is_tf_policy avoids the TypeError that
        # isinstance(policy, None) raised when TensorFlow was unavailable.
        if self.use_tf is False or not is_tf_policy:
            print('Not using TF controller')
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            return sample

        # TF controller: the trial manager drives the policy while the
        # trial service waits for the result message.
        self.trial_manager.prep(policy, condition)
        self._trial_service.publish(trial_command, wait=True)
        self.trial_manager.run(self._hyperparams['trial_timeout'])
        while self._trial_service._waiting:
            print('Waiting for sample to come in')
            rospy.sleep(1.0)
        sample_msg = self._trial_service._subscriber_msg

        sample = msg_to_sample(sample_msg, self)
        sample.set(NOISE, noise)
        sample.set(TIMESTEP, np.arange(self.T).reshape((self.T, 1)))
        # Honor `save` here as well; this branch used to ignore it.
        if save:
            self._samples[condition].append(sample)
        return sample