Exemple #1
0
    def sample(self,
               policy,
               condition,
               reset=True,
               verbose=True,
               save=True,
               noisy=True):
        """
        Optionally reset, then execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            reset: Whether to call self.reset(condition) before the trial.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        if reset:
            self.reset(condition)

        # Generate noise (all zeros when sampling without noise).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command. The sequence id is drawn exactly once;
        # a second draw would only overwrite the first and waste an id.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        # NOTE(review): observations reuse 'state_include' -- confirm this is
        # intended rather than a separate observation list.
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial: either block on the trial service, or publish the
        # command and let run_trial_tf produce the sample message.
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Exemple #2
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        self.reset(condition)

        # Generate noise (all zeros when sampling without noise).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command. The sequence id is requested only once;
        # requesting it twice just overwrote the first value.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial. Only the way the sample message is obtained differs
        # between the two modes; conversion and storage are shared.
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Exemple #3
0
    def sample(self,
               policy,
               condition,
               reset=True,
               verbose=True,
               save=True,
               noisy=True):
        """
        Optionally reset, then execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            reset: Whether to call self.reset(condition) before the trial.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if reset:
            self.reset(condition)

        # Generate noise (all zeros when sampling without noise).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command. The sequence id is drawn exactly once;
        # a second draw would only overwrite the first and waste an id.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        # ee_points and ee_points_tgt are unnecessary for the mobile robot,
        # so both are sent empty.
        trial_command.ee_points = []
        trial_command.ee_points_tgt = []
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['obs_include']

        # Execute trial and wait for the result message.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Exemple #4
0
    def execute(self, policy):
        """
        Execute a policy (without resetting first) and collect a sample.

        Noise is always generated for the trial, and the end-effector target
        is taken from the first entry of the 'ee_points_tgt' hyperparameter
        because this method has no condition argument.

        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # No self.reset(...) here: the trial starts from the current state.

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial command. The sequence id is drawn exactly once;
        # a second draw would only overwrite the first and waste an id.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): try obtaining the target from
        # self._tgt_subscribe.subscribe_and_wait() instead of the fixed
        # index-0 target below.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][0].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial: either block on the trial service, or publish the
        # command and let run_trial_tf produce the sample message.
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        # Return the sample as the docstring promises; previously the
        # converted sample was built and then silently discarded.
        return msg_to_sample(sample_msg, self)
Exemple #5
0
    def execute(self, policy):
        """
        Execute a policy (without resetting first) and collect a sample.

        Noise is always generated for the trial, and the end-effector target
        is taken from the first entry of the 'ee_points_tgt' hyperparameter
        because this method has no condition argument.

        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # The agent is deliberately not reset before this trial.

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial command. The sequence id is requested only once;
        # requesting it twice just overwrote the first value.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): try obtaining the target from
        # self._tgt_subscribe.subscribe_and_wait() instead of the fixed
        # index-0 target below.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][0].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial. Only the way the sample message is obtained differs
        # between the two modes.
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        # The docstring promises a Sample, but the original fell off the end
        # of the function and returned None; hand the sample back.
        sample = msg_to_sample(sample_msg, self)
        return sample
Exemple #6
0
    def sample(self, policy, condition, verbose=True, save=True):
        """
        Reset and execute a policy and collect a sample.

        Noise is always generated for the trial.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
        Returns:
            sample: A Sample object.
        """
        self.reset(condition)

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Build the trial command. The sequence id is drawn exactly once;
        # a second draw would only overwrite the first and waste an id.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial and wait for the result message.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout']
        )

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Exemple #7
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy against the current reference-trajectory
        segment, and collect a sample.

        Besides running the trial, this method also lazily initializes the
        trajectory for the condition, counts samples taken, periodically
        updates T, and periodically pickles the agent and dumps the
        noise-free controller message to a text file.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # Pick the trajectory table matching the current mode: the reset
        # trajectories while self.reset_time is set, the regular ones otherwise.
        if self.reset_time:
            trajectories = self.reset_trajectories
        else:
            trajectories = self.trajectories

        if condition not in trajectories:  # If this hasn't been initialized yet
            if self.reset_time:
                self.init_reset_traj(condition, policy)
            else:
                self.compute_reference_trajectory(condition, policy)

        self.samples_taken[condition] += 1  # Increment number of samples taken

        # Every some many samples, take more of the trajectory
        # Unless we have reached the end or we are using varying T
        if self.samples_taken[condition] % (self.num_samples * self.iter_per_seg) == 0 and self.iter_count != 0 \
        and (self.T != self.final_T or self.varying_T):
            self.update_T_then_policy(policy, condition)

        self.T = self.cur_T[
            condition]  # Make sure the T is correct for the condition we are on

        ref_traj_info = trajectories[condition]
        # Length of the trajectory
        # NOTE(review): traj_length is computed here but not used below.
        traj_length = len(self.full_ref_ee[condition])
        # This is how long the current trajectory we're using is - self.T
        # NOTE(review): ref_traj is always read from self.trajectories, even
        # when ref_traj_info came from self.reset_trajectories -- confirm.
        if self.T == self.final_T:  # If we have gotten to the whole trajectory
            ref_traj = self.trajectories[condition][
                'ee']  # Current reference trajectory
        else:  # Otherwise pad the reference trajectory as well
            # Take the first T - padding points, then repeat the last of
            # those points `padding` times.
            ref_traj = self.trajectories[condition]['ee'][:self.T -
                                                          self.padding]
            ref_traj.extend([
                self.trajectories[condition]['ee'][self.T - self.padding - 1]
            ] * self.padding)

        print('The length of the trajectory we are currently using is ' +
              str(self.T))
        print 'Sampling, condition', condition
        self.reset(condition)

        # Added from agent_ros.py of the public gps codebase.
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                print('well this got called')
                self._init_tf(policy.dU)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        # The sequence id is assigned once, further below.
        #trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)

        # After every num_samples samples for this condition, advance the
        # iteration counter, pickle the agent, and write the noise-free
        # controller message to 'iter<N>_cond<C>.txt'.
        if self.samples_taken[
                condition] % self.num_samples == 0 and self.samples_taken[
                    condition] != 0:
            self.iter_count += 1  # This is the full count
            self.pickle_self()  # Pickle self and send to data files
            with open(
                    'iter' + str(self.iter_count) + '_cond' + str(condition) +
                    '.txt', 'w') as f:
                the_noise = np.zeros((self.T, self.dU))
                f.write(str(policy_to_msg(policy, the_noise)))

        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # The target sent to the controller is the last reference point.
        trial_command.ee_points_tgt = ref_traj[-1]
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Either block on the trial service, or publish the command and let
        # run_trial_tf produce the sample message.
        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        # Store the reference points relative to the final reference point.
        sample.set('target_traj_ee_points',
                   [points - ref_traj[-1] for points in ref_traj])

        # REF_OFFSETS gets the first T offsets; REF_TRAJ gets the full
        # flattened offsets repeated once per timestep.
        sample.set(REF_OFFSETS, ref_traj_info['offsets'][:self.T])
        sample.set(REF_TRAJ,
                   np.array([ref_traj_info['offsets'].flatten()] * self.T))

        if save:
            self._samples[condition].append(sample)
        if self.varying_T:  # Only save this if you are gonna use varying T
            self.saved_samples[condition].append(
                sample)  # Save it here too just in case
        self.reset(condition)  # Reset again after the trial
        return sample
Exemple #8
0
    def sample(self,
               policy,
               condition,
               verbose=True,
               save=True,
               noisy=True,
               use_TfController=False,
               first_itr=False,
               timeout=None,
               reset=True,
               rnd=None):
        """
        Optionally reset, then execute a policy and collect a sample.

        Args:
            policy: A Policy object. Also stored on self.policy.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling. The noise
                used is stored on self.noise (None when not noisy).
            use_TfController: Whether to use the synchronous TfController.
                When set, TF is initialized for the policy and the agent is
                marked active for the duration of the trial.
            first_itr: Unused in this method.
            timeout: If not None, used as the trial length (trial_command.T)
                instead of self.T.
            reset: Whether to reset to `condition` before the trial. Only
                when resetting is self.condition updated.
            rnd: Forwarded to self.reset as its `rnd` keyword.
        Returns:
            sample: A Sample object.
        """
        if use_TfController:
            self._init_tf(policy, policy.dU)
            self.use_tf = True
            self.cur_timestep = 0
            self.sample_save = save
            self.active = True

        self.policy = policy

        if reset:
            self.reset(condition, rnd=rnd)
            self.condition = condition

        # Generate noise (all zeros when sampling without noise).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
            self.noise = noise
        else:
            noise = np.zeros((self.T, self.dU))
            self.noise = None

        # Fill in trial command. The sequence id is drawn exactly once;
        # a second draw would only overwrite the first and waste an id.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = \
            policy_to_msg(policy, noise, use_TfController=use_TfController)
        trial_command.T = self.T if timeout is None else timeout
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # NOTE(review): the target is indexed by self.condition, which is
        # only refreshed when reset=True; with reset=False it is whatever an
        # earlier call stored -- confirm this is intended.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][self.condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial. The wait budget is the trial length plus the
        # configured 'trial_timeout'.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command,
            timeout=(trial_command.T + self._hyperparams['trial_timeout']))
        if self.vision_enabled:
            sample_msg = self.add_rgb_stream_to_sample(sample_msg)
        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        self.active = False
        return sample
Exemple #9
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # TfPolicy is None unless the user has tf installed (try import).
        # For a linear-Gaussian policy the isinstance check is False.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        # Reset the agent for a particular experiment condition.
        self.reset(condition)

        # Generate noise (all zeros when sampling without noise).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command (ROS message). The sequence id is drawn
        # exactly once; a second draw would only overwrite the first.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)  # ControllerParams
        trial_command.T = self.T  # Trajectory length
        trial_command.frequency = self._hyperparams['frequency']  # Controller frequency

        # 3*n_points array containing offsets.
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # 3*n_points array containing the desired ee_points for this trial.
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()

        # Which data types to include in state and in observation.
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            # Local policy: publish a message and wait for the response.
            # self._trial_service is presumably the ServiceEmulator built from
            # the 'trial_command_topic' / 'sample_result_topic' hyperparameters
            # -- confirm where it is constructed.
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
        else:
            self._trial_service.publish(trial_command)
            # Run an async controller from a policy. The async controller
            # receives observations from ROS subscribers and then uses them
            # to publish actions.
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)

        # Keep the sample with the others collected for this condition.
        if save:
            self._samples[condition].append(sample)
        return sample
Exemple #10
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples. Only
                honored on the non-TF path (see the note in the body).
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object. On the TF path it additionally carries
            the NOISE and TIMESTEP entries.
        """
        # TfPolicy is None unless the user has tf installed.
        if TfPolicy is not None and isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

        self.reset(condition)

        # Generate noise (all zeros when sampling without noise).
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Build the trial command. The sequence id is drawn exactly once;
        # a second draw would only overwrite the first and waste an id.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['obs_include']

        if self.use_tf is False or not isinstance(policy, TfPolicy):
            # Single-argument print(...) behaves the same under the Python 2
            # print statement and the Python 3 print function.
            print('Not using TF controller')
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            # NOTE(review): this path returns before NOISE / TIMESTEP are
            # attached -- confirm that is intended.
            return sample

        # TF controller path: prepare the trial manager, publish the command,
        # run the trial, then poll until the trial service has a response.
        self.trial_manager.prep(policy, condition)
        self._trial_service.publish(trial_command, wait=True)
        self.trial_manager.run(self._hyperparams['trial_timeout'])
        while self._trial_service._waiting:
            print('Waiting for sample to come in')
            rospy.sleep(1.0)
        sample_msg = self._trial_service._subscriber_msg

        sample = msg_to_sample(sample_msg, self)
        sample.set(NOISE, noise)
        sample.set(TIMESTEP, np.arange(self.T).reshape((self.T, 1)))
        # NOTE(review): `save` is not consulted on this path, so the sample is
        # never appended to self._samples here -- confirm whether it is stored
        # elsewhere.
        return sample