Example #1
    def sample(self, policy, condition, verbose=False, save=True, noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.

        Args:
            policy: Policy to be used in the trial.
            condition (int): Which condition setup to run.
            verbose (boolean): Whether or not to plot the trial (not used here).
            save (boolean): Whether or not to store the trial into the samples.
            noisy (boolean): Whether or not to use noise during sampling.
        """
        self._worlds[condition].run()
        self._worlds[condition].reset_world()
        b2d_X = self._worlds[condition].get_state()
        new_sample = self._init_sample(b2d_X)
        U = np.zeros([self.T, self.dU])
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))
        for t in range(self.T):
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            U[t, :] = policy.act(X_t, obs_t, t, noise[t, :])
            if (t+1) < self.T:
                for _ in range(self._hyperparams['substeps']):
                    self._worlds[condition].run_next(U[t, :])
                b2d_X = self._worlds[condition].get_state()
                self._set_sample(new_sample, b2d_X, t)
        new_sample.set(ACTION, U)
        if save:
            self._samples[condition].append(new_sample)
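
All of these `sample()` variants share the same calling convention, so a driver loop only needs the agent and a policy. Below is a minimal sketch of such a loop, assuming a GPS-style agent that stores saved trials per condition; the names `collect_samples`, `num_conditions`, `num_samples`, and `get_samples` are illustrative assumptions, not taken from the example above.

# Hedged sketch of a driver loop for agents exposing the sample() interface
# shown above. collect_samples, num_conditions, num_samples and get_samples
# are illustrative names, not part of the example code.
def collect_samples(agent, policy, num_conditions, num_samples, noisy=True):
    for cond in range(num_conditions):
        for _ in range(num_samples):
            # One trial per call; save=True stores it in the agent's
            # per-condition sample list.
            agent.sample(policy, cond, verbose=False, save=True, noisy=noisy)
    # Most GPS-style agents expose the stored trials per condition.
    return [agent.get_samples(cond) for cond in range(num_conditions)]
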
Example #2
 def sample(self, policy, condition, verbose=True, save=True, noisy=True):
     """
     Runs a trial and constructs a new sample containing information
     about the trial.
     Args:
         policy: Policy to be used in the trial.
         condition: Which condition setup to run.
         verbose: Whether or not to plot the trial.
         save: Whether or not to store the trial into the samples.
         noisy: Whether or not to use noise during sampling.
     """
     # Create new sample, populate first time step.
     mj_X = self._hyperparams['x0'][condition]
     U = np.zeros([self.T, self.dU])
     if noisy:
         noise = generate_noise(self.T, self.dU, self._hyperparams)
     else:
         noise = np.zeros((self.T, self.dU))
     if np.any(self._hyperparams['x0var'][condition] > 0):
         x0n = self._hyperparams['x0var'] * \
                 np.random.randn(self._hyperparams['x0var'].shape)
         mj_X += x0n
     noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
     if noisy_body_idx.size > 0:
         for i in range(len(noisy_body_idx)):
             idx = noisy_body_idx[i]
             var = self._hyperparams['noisy_body_var'][condition][i]
             self._model[condition]['body_pos'][idx, :] += \
                     var * np.random.randn(1, 3)
     self._world[condition].set_model(self._model[condition])
     if self._linear:
       dt = self._hyperparams['dt']
       F = np.array([[ 1, 0, dt, 0, dt**2., 0], [0, 1, 0, dt, 0, dt**2.],
                     [0, 0, 1, 0, dt, 0], [0, 0, 0, 1, 0, dt]])
     new_sample = self._init_sample(condition)
     for t in range(self.T):
         X_t = new_sample.get_X(t=t)
         obs_t = new_sample.get_obs(t=t)
         mj_U = policy.act(X_t, obs_t, t, noise[t, :])
         U[t, :] = mj_U
         if verbose:
             self._world[condition].plot(mj_X)
         if (t + 1) < self.T:
             for _ in range(self._hyperparams['substeps']):
               if self._linear:
                 mj_X = F.dot(np.r_[mj_X, mj_U])
               else:
                 mj_X, _ = self._world[condition].step(mj_X, mj_U)
             #TODO: Some hidden state stuff will go here.
             self._data = self._world[condition].get_data()
             self._set_sample(new_sample, mj_X, t, condition)
     new_sample.set(ACTION, U)
     if save:
         self._samples[condition].append(new_sample)
     return new_sample
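
The `self._linear` branch above replaces the simulator step with a fixed linear model, x_{t+1} = F·[x_t; u_t]. The following self-contained sketch shows that update in isolation, assuming a 2-D double-integrator state [x, y, vx, vy] driven by an acceleration command; the dt value is illustrative.

import numpy as np

# Hedged sketch of the linear-dynamics step used when self._linear is set:
# the state is rolled forward with a fixed matrix instead of the simulator.
dt = 0.05  # illustrative time step
F = np.array([[1, 0, dt, 0, dt**2., 0],
              [0, 1, 0, dt, 0, dt**2.],
              [0, 0, 1, 0, dt, 0],
              [0, 0, 0, 1, 0, dt]])

x = np.zeros(4)            # state: [x, y, vx, vy]
u = np.array([1.0, 0.5])   # constant acceleration command
for _ in range(10):
    x = F.dot(np.r_[x, u])  # x_{t+1} = F @ [x_t; u_t]
print(x)
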
Example #3
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        self.reset(condition)
        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
                self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            return sample
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, time_to_run=self._hyperparams['trial_timeout'])
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            return sample
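
Every example relies on `generate_noise(self.T, self.dU, self._hyperparams)` to produce a T x dU exploration-noise array. Here is a minimal stand-in, assuming the usual GPS convention of Gaussian noise that is optionally smoothed over time and renormalized; the hyperparameter keys used here are assumptions, not taken from the examples.

import numpy as np
from scipy.ndimage import gaussian_filter1d

# Hedged stand-in for generate_noise(T, dU, hyperparams). The hyperparameter
# keys ('smooth_noise', 'smooth_noise_var', 'smooth_noise_renormalize') are
# assumptions modelled on the usual GPS convention.
def generate_noise_sketch(T, dU, hyperparams):
    noise = np.random.randn(T, dU)
    if hyperparams.get('smooth_noise', False):
        sigma = hyperparams.get('smooth_noise_var', 2.0)
        # Smooth each action dimension along the time axis.
        noise = gaussian_filter1d(noise, sigma=sigma, axis=0)
        if hyperparams.get('smooth_noise_renormalize', True):
            # Restore per-dimension unit variance after smoothing.
            noise /= np.std(noise, axis=0)
    return noise
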
Example #4
 def sample(self, policy, condition, verbose=True, save=True, noisy=True):
     """
     Runs a trial and constructs a new sample containing information
     about the trial.
     Args:
         policy: Policy to be used in the trial.
         condition: Which condition setup to run.
         verbose: Whether or not to plot the trial.
         save: Whether or not to store the trial into the samples.
         noisy: Whether or not to use noise during sampling.
     """
     # Create new sample, populate first time step.
     feature_fn = None
     if 'get_features' in dir(policy):
         feature_fn = policy.get_features
     new_sample = self._init_sample(condition, feature_fn=feature_fn)
     mj_X = self._hyperparams['x0'][condition]
     U = np.zeros([self.T, self.dU])
     if noisy:
         noise = generate_noise(self.T, self.dU, self._hyperparams)
     else:
         noise = np.zeros((self.T, self.dU))
     if np.any(self._hyperparams['x0var'][condition] > 0):
         x0n = self._hyperparams['x0var'] * \
                 np.random.randn(self._hyperparams['x0var'].shape)
         mj_X += x0n
     noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
     if noisy_body_idx.size > 0:
         for i in range(len(noisy_body_idx)):
             idx = noisy_body_idx[i]
             var = self._hyperparams['noisy_body_var'][condition][i]
             self._model[condition]['body_pos'][idx, :] += \
                     var * np.random.randn(1, 3)
     # Take the sample.
     for t in range(self.T):
         X_t = new_sample.get_X(t=t)
         obs_t = new_sample.get_obs(t=t)
         mj_U = policy.act(X_t, obs_t, t, noise[t, :])
         U[t, :] = mj_U
         if verbose:
             self._world[condition].plot(mj_X)
         if (t + 1) < self.T:
             for _ in range(self._hyperparams['substeps']):
                 mj_X, _ = self._world[condition].step(mj_X, mj_U)
             self._data = self._world[condition].get_data()
             self._set_sample(new_sample, mj_X, t, condition, feature_fn=feature_fn)
     new_sample.set(ACTION, U)
     new_sample.set(NOISE, noise)
     if save:
         self._samples[condition].append(new_sample)
     return new_sample
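
The feature-function lookup at the top of this example (`'get_features' in dir(policy)`) can be written more compactly with `getattr`. A small equivalent sketch, wrapped as a helper so it is self-contained:

# Equivalent, more idiomatic lookup: getattr with a default avoids scanning
# dir(policy). Works for any policy object, with or without get_features.
def get_feature_fn(policy):
    return getattr(policy, 'get_features', None)
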
Example #5
    def execute(self, policy):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        #self.reset(condition)
        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        # TODO(Brook): tgt command try
        #trial_command.ee_points_tgt = self._tgt_subscribe.subscribe_and_wait()


        trial_command.ee_points_tgt = \
            self._hyperparams['ee_points_tgt'][0].tolist()

        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, time_to_run=self._hyperparams['trial_timeout'])
            sample = msg_to_sample(sample_msg, self)
        return sample
Example #6
    def sample(self, policy, condition, verbose=True, save=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
        Returns:
            sample: A Sample object.
        """
        self.reset(condition)

        # Generate noise.
        noise = generate_noise(self.T, self.dU, self._hyperparams)

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
                self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout']
        )

        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample
Example #7
class AgentBusPol(Agent):
	"""
	All communication between the algorithms and Vehicle is done through
	this class.
	"""
	def __init__(self, hyperparams):
		config = deepcopy(AGENT_BUS)
		config.update(hyperparams)
		Agent.__init__(self, config)
		self._setup_conditions()
		self.reach_start = None
		self.reach_end = None
		self.finishing = None
		self.finishing_time = None
		self._setup_world(self._hyperparams["world"],
						  self._hyperparams["target_state"],
						  self._hyperparams["render"],
						  self._hyperparams["polygons"],
						  self._hyperparams["map_size"],
						  self._hyperparams["map_state"],
						  self._hyperparams["display_center"],)
		

	def _setup_conditions(self):
		"""
		Helper method for setting some hyperparameters that may vary by
		condition.
		"""
		conds = self._hyperparams['conditions']
		# for field in ('x0', 'x0var', 'pos_body_idx', 'pos_body_offset',
        # 	'noisy_body_idx', 'noisy_body_var', 'filename'):
        #     self._hyperparams[field] = setup(self._hyperparams[field], conds)
		self._hyperparams['x0'] = setup(self._hyperparams['x0'], conds)


	def _setup_world(self, world, target_state, render, polygons, map_size, map_state, display_center):
		"""
		Helper method for handling setup of the Box2D world.
		"""
		self.x0 = self._hyperparams["x0"]  # initial state
		self._worlds = [world(self.x0[i], target_state, render, map_size, polygons=polygons, map_state=map_state, display_center=display_center)
						for i in range(self._hyperparams['conditions'])]
		
	def sample(self, policy, condition, verbose=False, save=True, noisy=True):
		"""
		Runs a trial and constructs a new sample containing information
		about the trial.

		Args:
			policy: Policy to be used in the trial.
			condition (int): Which condition setup to run.
			verbose (boolean): Whether or not to plot the trial (not used here).
			save (boolean): Whether or not to store the trial into the samples.
			noisy (boolean): Whether or not to use noise during sampling.
		"""
		# Modified on April 2, referring to agent_mjc
		# reset the world and assign the initialized state to new_sample
		# self._worlds[condition].run()
		# self._worlds[condition].reset_world()
		feature_fn = None
		if 'get_features' in dir(policy):
			feature_fn = policy.get_features
		b2d_X = self._worlds[condition].get_state()
		new_sample = self._init_sample(b2d_X)
		# initialize a dummy action sequence
		U = np.zeros([self.T, self.dU])		
		self.reach_start = None
		self.reach_end = None	

		if noisy:
			noise = generate_noise(self.T, self.dU, self._hyperparams)
		else:
			noise = np.zeros((self.T, self.dU))
		for t in range(self.T):
			X_t = new_sample.get_X(t=t)
			obs_t = new_sample.get_obs(t=t)
			U[t, :] = policy.act(X_t, obs_t, t, noise[t, :])
			# print(U[t])
			if (t+1) < self.T:
				for _ in range(self._hyperparams['substeps']):
					self._worlds[condition].run_next(U[t, :])
				if self._worlds[condition].reach:
					self._worlds[condition].reach = False
					if self.reach_start == None:
						self.reach_start = t
						# print("reach_start", t)
					elif self.reach_end == None or t>self.reach_end:
						self.reach_end = t
					if t==self.T-2:
						# continue reaching till the end of series
						# print("reach_end", self.reach_end)
						period = self.reach_end - self.reach_start
						# print("reach period", period)
						if period > 3:
							self.finishing = True
							# self.finishing_time = self.reach_end
							self.finishing_time = self.reach_start
				elif self.reach_end == t-1 :
					# just leave
					# print("reach_end", self.reach_end)
					period = self.reach_end - self.reach_start
					# print("reach period", period)
					if period > 1:
						self.finishing = True
						# self.finishing_time = self.reach_end
						self.finishing_time = self.reach_start
						if self.finishing_time == 0:
							self.finishing_time = 1				
				b2d_X = self._worlds[condition].get_state()
				self._set_sample(new_sample, b2d_X, t)			
		
		new_sample.set(ACTION, U)
		if save:
			self._samples[condition].append(new_sample)
		# if self.finishing:
			# print("agent_bus t= ", self.finishing_time)
		return new_sample
Example #8
    def sample(
        self,
        policy,
        condition,
        save=True,
        noisy=True,
        reset_cond=None,
        randomize_initial_state=0,
        **kwargs,
    ):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.
            randomize_initial_state: Perform random steps after resetting to simulate a noisy initial state.

        Returns:
            sample: A Sample object.

        """
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Get a new sample
        sample = Sample(self)

        # Get initial state
        self.env.seed(None if reset_cond is None else self.x0[reset_cond])
        obs = self.env.reset()
        if randomize_initial_state > 0:
            # Take random steps randomize initial state distribution
            self.env._set_action(
                (self.env.action_space.high - self.env.action_space.low) / 12 *
                np.random.normal(size=self.dU) * randomize_initial_state)
            for _ in range(5):
                self.sim.step()
            obs = self.env.step(np.zeros(self.dU))[0]

        self.set_states(sample, obs, 0)
        U_0 = policy.act(sample.get_X(0), sample.get_obs(0), 0, noise)
        sample.set(ACTION, U_0, 0)
        for t in range(1, self.T):
            if self.render:
                self.env.render(mode='human')

            # Get state
            obs, _, done, _ = self.env.step(sample.get_U(t - 1))
            self.set_states(sample, obs, t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
            sample.set(ACTION, U_t, t)

            if done and t < self.T - 1:
                raise Exception('Iteration ended prematurely %d/%d' %
                                (t + 1, self.T))
        if save:
            self._samples[condition].append(sample)
        return sample
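
This variant follows the Gym-style reset/step pattern instead of driving a dedicated simulator world. The sketch below shows that control flow in isolation; the dummy environment and the random policy are illustrative stand-ins, not code from the example.

import numpy as np

# Hedged, self-contained sketch of the Gym-style rollout pattern above:
# act on the current state, step the environment with the previous action,
# and record states and actions for T steps.
class DummyEnv:
    def __init__(self, dX=4, dU=2):
        self.dX, self.dU = dX, dU

    def reset(self):
        return np.zeros(self.dX)

    def step(self, u):
        # observation, reward, done, info
        return np.random.randn(self.dX), 0.0, False, {}

def rollout(env, act_fn, T):
    X = np.zeros((T, env.dX))
    U = np.zeros((T, env.dU))
    X[0] = env.reset()
    U[0] = act_fn(X[0], 0)
    for t in range(1, T):
        X[t], _, done, _ = env.step(U[t - 1])
        U[t] = act_fn(X[t], t)
        if done and t < T - 1:
            raise RuntimeError('Iteration ended prematurely %d/%d' % (t + 1, T))
    return X, U

X, U = rollout(DummyEnv(), lambda x, t: np.random.randn(2), T=10)
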
Example #9
    def sample(self,
               policy,
               condition,
               save=True,
               noisy=True,
               reset_cond=None,
               **kwargs):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.

        Returns:
            sample: A Sample object.

        """
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        sample = Sample(self)
        self.reset(reset_cond)

        # Execute policy over a time period of [0,T]
        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in sample
            latest_sample = self.get_data()
            for sensor_type in self.x_data_types:
                data = latest_sample.get(sensor_type)
                if self.scaler is not None:
                    data = self.__transform(sensor_type, data)
                sample.set(sensor_type, data, t)

            # Compute site Jacobians
            jac = np.tile(self.jac[:3], (3, 1))
            rotation = sp.spatial.transform.Rotation.from_euler(
                "XYZ", -latest_sample.get(END_EFFECTOR_ROTATIONS))
            for i in range(3):
                rot_ee = rotation.apply(self.ee_points[i])
                for k in range(6):
                    jac[i * 3:(i + 1) * 3,
                        k] += np.cross(self.jac[3:, k], rot_ee)
            sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=t)

            # Use END_EFFECTOR_POINTS as distance to target
            sample.set(END_EFFECTOR_POINTS,
                       sample.get(END_EFFECTOR_POINTS, t) -
                       self.ee_points_tgt / self.scaler.scale_[-9:],
                       t=t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
            U_t = np.clip(U_t, -4, 4)

            # Perform action
            self.reset_arm(None, None, U_t, False)
            sample.set(ACTION, U_t, t)

            # Check if agent is keeping up
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical("Agent can't keep up. %fs behind." %
                                 sleep_time)
            elif sleep_time < self.dt / 2:
                logging.warning("Agent may not keep up (%.0f percent busy)" %
                                (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for next timestep
            if sleep_time > 0:
                time.sleep(sleep_time)

        if save:
            self._samples[condition].append(sample)
        self.reset(reset_cond)
        return sample
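
The timing check near the end of the loop above keeps the rollout at the control frequency and warns when it falls behind. The same logic, isolated as a small helper, is sketched here; this is an editorial sketch, not the agent's actual code.

import logging
import time

# Hedged sketch of the pacing logic above: given the loop start time, the
# step index t, and the control period dt, sleep until the next timestep and
# warn if the loop is falling behind.
def pace(start, t, dt):
    sleep_time = start + (t + 1) * dt - time.time()
    if sleep_time < 0:
        logging.critical("Agent can't keep up. %fs behind.", sleep_time)
    elif sleep_time < dt / 2:
        logging.warning("Agent may not keep up (%.0f percent busy)",
                        ((dt - sleep_time) / dt) * 100)
    if sleep_time > 0:
        time.sleep(sleep_time)
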
Example #10
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)
        self.reset()
        self.all_object_traj = {id: np.empty((0, 3)) for id in self.target_ids}
        if self.take_video:
            self.rgb_writer = create_writer(
                self._hyperparams['data_files_dir'],
                classifier='itr_{}'.format(self.idx_curr_itr),
                fps=5)

        X, new_sample = self._init_sample(condition)
        b_X = self._hyperparams['x0'][condition]
        U = np.zeros([self.T, self.dU])
        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))
        noise /= 1
        # Take the sample.
        for t in range(self.T):
            curr_time = rospy.get_time()
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            # print('cost: {}'.format(np.linalg.norm(self._hyperparams['cost_tgt'][t, :6] - X[:6])))

            b_U = policy.act(X_t, obs_t, t, noise[t, :])
            # print(b_U)
            U[t, :] = b_U
            if (t + 1) % 10 == 0:
                print('sample policy action at t={}: {}'.format(t, b_U))
            b_U[0:3] = np.clip(
                b_U[0:3], -self._hyperparams['max_velocity'],
                self._hyperparams['max_velocity'])  # clip task space
            b_U[3] = np.clip(b_U[3], -0.4, 0.4)  # clip rotation
            b_U[-1] = np.clip(b_U[-1], -30, 30)  # clip gripper
            b_U *= self._hyperparams['set_action_to_zero']
            # print(_U)
            if (t + 1) < self.T:
                # b_X, b_U_check = self._step(b_U, curr_time)
                # self._step_taskspace_vel(b_U, X, curr_time, t)
                self._step_taskspace(b_U, X, curr_time, t)
                X = self._set_sample(new_sample, t, condition)
        self._trial_gripper.command_position(
            self._hyperparams['gripper_reset_position']
        )  # open gripper at reset (100 is fully opened)

        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)

        xx, image = self._get_current_state(t)
        if self.take_video:
            self.rgb_writer.append_data(image)
        self.axes[1, 0].clear()
        self.axes[1, 0].set_title('ee_to_anchor_distance')
        self.axes[1, 0].plot(np.array(self.geom_dist_ee_to_anchor_traj)[:, 0],
                             c='r',
                             label='x')
        self.axes[1, 0].plot(np.array(self.geom_dist_ee_to_anchor_traj)[:, 1],
                             c='g',
                             label='y')
        self.axes[1, 0].plot(np.array(self.geom_dist_ee_to_anchor_traj)[:, 2],
                             c='b',
                             label='z')
        self.axes[1, 0].legend()

        self.axes[1, 1].clear()
        self.axes[1, 1].set_title('object2_to_anchor_distance')
        obj2_to_anchor = np.array(self.geom_dist_object2_to_anchor_traj)
        self.axes[1, 1].plot(obj2_to_anchor[:, 0], c='r', label='x')
        self.axes[1, 1].plot(obj2_to_anchor[:, 1], c='g', label='y')
        self.axes[1, 1].plot(obj2_to_anchor[:, 2], c='b', label='z')
        self.axes[1, 1].legend()

        self.axes[3, 1].clear()
        self.axes[3, 1].set_title('pixel_feature_state')
        self.axes[3, 1].plot(np.array(self.feature_traj), c='r')
        self.axes[3, 1].legend()

        self.geom_dist_ee_to_anchor_traj = []
        self.geom_dist_object2_to_anchor_traj = []
        self.feature_traj = []

        # print("reached endpoint: ",sxx[-6:].tolist())

        print('finished rollout {}'.format(self.idx_curr_rollout + 1))

        self.idx_curr_rollout += 1
        if self.take_video:
            self.rgb_writer.close()

        if save:
            self._samples[condition].append(new_sample)

        return new_sample
Example #11
    def sample(self,
               policy,
               condition,
               verbose=True,
               save=True,
               noisy=False,
               screenshot_prefix=None,
               superball_parameters=None):
        if superball_parameters is None:
            superball_parameters = {}
        sample_params = copy.deepcopy(SUPERBALL_DEFAULT_SAMPLE_PARAMETERS)
        sample_params.update(superball_parameters)
        rospy.set_param('/verbose_trial', int(verbose))
        if screenshot_prefix:
            import pyscreenshot

        if sample_params['horizon'] is not None:
            # Do not save the sample if a custom horizon is defined.
            horizon = sample_params['horizon']
            save = False
        else:
            horizon = self.T

        gain = sample_params['motor_position_control_gain']

        # Reset or relax
        if sample_params['reset']:
            self.reset(0, sample_params['bottom_face'],
                       sample_params['start_motor_positions'])
        elif sample_params['relax']:
            self.relax()
        new_sample = self._init_sample(horizon)
        U = np.zeros([horizon, self.dU])
        noise = generate_noise(horizon, self.dU, self._hyperparams)
        for t in range(horizon):
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            U_t = policy.act(X_t, obs_t, t, noise[t, :])

            if sample_params['debug'] and t >= horizon - 1:
                sys.stdout.write('[')
                for elem in X_t[X_t.shape[0] - 24:X_t.shape[0] - 12]:
                    sys.stdout.write('{}, '.format(elem))
                sys.stdout.write('],\n')

            U[t, :] = U_t
            if (t + 1) < horizon:
                if self._hyperparams['ctrl_vel']:
                    self._set_motor_velocities(U_t)
                else:
                    self._set_motor_positions(U_t)
                self._advance_simulation()
                if screenshot_prefix:
                    img = screenshot_prefix + '_' + str(t).zfill(3) + '.png'
                    pyscreenshot.grab(bbox=(65, 50, 705, 530)).save(img)
                self._set_sample(new_sample, t)
        new_sample.set(ACTION, U)
        if save:
            self._samples[condition].append(new_sample)

        return new_sample
Example #12
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if self.vid_seqname % 4 == 0:
            rgb_writer = self.create_writers()
        else:
            rgb_writer = None
        ## RESET ROBOT!!!! <--- implement that!!
        self.env.reset(self.reset_condition)
        new_sample, image_data = self._init_sample(condition)
        #mj_X = self._hyperparams['x0'][condition]  # = b_X
        U = np.zeros([self.T, self.dU])

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        noise *= 1

        gripper_persistency_counter = 0
        allow_gripper_change = False

        # Take the sample.
        for t in range(self.T):  # 100 steps
            if rgb_writer is not None:
                rgb_writer.append_data(image_data[0])

            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            mj_U = policy.act(X_t, obs_t, t,
                              noise[t, :])  # get actions from policy

            # mj_U[:3] /= 10
            # mj_U[3] /= 10000
            # mj_U[0] = 0
            mj_U[3] = 0
            eepos = p.getLinkState(
                self.o.kukaobject.kukaId,
                self.o.kukaobject.kukaEndEffectorIndex)[
                    0]  # is that correct?? or is it another index??
            if eepos[2] + mj_U[2] < 0.8200000357627868:  # Prevent hitting table
                mj_U[2] = 0
            if eepos[1] + mj_U[2] > 0.000000357627868:  # Prevent going too far
                mj_U[1] = 0
            delta = self._hyperparams['delta_taskspace']
            self.taskspace_deltas = np.array([delta, delta, delta])

            # mj_U = np.clip(mj_U, -delta, delta)
            norm = np.sqrt((np.sum(mj_U**2)))

            if norm >= delta:
                mj_U = mj_U * delta / norm
                print("saturated action")

            norm_check = np.sqrt((np.sum(mj_U**2)))
            # print(mj_U)

            #mj_U = np.clip(mj_U, -self.taskspace_deltas, self.taskspace_deltas)

            U[t, :] = mj_U
            #mj_U[7] *= 7
            if gripper_persistency_counter > 10 and gripper_persistency_counter < 50:
                allow_gripper_change = True
                gripper_persistency_counter = 60

            gripper_persistency_counter += 1  # check if gripper is in same position as last time step

            if (t + 1) < self.T:
                curr_time = time.time()

                # print("step {}".format(t))
                ### step simulation with mj_X and mj_U

                if self._hyperparams['control_type'] == 'task':
                    self.step_taskspace_trans(mj_U, allow_gripper_change)
                    allow_gripper_change = False

                else:
                    self.step_jointspace(mj_U)
                if t >= self.T - 3:
                    self.o.kukaobject.open_gripper()

                #print(np.linalg.norm(object_p3d[:3]))
                #print(object_p3d[:3])
                run_time = time.time() - curr_time
                #print("runtime: {}".format(run_time))
                stateX, jac_t, image_data = self.get_state(t)
                time.sleep(max(self._hyperparams['dt'] - run_time, 0.0))
                self._set_sample(
                    new_sample, stateX, jac_t, t,
                    condition)  # is jac_t correct or should it be jac_r??

        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)
        if save:
            self._samples[condition].append(new_sample)
        self.vid_seqname += 1
        if rgb_writer is not None:
            rgb_writer.close()
        # print("distance target to anchor: {}, ground truth distance: {}".format(np.linalg.norm(new_sample.get(OBJECT_POSE, t=self.T-1)), np.linalg.norm(self.hyperparams['debug_cost_tgt'][-1])))
        return new_sample
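
The saturation step above rescales the whole task-space action when its Euclidean norm exceeds `delta`, which preserves the commanded direction instead of clipping each component separately. A standalone helper sketch under that same assumption:

import numpy as np

# Hedged sketch of norm-based action saturation: rescale the vector onto the
# delta-ball rather than clipping components independently.
def saturate_action(u, delta):
    norm = np.linalg.norm(u)
    if norm >= delta:
        u = u * (delta / norm)
    return u

print(saturate_action(np.array([0.3, 0.4, 0.0]), 0.25))  # -> [0.15 0.2 0.]
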
Example #13
    def sample(self,
               policy,
               condition,
               iteration,
               verbose=True,
               save=True,
               noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.
        Args:
            policy: Policy to be used in the trial.
            condition: Which condition setup to run.
            iteration: Iteration number, used to name the saved action logs.
            verbose: Whether or not to plot the trial.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        """
        # Create new sample, populate first time step.
        feature_fn = None
        if 'get_features' in dir(policy):
            feature_fn = policy.get_features
        # noisy = False
        ## TODO : where below line should be located?
        new_sample = self._init_sample(condition, feature_fn=feature_fn)
        mj_X = self._hyperparams['x0'][condition]
        U = np.zeros([self.T, self.dU])
        U_origin = np.zeros([self.T, self.dU])

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        if np.any(self._hyperparams['x0var'][condition] > 0):
            x0n = self._hyperparams['x0var'] * \
                    np.random.randn(self._hyperparams['x0var'].shape)
            mj_X += x0n
        noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
        if noisy_body_idx.size > 0:
            for i in range(len(noisy_body_idx)):
                idx = noisy_body_idx[i]
                var = self._hyperparams['noisy_body_var'][condition][i]
                self._model[condition]['body_pos'][idx, :] += \
                        var * np.random.randn(1, 3)

        torq_max1 = 45  # origin 87
        torq_max2 = 6  # origin 12

        # self.panda.set_force_threshold_for_collision([20, 20, 10, 25, 25, 25]) # X,Y,Z,R,P,Y
        self.panda.set_collision_threshold(
            cartesian_forces=[20, 20, 10, 25, 25, 25])  # X,Y,Z,R,P,Y
        while True:
            # panda : move to joint position

            ## TODO : where below line should be located?
            # new_sample = self._init_sample(condition, feature_fn=feature_fn)   # new_sample: class 'Sample'

            try:
                # self.panda.enable_robot()
                if not self.panda.is_enabled_robot():
                    raise StopIteration
                self.panda.move_to_joint_position(
                    self._hyperparams['x0'][condition][0:7])
                time.sleep(2)

                # Take the sample.
                for t in range(self.T):
                    X_t = new_sample.get_X(t=t)
                    obs_t = new_sample.get_obs(t=t)
                    mj_U = policy.act(X_t, obs_t, t, noise[t, :])
                    mj_U_origin = mj_U.copy()
                    for i in range(len(mj_U)):
                        if i < 4 and mj_U[i] > torq_max1:
                            mj_U[i] = torq_max1
                        elif i < 4 and mj_U[i] < -torq_max1:
                            mj_U[i] = -torq_max1
                        elif i >= 4 and mj_U[i] > torq_max2:
                            mj_U[i] = torq_max2
                        elif i >= 4 and mj_U[i] < -torq_max2:
                            mj_U[i] = -torq_max2

                    U[t, :] = mj_U
                    U_origin[t, :] = mj_U_origin
                    # print 'mj_U: ', mj_U
                    # print 'mj_U dict: ', self.list_to_dict(mj_U)

                    if (t + 1) < self.T:
                        # self.panda.enable_robot()
                        # for _ in range(self._hyperparams['substeps']):

                        if self.panda.has_collided():
                            raise StopIteration

                        if not self.panda.is_enabled_robot():
                            raise StopIteration
                        # panda move with mj_U
                        # self.panda.set_joint_velocities(self.list_to_dict(mj_U))
                        # self.panda.exec_velocity_cmd(mj_U)
                        self.panda.exec_torque_cmd(mj_U)
                        # self.panda.exec_position_cmd(mj_U)

                        print("current step(t): ", t)
                        self._set_sample(new_sample,
                                         mj_X,
                                         t,
                                         condition,
                                         feature_fn=feature_fn)

                    self.r.sleep()  # to sample data at some frequency

                # Torque commands to allow the robot to finish the trajectory.
                for _ in range(15):
                    self.panda.exec_torque_cmd([0, 0, 0, 0, 0, 0, 0])
                time.sleep(1)

                break

            except StopIteration:
                print("robot stopped!!!")
                self.panda.enable_robot()
                time.sleep(2)
                continue

            finally:
                f = '/home/panda_gps/gps/experiments/panda_test_dongju/action_origin_' + str(
                    iteration) + '.npy'
                np.save(f, U_origin)
                f = '/home/panda_gps/gps/experiments/panda_test_dongju/action_clipped_' + str(
                    iteration) + '.npy'
                np.save(f, U)

        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)
        if save:
            self._samples[condition].append(new_sample)
        return new_sample
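
The element-wise torque clipping loop above (first four joints at ±45, the remaining joints at ±6) can be expressed as a single `np.clip` against a per-joint limit vector. A hedged equivalent, assuming a 7-DoF arm as in the example:

import numpy as np

# Hedged equivalent of the torque clipping loop above: torq_max1 = 45 for the
# first four joints, torq_max2 = 6 for the remaining three.
TORQUE_LIMITS = np.array([45., 45., 45., 45., 6., 6., 6.])

def clip_torques(u, limits=TORQUE_LIMITS):
    return np.clip(u, -limits, limits)
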
Example #14
    def sample(
        self,
        policy,
        condition,
        verbose=True,
        save=True,
        noisy=True,
        use_TfController=False,
        timeout=None,
        reset_cond=None,
        record=False
    ):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            use_TfController: Whether to use the synchronous TfController.
            timeout: Unused for this agent.
            reset_cond: The initial condition to reset the agent into.
            record: Whether or not to record video of the rollout.
        Returns:
            sample: A Sample object.
        """

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Get a new sample
        sample = Sample(self)

        self.env.video_callable = lambda episode_id, record=record: record
        # Get initial state
        self.env.seed(None if reset_cond is None else self.x0[reset_cond])
        obs = self.env.reset()
        if self._hyperparams.get('initial_step', 0) > 0:
            # Take one random step to get a slightly random initial state distribution
            U_initial = (self.env.action_space.high - self.env.action_space.low
                        ) / 12 * np.random.normal(size=self.dU) * self._hyperparams['initial_step']
            obs = self.env.step(U_initial)[0]
        self.set_states(sample, obs, 0)
        U_0 = policy.act(sample.get_X(0), sample.get_obs(0), 0, noise)
        sample.set(ACTION, U_0, 0)
        for t in range(1, self.T):
            if not record and self.render:
                self.env.render(mode='human')  # TODO add hyperparam

            # Get state
            obs, _, done, _ = self.env.step(sample.get_U(t - 1))
            self.set_states(sample, obs, t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
            sample.set(ACTION, U_t, t)

            if done and t < self.T - 1:
                raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T))
        if save:
            self._samples[condition].append(sample)
        self.active = False
        #print("X", sample.get_X())
        #print("U", sample.get_U())
        return sample
Example #15
    def sample(self, policy, condition, iteration, verbose=True, save=True, noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.
        Args:
            policy: Policy to be used in the trial.
            condition: Which condition setup to run.
            verbose: Whether or not to plot the trial.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        """
        # Create new sample, populate first time step.
        feature_fn = None
        if 'get_features' in dir(policy):
            feature_fn = policy.get_features
        
        new_sample = self._init_sample(condition, feature_fn=feature_fn)
        mj_X = self._hyperparams['x0'][condition]
        U = np.zeros([self.T, self.dU])

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))
        
        if np.any(self._hyperparams['x0var'][condition] > 0):
            x0n = self._hyperparams['x0var'] * \
                    np.random.randn(self._hyperparams['x0var'].shape)
            mj_X += x0n
        noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
        
        if noisy_body_idx.size > 0:
            for i in range(len(noisy_body_idx)):
                idx = noisy_body_idx[i]
                var = self._hyperparams['noisy_body_var'][condition][i]
                self._model[condition]['body_pos'][idx, :] += \
                        var * np.random.randn(1, 3)
        
        ### add a function to move indy robot to initial position.
        self.indy.joint_move_to(self._hyperparams['x0'][condition][0:6])
        ###
        time.sleep(2)
        
        # Take the sample.
        for t in range(self.T):
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            
            mj_U = policy.act(X_t, obs_t, t, noise[t, :])
            U[t, :] = mj_U
                        
            if (t + 1) < self.T:
                ### add a function that send a torque command to an indy robot.
                self.indy.joint_move_to(mj_U)
                ###

                print("current step(t): ", t)
                self._set_sample(new_sample, mj_X, t, condition, feature_fn=feature_fn)
            
            self.r.sleep() # to sample data at some frequency
        
        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)

        if save:
            self._samples[condition].append(new_sample)
        return new_sample
Example #16
    def sample(self,
               policy,
               condition,
               save=True,
               noisy=True,
               reset_cond=None,
               **kwargs):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.

        Returns:
            sample: A Sample object.

        """
        # Get a new sample
        sample = Sample(self)
        sample_ok = False
        while not sample_ok:
            if not self.debug:
                self.reset(reset_cond)

            self.__init_opcua()

            if noisy:
                noise = generate_noise(self.T, self.dU, self._hyperparams)
            else:
                noise = None

            # Execute policy over a time period of [0,T]
            start = time.time()
            for t in range(self.T):
                # Read sensors and store sensor data in sample
                def store_sensor(sensor):
                    sample.set(sensor, self.read_sensor(sensor), t)

                self.pool.map(store_sensor, self.sensors)
                # Override sensors
                for override in self.sensor_overrides:
                    if override['condition'](t):
                        sensor = override['sensor']
                        sample.set(sensor, override['value'](sample, t), t)

                print('X_%02d' % t, sample.get_X(t))

                # Get action
                U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

                # Override actuators
                for override in self.actuator_overrides:
                    if override['condition'](t):
                        actuator = override['actuator']
                        U_t[self._u_data_idx[actuator]] = np.copy(
                            override['value'])

                # Send signals
                self.send_signals(t)

                # Perform action
                for actuator in self._u_data_idx:
                    self.write_actuator(actuator,
                                        U_t[self._u_data_idx[actuator]])
                sample.set(ACTION, U_t, t)

                print('U_%02d' % t, U_t)

                # Check if agent is keeping up
                sleep_time = start + (t + 1) * self.dt - time.time()
                if sleep_time < 0:
                    logging.critical("Agent can't keep up. %fs behind." %
                                     sleep_time)
                elif sleep_time < self.dt / 2:
                    logging.warning(
                        "Agent may not keep up (%.0f percent busy)" %
                        (((self.dt - sleep_time) / self.dt) * 100))

                # Wait for next timestep
                if sleep_time > 0 and not self.debug:
                    time.sleep(sleep_time)
            if save:
                self._samples[condition].append(sample)
            self.finalize_sample()

            sample_ok = self.debug or input('Continue?') == 'y'
            if not sample_ok:
                print('Repeating')
        return sample
Example #17
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.
        Args:
            policy: Policy to be used in the trial.
            condition: Which condition setup to run.
            verbose: Whether or not to plot the trial.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        """
        # Create new sample, populate first time step.
        #self._init_tf(policy.dU)
        feature_fn = None
        if 'get_features' in dir(policy):
            feature_fn = policy.get_features

        mj_X = self._hyperparams['x0'][condition]
        U = np.zeros([self.T, self.dU])

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Not called
        if np.any(self._hyperparams['x0var'][condition] > 0):
            x0n = self._hyperparams['x0var'] * \
                    np.random.randn(self._hyperparams['x0var'].shape)
            mj_X += x0n
        noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]

        # Not called
        if noisy_body_idx.size > 0:
            for i in range(len(noisy_body_idx)):
                idx = noisy_body_idx[i]
                var = self._hyperparams['noisy_body_var'][condition][i]


                self._model[condition]['body_pos'][idx, :] += \
                        var * np.random.randn(1, 3)

        # self._world[condition].set_model(self._model[condition])

        ## INIT BAXTER
        #self.baxter.move_baxter_to_joint_positions([0.32, -0.71, 0.68, 1.09, 0.07, 0.76, 0.13])   # for ball_punching task
        #self.baxter.move_baxter_to_joint_positions([0.27, -1.14, 0.98, 1.60, 0.15, 0.51, 0.27])

        self.baxter.move_baxter_to_joint_positions(
            self._hyperparams['x0'][condition][0:7])

        new_sample = self._init_sample(
            condition, feature_fn=feature_fn)  # new_sample: class 'Sample'

        for t in range(self.T):
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)

            # set the ACTION for the bot gotten from the policy calculations, and apply.
            print(policy)
            #mj_U = policy.act(X_t, obs_t, t, noise[t, :])
            mj_U = policy.act(X_t, obs_t, t, noise[t, :], condition)
            print(mj_U)
            U[t, :] = mj_U

            # print 'the action to take in step ' + str(t) + ' is: ' + str(mj_U)

            # if verbose:
            #     self._world[condition].plot(mj_X)

            # every step but the last
            if (t + 1) < self.T:
                for _ in range(self._hyperparams['substeps']):

                    # This is the call to mjcpy to set the robot
                    # mj_X, _ = self._world[condition].step(mj_X, mj_U)

                    # Set the baxter joint velocities through the Baxter API
                    self.baxter.set_baxter_joint_velocities(mj_U)
                    #self.baxter.set_baxter_joint_positions(mj_U)
                    #self.baxter.set_baxter_joint_torques(mj_U)

                    #print "mj_U: ", mj_U
                    #print "mj_U.shape; ", mj_U.shape

                    # mj_X[self._joint_idx] = self.baxter.get_baxter_joint_angles_positions()
                    # mj_X[self._vel_idx] = self.baxter.get_baxter_joint_angles_velocities()

                    # print 'here is mj_X: ', mj_X
                    # mj_X = self.baxter.get_baxter_joint_angles()

                #TODO: Some hidden state stuff will go here.
                # self._data = self._world[condition].get_data()
                #time.sleep(1)
                print "\ncurrent step(t): ", t
                self._set_sample(new_sample,
                                 mj_X,
                                 t,
                                 condition,
                                 feature_fn=feature_fn)

        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)

        if save:
            self._samples[condition].append(new_sample)
        return new_sample
Example #18
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """

        rgb_writer = self.create_writers()

        orn = [0.70603903128, 0.708148792076, 0, 0]
        #orn = p.getQuaternionFromEuler([-math.pi/2,0, math.pi/2])
        pos = [1.0, -0.400000, 0.9]

        self.env.reset(pos + orn)

        new_sample, image_data = self._init_sample(condition)
        U = np.zeros([self.T, self.dU])

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # allow_gripper_change is referenced in the loop below but never
        # toggled in this variant, so initialise it here.
        allow_gripper_change = False

        # Take the sample.
        for t in range(self.T):  # 100 steps
            if rgb_writer is not None:
                rgb_writer.append_data(image_data[0])
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            mj_U = policy.act(X_t, obs_t, t,
                              noise[t, :])  # get actions from policy
            U[t, :] = mj_U

            eepos = p.getLinkState(
                self.o.kukaobject.kukaId,
                self.o.kukaobject.kukaEndEffectorIndex)[
                    0]  # is that correct?? or is it another index??

            deltas = self._hyperparams['delta_taskspace']
            mj_U = self.clip_actions(eepos, mj_U, deltas)

            if (t + 1) < self.T:
                curr_time = time.time()

                if self._hyperparams['control_type'] == 'task':
                    self.step_taskspace_trans(mj_U, allow_gripper_change)
                    allow_gripper_change = False

                else:
                    self.step_jointspace(mj_U)

                stateX, jac_t, image_data = self.get_state(t)
                run_time = time.time() - curr_time

                time.sleep(max(self._hyperparams['dt'] - run_time, 0.0))
                self._set_sample(
                    new_sample, stateX, jac_t, t,
                    condition)  # is jac_t correct or should it be jac_r??

        # new_sample.set(ACTION, U)
        # new_sample.set(NOISE, noise)
        self.vid_seqname += 1
        if rgb_writer is not None:
            rgb_writer.close()

        # if save:
        #     self._samples[condition].append(new_sample)

        return new_sample
Example #19
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        self.reset(condition)
        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)
        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
                self._hyperparams['ee_points_tgt'][condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']

        trial_command.obs_datatypes = self._hyperparams['obs_include']

        if self.use_tf is False or not isinstance(policy, TfPolicy):
            print('Not using TF controller')
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            return sample
        else:
            '''
            print 'Using TF controller'
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(policy, condition, time_to_run=self._hyperparams['trial_timeout'])
            pdb.set_trace()
            sample = msg_to_sample(sample_msg, self)
            if save:
                self._samples[condition].append(sample)
            return sample
            '''
            self.trial_manager.prep(policy, condition)
            self._trial_service.publish(trial_command, wait=True)
            self.trial_manager.run(self._hyperparams['trial_timeout'])
            while self._trial_service._waiting:
                print('Waiting for sample to come in')
                rospy.sleep(1.0)
            sample_msg = self._trial_service._subscriber_msg

        sample = msg_to_sample(sample_msg, self)
        sample.set(NOISE, noise)
        sample.set(TIMESTEP, np.arange(self.T).reshape((self.T,1)))

        return sample
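
Example #19 above hands the rollout to a _trial_service whose publish_and_wait call blocks until the robot-side controller publishes a sample message back. The helper below is a generic, hedged sketch of that publish-then-wait pattern using a threading.Event; it is not the real ServiceEmulator, and the names are illustrative only.

import threading

class PublishAndWaitSketch(object):
    """Illustrative publish-then-block-for-reply pattern (not the real ServiceEmulator)."""

    def __init__(self, publish_fn):
        self._publish_fn = publish_fn  # e.g. a bound publish method of a ROS publisher
        self._reply = None
        self._event = threading.Event()

    def on_reply(self, msg):
        # Subscriber callback: stash the message and wake the waiting thread.
        self._reply = msg
        self._event.set()

    def publish_and_wait(self, request, timeout=5.0):
        self._event.clear()
        self._publish_fn(request)
        if not self._event.wait(timeout):
            raise RuntimeError('No sample received within %.1fs' % timeout)
        return self._reply
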
Example #20
0
    def sample(self, policy, condition, verbose=False, save=True, noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.

        Args:
            policy: Policy to be used in the trial.
            condition (int): Which condition setup to run.
            verbose (boolean): Whether or not to plot the trial (not used here).
            save (boolean): Whether or not to store the trial into the samples.
            noisy (boolean): Whether or not to use noise during sampling.
        """

        print('taking sample')

        self.restart_simulation()

        vrep_X = self.retrieve_state()  # get simulation state
        new_sample = self._init_sample(
            vrep_X
        )  # initialise sample with this world state at initial time step
        U = np.zeros(
            [self.T, self.dU]
        )  # initialise the T x dU action array for the episode (time horizon x action dimension)

        if noisy:
            noise = generate_noise(
                self.T, self.dU, self._hyperparams
            )  # Generate a T x dU Gaussian-distributed noise matrix
        else:
            noise = np.zeros((self.T, self.dU))  # vector of zeros

        for t in range(self.T):  # iterate over episode time-steps
            X_t = new_sample.get_X(
                t=t
            )  # get state vector of joint angles, joint velocities and 3D end-effector points concatenated for the current time-step
            obs_t = new_sample.get_obs(
                t=t)  # get NULL observation for simple trajectory optimisation
            U[t, :] = policy.act(
                X_t, obs_t, t, noise[t, :]
            )  # return action for current state, and fill in entry of U vector for time step t
            if (
                    t + 1
            ) < self.T:  # provided we are not on the final iteration of the for loop
                for _ in range(
                        self._hyperparams['substeps']
                ):  # iterate over substeps (i.e. how many frames each action is repeated for)
                    self.step_simulation(U[t, :])
                vrep_X = self.retrieve_state()  # get simulation state
                self._set_sample(
                    new_sample, vrep_X,
                    t)  # add this information to new_sample object
        self.stop_simulation()
        new_sample.set(
            ACTION, U
        )  # after running to the end of the trajectory, store the action sequence U as ACTION data in new_sample
        if save:  # if want to save sample to agent object
            self._samples[condition].append(
                new_sample)  # append samples to agent object _samples variable

        return new_sample  # 0: actions 1: joint angles 2: joint velocities 3: end-effector points (3D)
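
For context, agents like the V-REP one in Example #20 are normally driven by an outer data-collection loop that draws several samples per condition. A hypothetical driver (names are illustrative, not part of the snippets above) could look like this:

def collect_samples(agent, policy, num_conditions, samples_per_condition):
    """Hypothetical outer loop: roll out `policy` several times per condition."""
    all_samples = []
    for cond in range(num_conditions):
        for _ in range(samples_per_condition):
            # Each call resets the simulator, rolls out the policy for T steps
            # and (by default) appends the Sample to the agent's internal store.
            all_samples.append(agent.sample(policy, cond, noisy=True))
    return all_samples
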
Example #21
0
    def sample(self,
               policy,
               condition,
               verbose=True,
               save=True,
               noisy=True,
               use_TfController=False,
               first_itr=False,
               timeout=None,
               reset=True,
               rnd=None):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            use_TfController: Whether to use the synchronous TfController
        Returns:
            sample: A Sample object.
        """
        if use_TfController:
            self._init_tf(policy, policy.dU)
            self.use_tf = True
            self.cur_timestep = 0
            self.sample_save = save
            self.active = True

        self.policy = policy

        if reset:
            self.reset(condition, rnd=rnd)
            self.condition = condition

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
            self.noise = noise
        else:
            noise = np.zeros((self.T, self.dU))
            self.noise = None

        # Fill in trial command
        trial_command = TrialCommand()
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = \
                policy_to_msg(policy, noise, use_TfController=use_TfController)
        if timeout is not None:
            trial_command.T = timeout
        else:
            trial_command.T = self.T
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
                self._hyperparams['ee_points_tgt'][self.condition].tolist()
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        # Execute trial.
        sample_msg = self._trial_service.publish_and_wait(
            trial_command,
            timeout=(trial_command.T + self._hyperparams['trial_timeout']))
        if self.vision_enabled:
            sample_msg = self.add_rgb_stream_to_sample(sample_msg)
        sample = msg_to_sample(sample_msg, self)
        #sample = self.replace_samplestates_with_errorstates(sample, self.x_tgt[condition])
        if save:
            self._samples[condition].append(sample)
        self.active = False
        return sample
Example #22
0
    def sample(self,
               itr,
               policy,
               condition,
               verbose=True,
               save=True,
               noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.
        Args:
            itr: Iteration number, used to name the saved data file; can be removed when not needed.
            policy: Policy to be used in the trial.
            condition: Which condition setup to run.
            verbose: Whether or not to plot the trial.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        """
        img = []
        fp = []
        obs = []

        # Create new sample, populate first time step.
        #self._init_tf(policy.dU)
        feature_fn = None
        if 'get_features' in dir(policy):
            feature_fn = policy.get_features

        mj_X = self._hyperparams['x0'][condition]
        U = np.zeros([self.T, self.dU])

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Not called
        if np.any(self._hyperparams['x0var'][condition] > 0):
            x0n = self._hyperparams['x0var'] * \
                    np.random.randn(*self._hyperparams['x0var'].shape)
            mj_X += x0n
        noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]

        # Not called
        if noisy_body_idx.size > 0:
            for i in range(len(noisy_body_idx)):
                idx = noisy_body_idx[i]
                var = self._hyperparams['noisy_body_var'][condition][i]

                self._model[condition]['body_pos'][idx, :] += \
                        var * np.random.randn(1, 3)

        # self._world[condition].set_model(self._model[condition])

        ## INIT BAXTER
        #self.baxter.move_baxter_to_joint_positions([0.32, -0.71, 0.68, 1.09, 0.07, 0.76, 0.13])   # for ball_punching task
        #self.baxter.move_baxter_to_joint_positions([0.27, -1.14, 0.98, 1.60, 0.15, 0.51, 0.27])

        self.baxter.move_baxter_to_joint_positions(
            self._hyperparams['x0'][condition][0:7])

        new_sample = self._init_sample(
            condition, feature_fn=feature_fn)  # new_sample: class 'Sample'

        for t in range(self.T):
            # for t in range(12):
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            print obs_t.shape
            obs.append(obs_t)

            # set the ACTION for the bot gotten from the policy calculations, and apply.
            #mj_U = policy.act(X_t, obs_t, t, noise[t, :])
            mj_U = policy.act(X_t, obs_t, t, noise[t, :], condition)
            U[t, :] = mj_U

            # if verbose:
            #     self._world[condition].plot(mj_X)

            # every step but the last
            if (t + 1) < self.T:
                for _ in range(self._hyperparams['substeps']):

                    # This is the call to mjcpy to set the robot
                    # mj_X, _ = self._world[condition].step(mj_X, mj_U)

                    # Set the baxter joint velocities through the Baxter API
                    self.baxter.set_baxter_joint_velocities(mj_U)
                    #self.baxter.set_baxter_joint_positions(mj_U)
                    #self.baxter.set_baxter_joint_torques(mj_U)

                    # mj_X[self._joint_idx] = self.baxter.get_baxter_joint_angles_positions()
                    # mj_X[self._vel_idx] = self.baxter.get_baxter_joint_angles_velocities()

                    # mj_X = self.baxter.get_baxter_joint_angles()

                #TODO: Some hidden state stuff will go here.
                # self._data = self._world[condition].get_data()
                #time.sleep(1)
                print "current step(t): ", t
                self._set_sample(new_sample,
                                 mj_X,
                                 t,
                                 condition,
                                 feature_fn=feature_fn)
                if t == 0:
                    raw_input('first time step end')

            fp_t = new_sample.get(IMAGE_FEAT, t)
            # img_t = self._get_image_from_obs(obs_t)
            img_t = new_sample.get(RGB_IMAGE, t)
            # path = '/hdd/gps-master/experiments/test_obs/data_files/check_obs/' + 'img_%d' % t
            # np.save(path, img_t)
            fp.append(fp_t)
            img.append(img_t)
        fp = np.asarray(fp)
        img = np.asarray(img)
        obs = np.asarray(obs)

        ## dongju : to save feature points and image observed
        path = '/hdd/gps-master/experiments/' + 'block_insert_new' + '/data_files/check_fp'
        if not os.path.exists(path):
            os.mkdir(path)
            print path, ' is created'
        fname = path + '/fp_%d_%d.npz' % (itr, condition)
        np.savez_compressed(fname, fp=fp, img=img, obs=obs)

        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)

        if save:
            self._samples[condition].append(new_sample)
        return new_sample
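
Example #22 writes the collected feature points, images and observations of each rollout to an .npz file with np.savez_compressed. If such a file needs to be inspected later, the arrays come back under the keyword names used when saving; a small sketch (the exact shapes depend on the agent configuration):

import numpy as np

def load_rollout_features(fname):
    """Load the fp/img/obs arrays written by the Baxter sampling loop above."""
    data = np.load(fname)
    fp, img, obs = data['fp'], data['img'], data['obs']
    # fp: (T, n_feature_points), img: (T, H, W, C), obs: (T, dO) -- assumed shapes.
    return fp, img, obs
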
Example #23
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        # There are different trajectories based on if reset or not
        if self.reset_time:
            trajectories = self.reset_trajectories
        else:
            trajectories = self.trajectories

        if condition not in trajectories:  # If this hasn't been initialized yet
            if self.reset_time:
                self.init_reset_traj(condition, policy)
            else:
                self.compute_reference_trajectory(condition, policy)

        self.samples_taken[condition] += 1  # Increment number of samples taken

        # Every so many samples, take more of the trajectory
        # Unless we have reached the end or we are using varying T
        if self.samples_taken[condition] % (self.num_samples * self.iter_per_seg) == 0 and self.iter_count != 0 \
        and (self.T != self.final_T or self.varying_T):
            self.update_T_then_policy(policy, condition)

        self.T = self.cur_T[
            condition]  # Make sure the T is correct for the condition we are on

        ref_traj_info = trajectories[condition]
        # Length of the trajectory
        traj_length = len(self.full_ref_ee[condition])
        # The trajectory segment currently in use has length self.T
        if self.T == self.final_T:  # If we have gotten to the whole trajectory
            ref_traj = self.trajectories[condition][
                'ee']  # Current reference trajectory
        else:  # Otherwise pad the reference trajectory as well
            ref_traj = self.trajectories[condition]['ee'][:self.T -
                                                          self.padding]
            ref_traj.extend([
                self.trajectories[condition]['ee'][self.T - self.padding - 1]
            ] * self.padding)

        print('The length of the trajectory we are currently using is ' +
              str(self.T))
        print 'Sampling, condition', condition
        self.reset(condition)

        #added from agent_ros.py of public gps codebase
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                print('Initializing TF controller for TfPolicy')
                self._init_tf(policy.dU)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand()
        #trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise)

        if self.samples_taken[
                condition] % self.num_samples == 0 and self.samples_taken[
                    condition] != 0:
            self.iter_count += 1  # This is the full count
            self.pickle_self()  # Pickle self and write it to the data files directory
            with open(
                    'iter' + str(self.iter_count) + '_cond' + str(condition) +
                    '.txt', 'w') as f:
                the_noise = np.zeros((self.T, self.dU))
                f.write(str(policy_to_msg(policy, the_noise)))

        trial_command.T = self.T
        trial_command.id = self._get_next_seq_id()
        trial_command.frequency = self._hyperparams['frequency']
        ee_points = self._hyperparams['end_effector_points']
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = ref_traj[-1]
        trial_command.state_datatypes = self._hyperparams['state_include']
        trial_command.obs_datatypes = self._hyperparams['state_include']

        if self.use_tf is False:
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout'])
        else:
            self._trial_service.publish(trial_command)
            sample_msg = self.run_trial_tf(
                policy, time_to_run=self._hyperparams['trial_timeout'])

        sample = msg_to_sample(sample_msg, self)
        sample.set('target_traj_ee_points',
                   [points - ref_traj[-1] for points in ref_traj])

        sample.set(REF_OFFSETS, ref_traj_info['offsets'][:self.T])
        sample.set(REF_TRAJ,
                   np.array([ref_traj_info['offsets'].flatten()] * self.T))

        if save:
            self._samples[condition].append(sample)
        if self.varying_T:  # Only save this when varying T is used
            self.saved_samples[condition].append(
                sample)  # Keep an extra copy of the sample as well
        self.reset(condition)  # Reset again after the trial
        return sample
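
The padding step in Example #23 truncates the reference end-effector trajectory to T - padding points and repeats the last kept point so the result again has exactly T entries. The same idea in isolation, as a small helper with an assumed list-like input:

def pad_reference_trajectory(ref_traj, T, padding):
    """Truncate to T - padding points and repeat the last kept point `padding` times.

    Assumes len(ref_traj) >= T - padding; returns a list of length T.
    """
    truncated = list(ref_traj[:T - padding])
    truncated.extend([truncated[-1]] * padding)
    return truncated

# Example: pad_reference_trajectory(range(10), T=8, padding=3) -> [0, 1, 2, 3, 4, 4, 4, 4]
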
Example #24
0
    def sample(self,
               policy,
               condition,
               save=True,
               noisy=True,
               reset_cond=None,
               **kwargs):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.

        Returns:
            sample: A Sample object.

        """
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        sample = Sample(self)
        self.reset(reset_cond)

        # Execute policy over a time period of [0,T]
        # TODO: Find better solution to change mode.
        # Relax the arm to switch the controller to torque mode. If this is not done, the mode is
        # changed at timestep t=0, which makes timestep t=1 slow because the mutex in the C++ controller is locked.
        self.relax_arm()
        time.sleep(1)

        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in sample
            latest_sample = self.get_data()
            for sensor_type in self.x_data_types:
                sample.set(sensor_type, latest_sample.get(sensor_type), t)
            sample.set(END_EFFECTOR_POINT_JACOBIANS,
                       latest_sample.get(END_EFFECTOR_POINT_JACOBIANS),
                       t=t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise[t, :])

            # TODO: find better solution to clip (same as in cpp)
            torque_limits_ = np.array([4.0, 4.0, 4.0, 4.0, 1.0, 1.0, .5])
            U_t = np.clip(U_t, -torque_limits_, torque_limits_)

            # Perform action
            self.reset_arm(None, None, U_t, False)
            sample.set(ACTION, U_t, t)

            # Check if agent is keeping up
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical(
                    "Agent can't keep up. In timestep %i it is %fs behind." %
                    (t, -sleep_time))
            elif sleep_time < self.dt / 2:
                logging.warning("Agent may not keep up (%.0f percent busy)" %
                                (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for next timestep
            if sleep_time > 0:
                time.sleep(sleep_time)

        if save:
            self._samples[condition].append(sample)
        self.reset(reset_cond)
        return sample
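
Example #24 keeps the control loop locked to a fixed period by comparing the wall clock against start + (t + 1) * dt and sleeping for whatever time remains. The same timing pattern as a stand-alone, illustrative sketch:

import time

def run_fixed_rate(step_fn, T, dt):
    """Call step_fn(t) once per control period of length dt, warning when behind schedule."""
    start = time.time()
    for t in range(T):
        step_fn(t)
        # Time left until the next scheduled tick; negative means we are late.
        sleep_time = start + (t + 1) * dt - time.time()
        if sleep_time > 0:
            time.sleep(sleep_time)
        else:
            print('Step %d finished %.3fs behind schedule' % (t, -sleep_time))
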
Example #25
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.
        Args:
            policy: Policy to be used in the trial.
            condition: Which condition setup to run.
            verbose: Whether or not to plot the trial.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        """
        # Create new sample, populate first time step.
        feature_fn = None
        if 'get_features' in dir(policy):
            feature_fn = policy.get_features
        new_sample = self._init_sample(condition, feature_fn=feature_fn)
        mj_X = self._world[condition].reset(
        )  #initial state in mj_world, condition-specific
        U = np.zeros([self.T, self.dU])
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))
        if np.any(self._hyperparams['x0var'][condition] > 0):
            x0n = self._hyperparams['x0var'] * \
                    np.random.randn(*self._hyperparams['x0var'].shape)
            mj_X += x0n
        noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
        if noisy_body_idx.size > 0:
            for i in range(len(noisy_body_idx)):
                idx = noisy_body_idx[i]
                var = self._hyperparams['noisy_body_var'][condition][i]
                self._model[condition]['body_pos'][idx, :] += \
                        var * np.random.randn(1, 3)

        timestep = 0.05
        speedup = 1
        # Take the sample.
        for t in range(self.T):
            X_t = new_sample.get_X(t=t)  #get state from _data in sample class
            obs_t = new_sample.get_obs(t=t)
            mj_U = policy.act(X_t, obs_t, t, noise[t, :])
            U[t, :] = mj_U

            if (t + 1) < self.T:
                mj_X, reward, terminal, _ = self._world[condition].step(mj_U)

                # if verbose:
                # self._world[condition].render()
                # time.sleep(timestep / speedup)

                # import time as ttime
                #self._data = self._world[condition].get_data()     #get data from mj_world
                self._set_sample(new_sample,
                                 mj_X,
                                 reward,
                                 t,
                                 condition,
                                 feature_fn=feature_fn)
        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)
        if save:
            self._samples[condition].append(new_sample)
        return new_sample
Example #26
0
	def sample(self, policy, condition, verbose=False, save=True, noisy=True):
		"""
		Runs a trial and constructs a new sample containing information
		about the trial.

		Args:
			policy: Policy to be used in the trial.
			condition (int): Which condition setup to run.
			verbose (boolean): Whether or not to plot the trial (not used here).
			save (boolean): Whether or not to store the trial into the samples.
			noisy (boolean): Whether or not to use noise during sampling.
		"""

		# reset the world and assign the initialized state to new_sample
		self._worlds[condition].run()
		self._worlds[condition].reset_world()
		b2d_X = self._worlds[condition].get_state()
		new_sample = self._init_sample(b2d_X)
		
		self.reach_start = None
		self.reach_end = None
		
		# initialize a dummy action sequence
		U = np.zeros([self.T, self.dU])
		if noisy:
			noise = generate_noise(self.T, self.dU, self._hyperparams)
		else:
			noise = np.zeros((self.T, self.dU))
		for t in range(self.T):
			X_t = new_sample.get_X(t=t)
			obs_t = new_sample.get_obs(t=t)
			U[t, :] = policy.act(X_t, obs_t, t, noise[t, :])
			# print(U[t])
			if (t+1) < self.T:
				for _ in range(self._hyperparams['substeps']):
					self._worlds[condition].run_next(U[t, :])
				if self._worlds[condition].reach:
					self._worlds[condition].reach = False
					if self.reach_start is None:
						self.reach_start = t
						# print("reach_start", t)
					elif self.reach_end is None or t > self.reach_end:
						self.reach_end = t
					if t==self.T-2:
						# continue reaching till the end of series
						# print("reach_end", self.reach_end)
						period = self.reach_end - self.reach_start
						# print("reach period", period)
						if period > 3:
							self.finishing = True
							# self.finishing_time = self.reach_end
							self.finishing_time = self.reach_start
				elif self.reach_end == t-1 :
					# just leave
					# print("reach_end", self.reach_end)
					period = self.reach_end - self.reach_start
					# print("reach period", period)
					if period > 1:
						self.finishing = True
						# self.finishing_time = self.reach_end
						self.finishing_time = self.reach_start
						if self.finishing_time == 0:
							self.finishing_time = 1				
				b2d_X = self._worlds[condition].get_state()
				self._set_sample(new_sample, b2d_X, t)			
		
		new_sample.set(ACTION, U)
		if save:
			self._samples[condition].append(new_sample)
		# if self.finishing:
			# print("agent_bus t= ", self.finishing_time)
		return new_sample
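
The reach bookkeeping in Example #26 records when the Box2D world first reports contact (reach_start), keeps extending reach_end while contact persists, and marks the rollout as finishing once the contact interval is long enough (the thresholds differ slightly for intervals ending mid-episode versus at the final step). A simplified, hedged restatement with a single threshold:

def detect_sustained_reach(reach_flags, min_period=3):
    """Return the start timestep of the first contact interval longer than min_period.

    Simplified restatement of the logic above (single threshold); returns None if no
    sufficiently long interval exists.
    """
    start, end = None, None
    for t, reached in enumerate(reach_flags):
        if reached:
            if start is None:
                start = t
            end = t
        elif start is not None:
            if end - start > min_period:
                return start
            start, end = None, None
    if start is not None and end - start > min_period:
        return start
    return None
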
Example #27
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed.
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        self.reset()
        new_sample = self._init_sample(condition)
        b_X = self._hyperparams['x0'][condition]
        U = np.zeros([self.T, self.dU])
        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))
        noise /= 50
        # print(noise[0,:])

        # noise[5] /= 10
        # noise[4] /= 20

        # noise[1] *= 5
        # noise[3] *= 5
        # Take the sample.
        t = 0
        while t < self.T:
            curr_time = rospy.get_time()
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            if self._hyperparams['curr_runs'] < 5:
                # b_U = self._hyperparams['u0'][t, :]  # overridden below: early runs explore with pure noise
                b_U = noise[t, :]
                # print(b_U)

            else:
                b_U = policy.act(X_t, obs_t, t, noise[t, :])
            U[t, :] = b_U

            # clip deltas to the given limits!
            b_U[:-1] = np.clip(b_U[:-1], -self.taskspace_deltas[:-1],
                               self.taskspace_deltas[:-1])
            b_U[-1] = np.clip(b_U[-1], -self.taskspace_deltas[-1],
                              self.taskspace_deltas[-1])

            if (t + 1) < self.T:
                # b_X, b_U_check, image = self._step(b_U, curr_time)
                b_X, b_U_check, image, rcnn_image = self._step_taskspace(
                    b_U, X_t, curr_time)
                if b_X is None:
                    self.reset()
                    rospy.sleep(0.5)
                    new_sample = self._init_sample(condition)
                    b_X = self._hyperparams['x0'][condition]
                    U = np.zeros([self.T, self.dU])
                    # Generate noise.
                    if noisy:
                        noise = generate_noise(self.T, self.dU,
                                               self._hyperparams)
                    else:
                        noise = np.zeros((self.T, self.dU))
                    noise /= 50
                    t = 0
                    continue
                else:
                    self._set_sample(new_sample, b_X, t, condition)
                    new_sample.set(RGB_IMAGE, image, t=t + 1)
                    new_sample.set(RCNN_OUTPUT, rcnn_image, t=t + 1)

            t += 1
        if self._hyperparams['curr_runs'] < 5:
            self._hyperparams['curr_runs'] += 1

        new_sample.set(ACTION, U)
        new_sample.set(NOISE, noise)
        print("Took sample...")
        self.reset()
        if save:
            self._samples[condition].append(new_sample)
        return new_sample
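
Example #27 clips each commanded task-space delta to per-dimension limits before stepping the robot; np.clip applies the bounds element-wise, so the translational components and the gripper component can be bounded separately. A tiny illustration with made-up limits:

import numpy as np

# Hypothetical per-dimension limits: three translational deltas plus one gripper delta.
taskspace_deltas = np.array([0.02, 0.02, 0.02, 0.5])

b_U = np.array([0.05, -0.01, -0.04, 0.9])
b_U[:-1] = np.clip(b_U[:-1], -taskspace_deltas[:-1], taskspace_deltas[:-1])
b_U[-1] = np.clip(b_U[-1], -taskspace_deltas[-1], taskspace_deltas[-1])
# b_U is now [0.02, -0.01, -0.02, 0.5]
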
Example #28
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """
        Reset and execute a policy and collect a sample.
        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            verbose: Unused for this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
        Returns:
            sample: A Sample object.
        """
        if TfPolicy is not None:  # user has tf installed (try import)
            if isinstance(policy, TfPolicy):        # False, policy = Linear Gaussian policy 
                self._init_tf(policy.dU)

        # Reset the agent for a particular experiment condition
        self.reset(condition)

        # Generate noise.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute trial.
        trial_command = TrialCommand() # ROS message 
        trial_command.id = self._get_next_seq_id()
        trial_command.controller = policy_to_msg(policy, noise) # ControllerParams
        trial_command.T = self.T                                # Trajectory length
        trial_command.frequency = self._hyperparams['frequency']    # Controller frequency
        
        ee_points = self._hyperparams['end_effector_points']        # 3*n_points array containing offsets
        trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
        trial_command.ee_points_tgt = \
                self._hyperparams['ee_points_tgt'][condition].tolist() #  3*n_points array containing the desired ee_points for this trial
        
        trial_command.state_datatypes = self._hyperparams['state_include']  # Which data types to include in state
        trial_command.obs_datatypes = self._hyperparams['state_include']    # Which data types to include in observation

        # ------------- Local Policy -------------
        # use_tf is False 
        if self.use_tf is False:
            # self._trial_service = ServiceEmulator(
            #     self._hyperparams['trial_command_topic'], TrialCommand,
            #     self._hyperparams['sample_result_topic'], SampleResult
            # )
            
            # Publish a message and wait for the response
            sample_msg = self._trial_service.publish_and_wait(
                trial_command, timeout=self._hyperparams['trial_timeout']
            )
            sample = msg_to_sample(sample_msg, self)
            
            # Save the sample to the agent's sample list
            if save:
                self._samples[condition].append(sample)
            
            return sample
        else:
            self._trial_service.publish(trial_command)

            # Run an async controller from a policy. 
            # The async controller receives observations from ROS subscribers
            # and then uses them to publish actions
            sample_msg = self.run_trial_tf(policy, 
                time_to_run=self._hyperparams['trial_timeout'])
            
            sample = msg_to_sample(sample_msg, self)
            
            if save:
                self._samples[condition].append(sample)
            return sample
Example #29
0
 def merge_controller(self,
                      policy_cur,
                      alpha1,
                      policy_prev,
                      alpha2,
                      condition,
                      verbose=True,
                      save=True,
                      noisy=True):
     """
         Runs a trial and constructs a new sample containing information
         about the trial.
         Args:
             policy: Policy to to used in the trial.
             condition: Which condition setup to run.
             verbose: Whether or not to plot the trial.
             save: Whether or not to store the trial into the samples.
             noisy: Whether or not to use noise during sampling.
         """
     # Create new sample, populate first time step.
     feature_fn = None
     if 'get_features' in dir(policy_cur):
         feature_fn = policy_cur.get_features
     new_sample = self._init_sample(condition, feature_fn=feature_fn)
     mj_X = self._hyperparams['x0'][condition]
     U = np.zeros([self.T, self.dU])
     if noisy:
         noise = generate_noise(self.T, self.dU, self._hyperparams)
     else:
         noise = np.zeros((self.T, self.dU))
     if np.any(self._hyperparams['x0var'][condition] > 0):
         x0n = self._hyperparams['x0var'] * \
               np.random.randn(*self._hyperparams['x0var'].shape)
         mj_X += x0n
     noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
     if noisy_body_idx.size > 0:
         for i in range(len(noisy_body_idx)):
             idx = noisy_body_idx[i]
             var = self._hyperparams['noisy_body_var'][condition][i]
             self._model[condition]['body_pos'][idx, :] += \
                 var * np.random.randn(1, 3)
     # Take the sample.
     for t in range(self.T):
         X_t = new_sample.get_X(t=t)
         obs_t = new_sample.get_obs(t=t)
         mj_U = policy_cur.merge_act(policy_prev, alpha1, alpha2, X_t,
                                     obs_t, t, noise[t, :])
         # mj_U = policy_cur.act(X_t, obs_t, t, noise[t, :])
         U[t, :] = mj_U
         if verbose:
             self._world[condition].plot(mj_X)
         if (t + 1) < self.T:
             for _ in range(self._hyperparams['substeps']):
                 mj_X, _ = self._world[condition].step(mj_X, mj_U)
             # TODO: Some hidden state stuff will go here.
             self._data = self._world[condition].get_data()
             self._set_sample(new_sample,
                              mj_X,
                              t,
                              condition,
                              feature_fn=feature_fn)
     new_sample.set(ACTION, U)
     if save:
         self._samples[condition].append(new_sample)
     return new_sample
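
Example #29 obtains its action from policy_cur.merge_act(policy_prev, alpha1, alpha2, ...), whose implementation is not shown here. One plausible reading, offered purely as a hedged sketch, is a weighted combination of the two policies' actions:

def merged_action(policy_cur, policy_prev, alpha1, alpha2, X_t, obs_t, t, noise_t):
    """Hypothetical blend of two policies' actions; the real merge_act may differ."""
    u_cur = policy_cur.act(X_t, obs_t, t, noise_t)
    u_prev = policy_prev.act(X_t, obs_t, t, noise_t)
    return alpha1 * u_cur + alpha2 * u_prev
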
Example #30
0
    def test_sample(self,
                    policy,
                    condition,
                    verbose=True,
                    save=False,
                    noisy=False,
                    length=200):
        """
                Reset and execute a policy and collect a sample to test the learned policy.
                Args:
                    policy: A Policy object.
                    condition: Which condition setup to run.
                    verbose: Unused for this agent.
                    save: Whether or not to store the trial into the samples.
                    noisy: Whether or not to use noise during sampling.
                Returns:
                    sample: A Sample object.
        """
        # user has tf installed.
        if TfPolicy is not None:
            if isinstance(policy, TfPolicy):
                self._init_tf(policy.dU)

        start_time = time.time()
        episode_reward = 0.

        # reset state
        obs, state, _ = self.reset()
        sensor_state = {'POS_FORCE': state}
        # new_sample = self._init_sample(sensor_state)
        new_sample = self._init_test_sample(sensor_state, length)
        U = np.zeros([length, self.dU])

        # Generate noise
        if noisy:
            noise = generate_noise(length, self.dU, self._hyperparams)
        else:
            noise = np.zeros((length, self.dU))

        # Sample
        for t in range(length - 1):
            print(
                " ========================= Step {} =========================".
                format(t))
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)

            # print('observation:', obs_t)
            U[t, :] = policy.act(X_t, obs_t, t, noise[t, :])
            action = np.clip(U[t, :], -1, 1) * self._env.action_high_bound
            print('gps_action:', action)

            # Execute trial.
            new_obs, next_state, r, done, safe_or_not, final_action = \
                self._env.step(action, t)

            episode_reward += r
            sensor_next_state = {'POS_FORCE': next_state}
            self._set_sample(new_sample, sensor_next_state, t + 1)

            if safe_or_not is False:
                break

            if done:
                break

        end_time = time.time()
        episode_time = end_time - start_time
        new_sample.set(ACTION, U)

        if save:
            self._samples[condition].append(new_sample)

        return new_sample, episode_reward, episode_time
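
Example #30 maps the raw policy output into the environment's action range by clipping to [-1, 1] and scaling by the per-dimension upper bound (self._env.action_high_bound). The same transformation in isolation, with hypothetical bounds:

import numpy as np

# Hypothetical per-joint action bound published by the environment.
action_high_bound = np.array([0.3, 0.3, 0.3, 0.1, 0.1, 0.1])

raw_u = np.array([1.7, -0.2, 0.5, -3.0, 0.0, 0.9])
action = np.clip(raw_u, -1, 1) * action_high_bound
# action == [0.3, -0.06, 0.15, -0.1, 0.0, 0.09]
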