Beispiel #1
0
    def sample(self, policy, condition, verbose=True, save=True, noisy=True):
        """Run one trial with `policy` under `condition` and return it as a Sample.

        This is the main method run when the Agent object is called by GPS.
        TensorFlow is not yet implemented (FIXME).

        Args:
            policy: Policy object handed to self._run_trial.
            condition: Condition to reset to; also the key under which the
                sample is stored in self._samples.
            verbose: Not used by this method.
            save: If True, append the sample to self._samples[condition].
            noisy: If True, draw noise with generate_noise; otherwise use
                all-zero noise of shape (self.T, self.dU).

        Returns:
            The Sample filled with the data recorded during the trial.
        """
        # Reset to the initial configuration at the start of each new trial.
        self.reset(condition)

        # Noise handed to the trial runner together with the policy.
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute the trial.
        sample_data = self._run_trial(
            policy, noise, time_to_run=self._hyperparams['trial_timeout'])

        # Write trial data into the sample object. dict.items() exists on both
        # Python 2 and Python 3, whereas iteritems() is Python 2 only.
        sample = Sample(self)
        for sensor_id, data in sample_data.items():
            sample.set(sensor_id, np.asarray(data))

        # Saving is controlled by the caller (gps_main.py).
        if save:
            self._samples[condition].append(sample)

        return sample
    def _init_sample(self, condition, feature_fn=None):
        """
        Build a fresh Sample whose time step 0 holds the post-reset world data.
        Args:
            condition: Index of the world / initial state to reset.
            feature_fn: Function to compute image features from the
                observation; not used by this implementation.
        Returns:
            The newly created Sample.
        """
        new_sample = Sample(self)

        # Reset the condition-specific world to its stored initial state. The
        # returned vector is cut into per-sensor slices below.
        reset_data = self._world[condition].reset(
            self._full_init_state[condition])

        # (sensor, slice of the reset vector) pairs, written in this order.
        layout = (
            (END_EFFECTOR_POINTS, slice(0, 8)),
            (JOINT_VELOCITIES, slice(8, 17)),
            (JOINT_ANGLES, slice(17, 20)),
            (END_EFFECTOR_POINT_VELOCITIES, slice(20, 23)),
        )
        for sensor, span in layout:
            new_sample.set(sensor, reset_data[span], t=0)

        return new_sample
Beispiel #3
0
    def _roll_out(self, pol, itr, cond, i):
        """Collect sample `i` for condition `cond`.

        Without MPC (or on iteration 0) this simply samples `pol` with
        self.agent. Otherwise the trajectory is executed as a sequence of
        short MPC segments with self.mpc_agent, and the segments are merged
        into one full-length Sample appended to self.agent._samples[cond].

        Args:
            pol: Offline policy to roll out.
            itr: Current iteration index; MPC is only used when itr > 0.
            cond: Condition index.
            i: Index of the sample within the condition; also compared with
                the 'verbose_trials' hyperparameter to decide verbosity.
        """
        verbose = i < self._hyperparams['verbose_trials']

        if not (self.use_mpc and itr > 0):
            self.agent.sample(pol, cond, verbose=verbose)
            return

        T = self.agent.T
        M = self.mpc_agent.T
        # Each MPC segment contributes M - 1 steps (see note in the loop).
        N = int(ceil(T / (M - 1.)))
        X_t = self.agent.x0[cond]

        # Only forward pass one time per cond, because it is the same for
        # every sample of that condition.
        if i == 0:
            # Note: At this time algorithm.prev = algorithm.cur,
            #       and prev.traj_info already has x0mu, x0sigma.
            self.off_prior, _ = self.algorithm.traj_opt.forward(
                pol, self.algorithm.prev[cond].traj_info)
            self.agent.publish_plan(self.off_prior)

        # NOTE(review): exact-type comparison kept from the original; an
        # isinstance() check would also match subclasses - confirm intent.
        if type(self.algorithm) == AlgorithmTrajOpt:
            pol_info = None
        else:
            pol_info = self.algorithm.cur[cond].pol_info

        for n in range(N):
            # Note: M-1 because action[M] = [0,0].
            t_traj = n * (M - 1)

            mpc_pol, mpc_state = self.algorithm.mpc[cond][i].update(
                n, X_t, self.off_prior, pol,
                self.algorithm.cur[cond].traj_info, t_traj, pol_info)
            self.agent.publish_plan(mpc_state, True)
            new_sample = self.mpc_agent.sample(
                mpc_pol,
                cond,
                reset=(n == 0),  # Only the first segment resets.
                noisy=True,
                verbose=verbose)
            # The last state of this segment seeds the next one.
            X_t = new_sample.get_X(t=M - 1)

        # Merge the MPC segments into one sample used to optimize the
        # offline trajectory distribution.
        full_sample = Sample(self.agent)
        sample_lists = self.mpc_agent.get_samples(cond)
        keys = sample_lists[0]._data.keys()
        t = 0
        for sample in sample_lists:
            # Stop at the segment level too: the original only left the inner
            # loop, so a further segment would have written past step T - 1.
            if t >= T:
                break
            for m in range(sample.T - 1):
                for sensor in keys:
                    full_sample.set(sensor, sample.get(sensor, m), t)
                t += 1
                if t >= T:
                    break

        self.agent._samples[cond].append(full_sample)
        # Clear agent samples.
        self.mpc_agent.clear_samples()
Beispiel #4
0
 def _init_sample(self, T=None):
     """
     Create a Sample and record the current sensor readings as time step 0.
     Args:
         T: Forwarded to the Sample constructor as its second argument.
     Returns:
         The newly created Sample.
     """
     new_sample = Sample(self, T)
     # Step the simulation before self._sensor_readings is read.
     self._advance_simulation()
     for sensor_type in self._sensor_types:
         reading = self._sensor_readings[sensor_type]
         new_sample.set(sensor_type, reading, t=0)
     return new_sample
Beispiel #5
0
def msg_to_sample(ros_msg, agent):
    """
    Build a Sample for `agent` from the sensor entries of a SampleResult
    ROS message.

    Each entry's flat `data` is reshaped to the entry's `shape` and stored
    under the entry's `data_type`.
    """
    result = Sample(agent)
    for entry in ros_msg.sensor_data:
        target_shape = np.array(entry.shape)
        values = np.array(entry.data)
        result.set(entry.data_type, values.reshape(target_shape))
    return result
Beispiel #6
0
def msg_to_sample(ros_msg, agent):
    """
    Turn a SampleResult ROS message into a Sample owned by `agent`.

    Every element of `ros_msg.sensor_data` carries a sensor id
    (`data_type`), a `shape` and flat `data`; the data is reshaped and
    written into the sample under that id.
    """
    converted = Sample(agent)
    for item in ros_msg.sensor_data:
        array = np.array(item.data).reshape(np.array(item.shape))
        converted.set(item.data_type, array)
    return converted
Beispiel #7
0
def msg_to_sample(ros_msg, agent):
    """
    Convert a SampleResult ROS message into a Sample Python object.

    Message layout as noted in the original source:
        int32 id
        DataType[] sensor_data
    """
    out = Sample(agent)
    for reading in ros_msg.sensor_data:
        dims = np.array(reading.shape)
        payload = np.array(reading.data)
        # Store the reshaped trajectory data under this sensor's id.
        out.set(reading.data_type, payload.reshape(dims))
    return out
Beispiel #8
0
 def _init_sample(self, b2d_X):
     """
     Create a Sample and let self._set_sample populate it from `b2d_X`.
     Args:
         b2d_X: State data forwarded unchanged to self._set_sample.
     Returns:
         The newly created Sample.
     """
     init_t = -1  # Time index handed to _set_sample for the initial state.
     new_sample = Sample(self)
     self._set_sample(new_sample, b2d_X, init_t)
     return new_sample
 def _init_test_sample(self, b2d_X, length):
     """
     Create a test Sample of the given length and populate it from `b2d_X`.
     Args:
         b2d_X: State data forwarded unchanged to self._set_sample.
         length: Passed to the Sample constructor as `length`.
     Returns:
         The newly created Sample (constructed with test=True).
     """
     init_t = 0  # Time index handed to _set_sample.
     test_sample = Sample(self, test=True, length=length)
     self._set_sample(test_sample, b2d_X, init_t)
     return test_sample
Beispiel #10
0
 def _init_sample(self, condition, feature_fn=None):
     """
     Create a Sample and let self._set_sample populate it.
     Args:
         condition: Condition forwarded to self._set_sample.
         feature_fn: Optional feature function, forwarded to
             self._set_sample as a keyword argument.
     Returns:
         The newly created Sample.
     """
     init_t = -1  # Time index handed to _set_sample for the initial state.
     new_sample = Sample(self)
     self._set_sample(new_sample, condition, init_t, feature_fn=feature_fn)
     return new_sample
Beispiel #11
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Create a Sample and populate it from self.get_state(-1).
        Args:
            condition: Condition forwarded to self._set_sample.
            feature_fn: Function to compute image features from the
                observation; not used by this implementation.
        Returns:
            Tuple (sample, image_data), where image_data is the third value
            returned by self.get_state.
        """
        init_t = -1
        new_sample = Sample(self)
        state_x, jacobian, image_data = self.get_state(init_t)
        # Open question kept from the original author: is jac_t correct here
        # or should it be jac_r?
        self._set_sample(new_sample, state_x, jacobian, init_t, condition)
        return new_sample, image_data
Beispiel #12
0
    def _init_sample(self, b2d_X, feature_fn=None):
        """
        Construct a new sample and fill in the first time step.
        Args:
            b2d_X: State data forwarded unchanged to self._set_sample.
            feature_fn: Optional function to compute image features from the
                observation. Defaults to None, which reproduces the previous
                behavior (IMAGE_FEAT is filled with zeros). The original
                hard-coded this to None, leaving the feature branch
                unreachable.
        Returns:
            The newly created Sample.
        Raises:
            ValueError: If RGB_IMAGE and IMAGE_FEAT are both observation
                data types.
        """
        sample = Sample(self)
        self._set_sample(sample, b2d_X, -1)

        if RGB_IMAGE in self.obs_data_types:
            # TODO: the image itself is not stored yet; replace with e.g.
            #   self.img = self.baxter.get_baxter_camera_image()
            #   sample.set(RGB_IMAGE,
            #              np.transpose(self.img, (2, 1, 0)).flatten(), t=0)
            # or
            #   sample.set(RGB_IMAGE, img_data, t=0)
            sample.set(RGB_IMAGE_SIZE, [
                self._hyperparams['image_channels'],
                self._hyperparams['image_width'],
                self._hyperparams['image_height']
            ],
                       t=None)
            if IMAGE_FEAT in self.obs_data_types:
                raise ValueError(
                    'Image features should not be in observation, just state')
            if feature_fn is not None:
                # Assumes that the rest of the sample has been populated.
                obs = sample.get_obs()
                sample.set(IMAGE_FEAT, feature_fn(obs), t=0)
            else:
                sample.set(
                    IMAGE_FEAT,
                    np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT], )),
                    t=0)

        return sample
Beispiel #13
0
    def iteration(self, sample_lists, itr, train_gcm=False):
        """
        Run iteration of MDGPS-based guided policy search.

        Args:
            sample_lists: List of SampleList objects for each condition.
            itr: Iteration index; only used in the progress print-out here.
            train_gcm: Not used by this method.
        """
        # Get all samples
        samples = [
            sample for sample_list in sample_lists
            for sample in sample_list.get_samples()
        ]

        # Split longer trajectories in shorter segments
        if samples[0].T > self.T:
            assert samples[0].T % self.T == 0
            samples[0].agent.T = self.T  # Fake new T
            # Floor division: true division yields a float on Python 3 and
            # range() would raise TypeError. The assert above guarantees an
            # exact multiple.
            num_segments = samples[0].T // self.T
            new_samples = []
            for sample in samples:
                for i in range(num_segments):
                    new_sample = Sample(sample.agent)
                    for sensor in sample._data:  # Split data
                        new_sample._data[sensor] = sample._data[sensor][
                            i * self.T:(i + 1) * self.T]
                    new_samples.append(new_sample)
            samples = new_samples

        self.N = len(samples)
        print("itr", itr, "N: ", self.N, "M: ", self.M)
        assert self.min_samples_per_cluster * self.M <= self.N

        X = np.asarray([sample.get_X() for sample in samples])
        U = np.asarray([sample.get_U() for sample in samples])

        # Update global dynamics prior
        self.dynamics_prior.update(X, U)

        # Store end effector points for visualization
        self.eeps = [s.get(END_EFFECTOR_POINTS) for s in samples]

        # Cluster samples: one run from the configured initial clustering
        # plus self.random_resets runs from random initializations.
        clusterings = self.tac(samples, self.initial_clustering)
        for _ in range(self.random_resets):
            clusterings.extend(self.tac(samples, 'random'))
        # Store for export
        self.responsibilitieses = [c[0] for c in clusterings]

        # Select clustering with maximal likelihood (c[1]) and assign the
        # samples according to its responsibilities (c[0]).
        best = np.argmax([c[1] for c in clusterings])
        self._assign_samples(samples, clusterings[best][0])
        # Fit linearizations again, but this time also using the local
        # trajectories.
        self.m_step(for_tac=False)

        # C-step
        if self.iteration_count > 0:
            self._stepadjust()
        self._update_trajectories()

        # S-step
        self._update_policy()

        # Prepare for next iteration
        self._advance_iteration_variables()
Beispiel #14
0
    def _init_sample(self, condition):
        """
        Construct a new sample and fill in the first time step.
        Args:
            condition: Which condition to initialize.
        Returns:
            The newly created Sample with joint state, end-effector points,
            velocities, Jacobians and the initial image stored.
        """
        sample = Sample(self)

        # Initialize world/run kinematics
        self._init(condition)

        # Initialize sample with stuff from _data
        data = self._world[condition].get_data()
        sample.set(JOINT_ANGLES, data['qpos'].flatten(), t=0)
        sample.set(JOINT_VELOCITIES, data['qvel'].flatten(), t=0)
        eepts = data['site_xpos'].flatten()
        sample.set(END_EFFECTOR_POINTS, eepts, t=0)
        # Velocities are stored as zeros at the first step.
        sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
        # Each site contributes three rows to the stacked Jacobian.
        jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
        for site in range(eepts.shape[0] // 3):
            idx = site * 3
            jac[idx:(idx + 3), :] = self._world[condition].get_jac_site(site)
        sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)

        # save initial image to meta data
        self._world[condition].plot(self._hyperparams['x0'][condition])
        img = self._world[condition].get_image_scaled(
            self._hyperparams['image_width'],
            self._hyperparams['image_height'])
        # mjcpy image shape is [height, width, channels]; dim-shuffle it to
        # [channels, width, height] so the flattened data matches the size
        # metadata stored below, then flatten for storage. The previous
        # (1, 0, 2) permutation produced [width, height, channels], which
        # disagreed with that metadata.
        img_data = np.transpose(img["img"], (2, 1, 0)).flatten()
        # if initial image is an observation, replicate it for each time step
        if CONTEXT_IMAGE in self.obs_data_types:
            sample.set(CONTEXT_IMAGE, np.tile(img_data, (self.T, 1)), t=None)
        else:
            sample.set(CONTEXT_IMAGE, img_data, t=None)
        sample.set(CONTEXT_IMAGE_SIZE,
                   np.array([
                       self._hyperparams['image_channels'],
                       self._hyperparams['image_width'],
                       self._hyperparams['image_height']
                   ]),
                   t=None)
        # only save subsequent images if image is part of observation
        if RGB_IMAGE in self.obs_data_types:
            sample.set(RGB_IMAGE, img_data, t=0)
            sample.set(RGB_IMAGE_SIZE, [
                self._hyperparams['image_channels'],
                self._hyperparams['image_width'],
                self._hyperparams['image_height']
            ],
                       t=None)
        return sample
Beispiel #15
0
    def _eval_cost(self, cond, prev_cost=False):
        """
        Evaluate costs for all samples for a condition and store the averaged
        quadratic cost expansion on the matching traj_info object.
        Args:
            cond: Condition to evaluate cost on.
            prev_cost: Whether or not to use previous_cost (for ioc
                stepadjust). When True, results are written to
                self.cur[cond].prevcost_traj_info instead of
                self.cur[cond].traj_info.
        """
        # Constants.
        T, dX, dU = self.T, self.dX, self.dU

        # Optionally prepend synN synthetic samples drawn via
        # self._traj_samples to the real samples of this condition.
        synN = self._hyperparams['synthetic_cost_samples']
        if synN > 0:
            agent = self.cur[cond].sample_list.get_samples()[0].agent
            X, U, _ = self._traj_samples(cond, synN)
            syn_samples = []
            for i in range(synN):
                sample = Sample(agent)
                sample.set_XU(X[i, :, :], U[i, :, :])
                syn_samples.append(sample)
            all_samples = SampleList(syn_samples +
                                     self.cur[cond].sample_list.get_samples())
        else:
            all_samples = self.cur[cond].sample_list
        N = len(all_samples)

        # Compute cost.
        cs = np.zeros((N, T))  # Raw cost per sample and time step.
        cc = np.zeros((N, T))  # Constant term of the expansion.
        cv = np.zeros((N, T, dX + dU))  # Linear term of the expansion.
        Cm = np.zeros((N, T, dX + dU, dX + dU))  # Quadratic term.
        if self._hyperparams['ioc']:
            cgt = np.zeros((N, T))  # Ground truth cost (ioc only).
        for n in range(N):
            sample = all_samples[n]
            # Get costs.
            if prev_cost:
                l, lx, lu, lxx, luu, lux = self.previous_cost[cond].eval(
                    sample)
            else:
                l, lx, lu, lxx, luu, lux = self.cost[cond].eval(sample)
            # Compute the ground truth cost (skipped for the synthetic
            # samples, which occupy indices below synN).
            if self._hyperparams['ioc'] and n >= synN:
                l_gt, _, _, _, _, _ = self.gt_cost[cond].eval(sample)
                cgt[n, :] = l_gt
            cc[n, :] = l
            cs[n, :] = l

            # Assemble matrix and vector.
            cv[n, :, :] = np.c_[lx, lu]
            Cm[n, :, :, :] = np.concatenate(
                (np.c_[lxx, np.transpose(lux, [0, 2, 1])], np.c_[lux, luu]),
                axis=1)

            # Adjust for expanding cost around a sample.
            X = sample.get_X()
            U = sample.get_U()
            yhat = np.c_[X, U]
            rdiff = -yhat
            rdiff_expand = np.expand_dims(rdiff, axis=2)
            cv_update = np.sum(Cm[n, :, :, :] * rdiff_expand, axis=1)
            cc[n, :] += np.sum(rdiff * cv[n, :, :], axis=1) + 0.5 * \
                    np.sum(rdiff * cv_update, axis=1)
            cv[n, :, :] += cv_update

        # Fill in cost estimate.
        if prev_cost:
            # Reuse the current dynamics and initial state distribution on
            # the previous-cost traj_info.
            traj_info = self.cur[cond].prevcost_traj_info
            traj_info.dynamics = self.cur[cond].traj_info.dynamics
            traj_info.x0sigma = self.cur[cond].traj_info.x0sigma
            traj_info.x0mu = self.cur[cond].traj_info.x0mu
        else:
            traj_info = self.cur[cond].traj_info
            # Rows below synN belong to synthetic samples and are dropped.
            self.cur[cond].cs = cs[synN:]  # True value of cost.
        # Averages are taken over all samples, synthetic ones included.
        traj_info.cc = np.mean(cc, 0)  # Constant term (scalar).
        traj_info.cv = np.mean(cv, 0)  # Linear term (vector).
        traj_info.Cm = np.mean(Cm, 0)  # Quadratic term (matrix).

        if self._hyperparams['ioc']:
            self.cur[cond].cgt = cgt[synN:]
Beispiel #16
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Construct a new sample and fill in the first time step.
        Args:
            condition: Which condition to initialize (not used by this
                implementation).
            feature_fn: Function to compute image features from the
                observation (not used by this implementation).
        Returns:
            The newly created Sample holding the arm state, the visual
            embedding, the RGB image and the RCNN output image at t=0.
        """
        sample = Sample(self)

        # Read the current arm state and kinematics. The orientation and
        # end-effector velocity reads of the original were never used and
        # have been dropped.
        q = [self._trial_arm.joint_angle(j) for j in self.joint_names]
        dq = [self._trial_arm.joint_velocity(j) for j in self.joint_names]
        pos = list(self._trial_arm.endpoint_pose()['position'])
        jac = self._kin_trial.jacobian()

        sample.set(JOINT_ANGLES, np.asarray(q), t=0)
        sample.set(JOINT_VELOCITIES, np.asarray(dq), t=0)
        sample.set(END_EFFECTOR_POINTS, np.asarray(pos), t=0)
        # Only the first three Jacobian rows are stored.
        sample.set(END_EFFECTOR_POINT_JACOBIANS, jac[:3, :], t=0)

        # Visual input comes from the first image / depth subscriber.
        img_subs = self.img_subs_list[0]
        depth_subs = self.depth_subs_list[0]
        image = img_subs.img
        depth_rescaled = self.get_depth_img(depth_subs)

        all_visual_features, all_centroids, fig = self._get_rcnn_features(
            image, depth_rescaled)
        try:
            delta_centroid = all_centroids[0] - all_centroids[1]
        except Exception:
            # Best-effort fallback kept from the original (e.g. when two
            # centroids are not available). Narrowed from a bare `except:`
            # so KeyboardInterrupt / SystemExit still propagate.
            delta_centroid = np.array([30, 30, 30])

        (feat_visual_1, feat_visual_2, feat_visual_max_1,
         feat_visual_max_2) = self._apply_feature_selection(
             all_visual_features)

        embedding = np.concatenate([
            delta_centroid, feat_visual_1, feat_visual_max_1, feat_visual_2,
            feat_visual_max_2
        ])

        sample.set(TCN_EMBEDDING, embedding, t=0)

        if fig is not None:
            canvas = FigureCanvas(fig)
            # Call kept from the original (its result was unused); gca() can
            # create an axes on a figure that has none.
            fig.gca()
            canvas.draw()  # draw the canvas, cache the renderer
            img = np.array(fig.canvas.renderer._renderer)
            sample.set(RCNN_OUTPUT, img, t=0)
            sample.set(RGB_IMAGE, image, t=0)
            plt.close(fig)
        else:
            sample.set(RGB_IMAGE, image, t=0)
            # Placeholder output when no figure was produced.
            sample.set(RCNN_OUTPUT, np.zeros((800, 800, 4)), t=0)

        return sample
Beispiel #17
0
 def _init_sample(self, condition):
     """
     Build a Sample whose first time step holds the initial state of
     `condition`.
     Args:
         condition: Index into the 'x0' hyperparameter and self._model.
     Returns:
         The newly created Sample.
     """
     new_sample = Sample(self)

     # Joint angles and velocities come straight from the configured x0.
     x0 = self._hyperparams['x0'][condition]
     new_sample.set(JOINT_ANGLES, x0[self._joint_idx], t=0)
     new_sample.set(JOINT_VELOCITIES,
                    self._hyperparams['x0'][condition][self._vel_idx],
                    t=0)

     # End-effector points are read from the world; velocities start at 0.
     self._data = self._world.get_data()
     ee_points = self._data['site_xpos'].flatten()
     new_sample.set(END_EFFECTOR_POINTS, ee_points, t=0)
     new_sample.set(END_EFFECTOR_POINT_VELOCITIES,
                    np.zeros_like(ee_points),
                    t=0)

     # Stack the per-site Jacobians, three rows per site.
     num_rows = ee_points.shape[0]
     jacobian = np.zeros([num_rows, self._model[condition]['nq']])
     for site in range(num_rows // 3):
         first_row = 3 * site
         jacobian[first_row:first_row + 3, :] = \
             self._world.get_jac_site(site)
     new_sample.set(END_EFFECTOR_POINT_JACOBIANS, jacobian, t=0)
     return new_sample
Beispiel #18
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Move the left arm to the start pose of `condition` and build a Sample
        whose first time step holds the robot readings.
        Args:
            condition: Which condition to initialize.
            feature_fn: Optional function computing image features from the
                observation; only consulted when RGB_IMAGE is observed.
        Returns:
            The newly created Sample.
        Raises:
            ValueError: If RGB_IMAGE and IMAGE_FEAT are both observation
                data types.
        """
        new_sample = Sample(self)

        # Drive the left arm to this condition's configured pose
        # (grasping task).
        start_pose = self._hyperparams['initial_left_arm'][condition]
        self.baxter.initialize_left_arm(start_pose)

        self.cnt = 0
        self.prev_positions = self.baxter.get_baxter_joint_angles_positions()
        new_sample.set(JOINT_ANGLES, np.array(self.prev_positions), t=0)

        # Remaining robot readings, queried and stored in this order.
        robot_readings = (
            (JOINT_VELOCITIES,
             self.baxter.get_baxter_joint_angles_velocities),
            (END_EFFECTOR_POINTS,
             self.baxter.get_baxter_end_effector_pose),
            (END_EFFECTOR_POINT_VELOCITIES,
             self.baxter.get_baxter_end_effector_velocity),
            (END_EFFECTOR_POINT_JACOBIANS,
             self.baxter.get_baxter_end_effector_jacobian),
        )
        for sensor, read in robot_readings:
            new_sample.set(sensor, np.array(read()), t=0)

        if RGB_IMAGE not in self.obs_data_types:
            return new_sample

        # Grab a camera frame and also dump it to disk per condition.
        # Open points from the original author: the image shape and dtype
        # still need checking (MuJoCo delivers [height, width, channels]
        # == [300, 480, 3]).
        self.img = self.baxter.get_baxter_camera_image()
        np.savez('camera_image_blind_' + str(condition) + '.npz',
                 img=self.img)
        flat_image = np.transpose(self.img, (2, 1, 0)).flatten()
        new_sample.set(RGB_IMAGE, flat_image, t=0)
        image_size = [
            self._hyperparams['image_channels'],
            self._hyperparams['image_width'],
            self._hyperparams['image_height'],
        ]
        new_sample.set(RGB_IMAGE_SIZE, image_size, t=None)

        if IMAGE_FEAT in self.obs_data_types:
            raise ValueError(
                'Image features should not be in observation, just state')

        if feature_fn is None:
            # No feature function: store an all-zero feature vector.
            feature_dim = self._hyperparams['sensor_dims'][IMAGE_FEAT]
            new_sample.set(IMAGE_FEAT, np.zeros((feature_dim, )), t=0)
        else:
            new_sample.set(IMAGE_FEAT, feature_fn(new_sample.get_obs()), t=0)

        return new_sample
Beispiel #19
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Initialize the world for `condition` and build a Sample whose first
        time step holds the resulting state and image data.
        Args:
            condition: Which condition to initialize.
            feature_fn: Function to compute image features from the
                observation; only consulted when RGB_IMAGE is observed.
        Returns:
            The newly created Sample.
        Raises:
            ValueError: If RGB_IMAGE and IMAGE_FEAT are both observation
                data types.
        """
        new_sample = Sample(self)

        # Initialize world/run kinematics
        self._init(condition)
        world = self._world[condition]
        hyper = self._hyperparams

        # Joint state and end-effector points from the world data; the
        # end-effector velocities start at zero.
        world_data = world.get_data()
        new_sample.set(JOINT_ANGLES, world_data['qpos'].flatten(), t=0)
        new_sample.set(JOINT_VELOCITIES, world_data['qvel'].flatten(), t=0)
        ee_points = world_data['site_xpos'].flatten()
        new_sample.set(END_EFFECTOR_POINTS, ee_points, t=0)
        new_sample.set(END_EFFECTOR_POINT_VELOCITIES,
                       np.zeros_like(ee_points), t=0)

        # Optional variants with the target entries removed.
        if END_EFFECTOR_POINTS_NO_TARGET in hyper['obs_include']:
            new_sample.set(END_EFFECTOR_POINTS_NO_TARGET,
                           np.delete(ee_points, hyper['target_idx']), t=0)
            new_sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET,
                           np.delete(np.zeros_like(ee_points),
                                     hyper['target_idx']),
                           t=0)

        # Stack the per-site Jacobians, three rows per site.
        num_rows = ee_points.shape[0]
        jacobian = np.zeros([num_rows, self._model[condition]['nq']])
        for site in range(num_rows // 3):
            first_row = 3 * site
            jacobian[first_row:first_row + 3, :] = world.get_jac_site(site)
        new_sample.set(END_EFFECTOR_POINT_JACOBIANS, jacobian, t=0)

        # Save initial image to meta data.
        world.plot(hyper['x0'][condition])
        rendered = world.get_image_scaled(hyper['image_width'],
                                          hyper['image_height'])
        # mjcpy image shape is [height, width, channels]; dim-shuffle it for
        # later conv-net processing and flatten for storage.
        img_data = np.transpose(rendered["img"], (2, 1, 0)).flatten()

        # If the initial image is an observation, replicate it per time step.
        if CONTEXT_IMAGE in self.obs_data_types:
            context_image = np.tile(img_data, (self.T, 1))
        else:
            context_image = img_data
        new_sample.set(CONTEXT_IMAGE, context_image, t=None)
        new_sample.set(CONTEXT_IMAGE_SIZE,
                       np.array([hyper['image_channels'],
                                 hyper['image_width'],
                                 hyper['image_height']]),
                       t=None)

        # Only save subsequent images if the image is part of the observation.
        if RGB_IMAGE not in self.obs_data_types:
            return new_sample

        new_sample.set(RGB_IMAGE, img_data, t=0)
        new_sample.set(RGB_IMAGE_SIZE,
                       [hyper['image_channels'],
                        hyper['image_width'],
                        hyper['image_height']],
                       t=None)
        if IMAGE_FEAT in self.obs_data_types:
            raise ValueError('Image features should not be in observation, just state')

        if feature_fn is None:
            # TODO - need better solution than setting this to 0.
            feature_dim = hyper['sensor_dims'][IMAGE_FEAT]
            new_sample.set(IMAGE_FEAT, np.zeros((feature_dim,)), t=0)
        else:
            # Assumes that the rest of the sample has been populated.
            new_sample.set(IMAGE_FEAT, feature_fn(new_sample.get_obs()), t=0)
        return new_sample
Beispiel #20
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Construct a new sample and fill in the first time step.
        Args:
            condition: Which condition to initialize.
            feature_fn: Function to compute image features from the
                observation (not used by this implementation).
        Returns:
            The newly created Sample. The original body built the sample but
            never returned it, so callers received None.
        """
        sample = Sample(self)

        # Reset the condition-specific world; the returned vector is cut
        # into per-sensor slices below.
        data = self._world[condition].reset()
        sample.set(JOINT_ANGLES, data[0:7], t=0)
        sample.set(JOINT_VELOCITIES, data[7:14], t=0)
        sample.set(END_EFFECTOR_POINTS, data[14:24], t=0)
        sample.set(END_EFFECTOR_POINT_VELOCITIES, data[24:34], t=0)
        # Jacobians are stored as a scalar zero placeholder.
        sample.set(END_EFFECTOR_POINT_JACOBIANS, 0.0, t=0)

        return sample
Beispiel #21
0
    def _init_sample(self, condition):
        """
        Build a Sample whose first time step holds the initial state and
        image of `condition`.
        Args:
            condition: Index into the 'x0' hyperparameter, self._world and
                self._model.
        Returns:
            The newly created Sample.
        """
        hyper = self._hyperparams
        world = self._world[condition]
        new_sample = Sample(self)

        # Joint angles and velocities come straight from the configured x0.
        new_sample.set(JOINT_ANGLES,
                       hyper['x0'][condition][self._joint_idx], t=0)
        new_sample.set(JOINT_VELOCITIES,
                       hyper['x0'][condition][self._vel_idx], t=0)

        # End-effector points are read from the world; velocities start at 0.
        self._data = world.get_data()
        ee_points = self._data['site_xpos'].flatten()
        new_sample.set(END_EFFECTOR_POINTS, ee_points, t=0)
        new_sample.set(END_EFFECTOR_POINT_VELOCITIES,
                       np.zeros_like(ee_points), t=0)

        # Stack the per-site Jacobians, three rows per site.
        num_rows = ee_points.shape[0]
        jacobian = np.zeros([num_rows, self._model[condition]['nq']])
        for site in range(num_rows // 3):
            first_row = 3 * site
            jacobian[first_row:first_row + 3, :] = world.get_jac_site(site)
        new_sample.set(END_EFFECTOR_POINT_JACOBIANS, jacobian, t=0)

        # Save initial image to meta data.
        world.plot(hyper['x0'][condition])
        rendered = world.get_image_scaled(hyper['image_width'],
                                          hyper['image_height'])
        # mjcpy image shape is [height, width, channels]; dim-shuffle it for
        # later conv-net processing and flatten for storage.
        img_data = np.transpose(rendered["img"], (2, 1, 0)).flatten()

        # If the initial image is an observation, replicate it per time step.
        if CONTEXT_IMAGE in self.obs_data_types:
            context_image = np.tile(img_data, (self.T, 1))
        else:
            context_image = img_data
        new_sample.set(CONTEXT_IMAGE, context_image, t=None)
        new_sample.set(CONTEXT_IMAGE_SIZE,
                       np.array([hyper['image_channels'],
                                 hyper['image_width'],
                                 hyper['image_height']]),
                       t=None)

        # Only save subsequent images if the image is part of the observation.
        if RGB_IMAGE in self.obs_data_types:
            new_sample.set(RGB_IMAGE, img_data, t=0)
            new_sample.set(RGB_IMAGE_SIZE,
                           [hyper['image_channels'],
                            hyper['image_width'],
                            hyper['image_height']],
                           t=None)
        return new_sample
Beispiel #22
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Build a Sample whose first time step is filled from the current
        state vector.
        Args:
            condition: Which condition to initialize (not used by this
                implementation).
            feature_fn: Function to compute image features from the
                observation (not used by this implementation).
        Returns:
            Tuple (X, sample): the state vector from
            self._get_current_state(t=0) and the newly created Sample.
        """
        new_sample = Sample(self)

        jacobian = self._kin_trial.jacobian()
        X, image = self._get_current_state(t=0)
        if self.take_video:
            self.rgb_writer.append_data(image)

        # Layout of X as unpacked here:
        #   [0:3]   geom_dist_ee_to_anchor
        #   [3:6]   geom_dist_object2_to_anchor
        #   [6:9]   end-effector position
        #   [9:12]  end-effector velocity
        #   [12]    gripper_binary (read but not stored in the sample)
        #   [13:20] joint angles
        #   [20:27] joint velocities
        #   [27:28] embedding
        ee_to_anchor, object2_to_anchor = X[0:3], X[3:6]
        ee_pos, ee_vel = X[6:9], X[9:12]
        gripper_binary = X[12]
        joint_angles, joint_velocities = X[13:20], X[20:27]
        embedding = X[27:28]

        new_sample.set(OBJECT_POSE,
                       np.concatenate([ee_to_anchor, object2_to_anchor]),
                       t=0)
        new_sample.set(JOINT_ANGLES, np.asarray(joint_angles), t=0)
        new_sample.set(JOINT_VELOCITIES, np.asarray(joint_velocities), t=0)
        new_sample.set(END_EFFECTOR_POINTS, ee_pos, t=0)
        # Only the first three Jacobian rows are stored.
        new_sample.set(END_EFFECTOR_POINT_JACOBIANS, jacobian[:3, :], t=0)
        new_sample.set(END_EFFECTOR_POINT_VELOCITIES, np.asarray(ee_vel), t=0)
        new_sample.set(IMAGE_FEATURE, np.asarray(embedding), t=0)

        return X, new_sample
Beispiel #23
0
    def sample(
        self,
        policy,
        condition,
        verbose=True,
        save=True,
        noisy=True,
        use_TfController=False,
        timeout=None,
        reset_cond=None,
        record=False
    ):
        """
        Reset the environment, roll out a policy for T steps and collect a sample.
        Args:
            policy: A Policy object; its ``act`` method chooses each action.
            condition: Which condition's sample list receives the trial.
            verbose: Not read by this agent.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            use_TfController: Not read by this agent.
            timeout: Not read by this agent.
            reset_cond: Index into ``self.x0`` whose entry seeds the
                environment; ``None`` passes ``None`` as the seed.
            record: Value returned by the environment's ``video_callable``;
                when true, on-screen rendering is skipped.
        Returns:
            sample: A Sample object.
        Raises:
            Exception: If the environment reports ``done`` before the final
                time step.
        """
        noise = (
            generate_noise(self.T, self.dU, self._hyperparams)
            if noisy
            else np.zeros((self.T, self.dU))
        )

        trial = Sample(self)

        self.env.video_callable = lambda episode_id, record=record: record

        # Seed and reset the environment to obtain the initial observation.
        if reset_cond is None:
            seed = None
        else:
            seed = self.x0[reset_cond]
        self.env.seed(seed)
        obs = self.env.reset()

        initial_step = self._hyperparams.get('initial_step', 0)
        if initial_step > 0:
            # One random step gives a slightly randomized initial state distribution.
            action_space = self.env.action_space
            span = action_space.high - action_space.low
            U_initial = span / 12 * np.random.normal(size=self.dU) * initial_step
            obs = self.env.step(U_initial)[0]

        # Time step 0: record the state and choose the first action.
        self.set_states(trial, obs, 0)
        first_action = policy.act(trial.get_X(0), trial.get_obs(0), 0, noise)
        trial.set(ACTION, first_action, 0)

        for t in range(1, self.T):
            if self.render and not record:
                self.env.render(mode='human')  # TODO add hyperparam

            # Apply the previous action and record the resulting state.
            obs, _, done, _ = self.env.step(trial.get_U(t - 1))
            self.set_states(trial, obs, t)

            # Choose and record the action for this step.
            action = policy.act(trial.get_X(t), trial.get_obs(t), t, noise)
            trial.set(ACTION, action, t)

            finished_early = done and t < self.T - 1
            if finished_early:
                raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T))

        if save:
            self._samples[condition].append(trial)
        self.active = False
        return trial
Beispiel #24
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Construct a new sample and fill in the first time step.

        Commands the robot (``self.indy``) to the first six entries of the
        condition's ``x0`` and records the joint and end-effector readings
        it reports at t=0.
        Args:
            condition: Which condition to initialize.
            feature_fn: Function to compute image features from the observation.
        Returns:
            The Sample with time step 0 populated.
        Raises:
            ValueError: If RGB images are observed and IMAGE_FEAT is also
                listed as an observation type.
        """
        sample = Sample(self)

        self.indy.joint_move_to(self._hyperparams['x0'][condition][0:6])

        # Joint positions are also cached on the agent as prev_positions.
        self.prev_positions = self.indy.get_joint_pos()
        sample.set(JOINT_ANGLES, self.prev_positions, t=0)

        # Joint velocities.
        sample.set(JOINT_VELOCITIES, self.indy.get_joint_vel(), t=0)

        # End-effector position: first three entries of the task-space pose.
        ee_point = self.indy.get_task_pos()[:3]
        sample.set(END_EFFECTOR_POINTS, ee_point, t=0)

        # End-effector velocity: first three entries followed by the rest
        # of the task-space velocity.
        vel = self.indy.get_task_vel()
        ee_vel = vel[:3]
        ee_omg = vel[3:]
        sample.set(END_EFFECTOR_POINT_VELOCITIES, np.array(list(ee_vel) + list(ee_omg)), t=0)

        # NOTE(review): this stores the robot handle itself, not a Jacobian
        # matrix. Replace `self.indy` with the real Jacobian once the robot
        # API exposes a function that retrieves it.
        sample.set(END_EFFECTOR_POINT_JACOBIANS, self.indy, t=0)

        # TODO: check whether the block below is necessary.
        if END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']:
            eepts = np.asarray(ee_point)
            target_idx = self._hyperparams['target_idx']
            sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(eepts, target_idx), t=0)
            sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET,
                       np.delete(np.zeros_like(eepts), target_idx), t=0)

        # TODO: enable image capture again once a camera is installed.
        # Only save subsequent images if image is part of observation.
        if RGB_IMAGE in self.obs_data_types:
            # TODO: set RGB_IMAGE for t=0 here from the camera, e.g.
            #   sample.set(RGB_IMAGE, img_data, t=0)
            sample.set(RGB_IMAGE_SIZE, [self._hyperparams['image_channels'],
                                        self._hyperparams['image_width'],
                                        self._hyperparams['image_height']], t=None)
            if IMAGE_FEAT in self.obs_data_types:
                raise ValueError('Image features should not be in observation, just state')
            if feature_fn is not None:
                obs = sample.get_obs()  # Assumes that the rest of the sample has been populated
                sample.set(IMAGE_FEAT, feature_fn(obs), t=0)
            else:
                sample.set(IMAGE_FEAT, np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)), t=0)

        return sample
Beispiel #25
0
 def _init_sample(self, condition):
     """
     Build a new sample and populate its first time step (t=0).

     Joint angles and velocities are taken from the configured ``x0`` of
     the condition; end-effector points and Jacobians are read from the
     world. The world data is also cached on ``self._data``.
     Args:
         condition: Which condition to initialize.
     Returns:
         The Sample with time step 0 filled in.
     """
     new_sample = Sample(self)

     initial_state = self._hyperparams['x0'][condition]
     new_sample.set(JOINT_ANGLES, initial_state[self._joint_idx], t=0)
     new_sample.set(JOINT_VELOCITIES, initial_state[self._vel_idx], t=0)

     world = self._world[condition]
     self._data = world.get_data()
     site_points = self._data['site_xpos'].flatten()
     new_sample.set(END_EFFECTOR_POINTS, site_points, t=0)
     new_sample.set(END_EFFECTOR_POINT_VELOCITIES,
                    np.zeros_like(site_points), t=0)

     # Each site contributes three consecutive rows to the stacked Jacobian.
     num_coords = site_points.shape[0]
     jacobian = np.zeros([num_coords, self._model[condition]['nq']])
     for site in range(num_coords // 3):
         row = 3 * site
         jacobian[row:row + 3, :] = world.get_jac_site(site)
     new_sample.set(END_EFFECTOR_POINT_JACOBIANS, jacobian, t=0)
     return new_sample
Beispiel #26
0
    def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
        """
        Reset and execute a policy and collect a sample.

        The trial is repeated until the operator answers 'y' at the
        'Continue?' prompt. Only the accepted trial is returned and, when
        ``save`` is set, stored; rejected attempts are discarded.
        Args:
            policy: A Policy object.
            condition: Which condition's sample list receives the trial.
            save: Whether or not to store the accepted trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: Condition passed to ``self.reset`` before each
                attempt (the reset is skipped in debug mode).
            **kwargs: Extra keyword arguments (e.g. ``verbose``,
                ``use_TfController``) are accepted and ignored.
        Returns:
            sample: A Sample object.
        """
        while True:
            # A fresh Sample per attempt: a rejected trial must never be the
            # object that ends up stored or returned.
            sample = Sample(self)
            if not self.debug:
                self.reset(reset_cond)

            self.__init_opcua()

            if noisy:
                noise = generate_noise(self.T, self.dU, self._hyperparams)
            else:
                noise = np.zeros((self.T, self.dU))

            # Execute policy over a time period of [0,T]
            start = time.time()
            for t in range(self.T):
                # Read sensors and store sensor data in sample. The current
                # step and sample are bound as defaults so the worker does
                # not depend on late-bound loop variables.
                def store_sensor(sensor, t=t, sample=sample):
                    sample.set(sensor, self.read_sensor(sensor), t)

                self.pool.map(store_sensor, self.sensors)
                # Override sensors
                for override in self.sensor_overrides:
                    if override['condition'](t):
                        sensor = override['sensor']
                        sample.set(sensor, np.copy(override['value']), t)

                print('X_%02d' % t, sample.get_X(t))

                # Get action
                U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

                # Override actuators
                for override in self.actuator_overrides:
                    if override['condition'](t):
                        actuator = override['actuator']
                        U_t[self._u_data_idx[actuator]] = np.copy(override['value'])

                # Send signals
                self.send_signals(t)

                # Perform action
                for actuator in self._u_data_idx:
                    self.write_actuator(actuator, U_t[self._u_data_idx[actuator]])
                sample.set(ACTION, U_t, t)

                print('U_%02d' % t, U_t)

                # Check if agent is keeping up: sleep_time is the slack left
                # until the scheduled start of the next time step.
                sleep_time = start + (t + 1) * self.dt - time.time()
                if sleep_time < 0:
                    # Negative slack means we are late by -sleep_time seconds.
                    logging.critical("Agent can't keep up. %fs behind.", -sleep_time)
                elif sleep_time < self.dt / 2:
                    logging.warning(
                        "Agent may not keep up (%.0f percent busy)",
                        ((self.dt - sleep_time) / self.dt) * 100,
                    )

                # Wait for next timestep
                if sleep_time > 0 and not self.debug:
                    time.sleep(sleep_time)

            self.active = False
            self.finalize_sample()

            if input('Continue?') == 'y':
                break
            print('Repeating')

        # Store only the trial the operator accepted.
        if save:
            self._samples[condition].append(sample)
        return sample
Beispiel #27
0
    def _init_sample(self, condition, feature_fn=None):
        """
        Create a sample for the given condition and populate time step 0.

        Besides joint and end-effector readings, the initial rendered image
        is stored as the context image and, when RGB images are observed,
        as the first RGB frame together with image features.
        Args:
            condition: Which condition to initialize.
            feature_fn: Function to compute image features from the observation.
        Returns:
            The Sample with its first time step filled in.
        Raises:
            ValueError: If RGB images are observed and IMAGE_FEAT is also an
                observation type.
        """
        new_sample = Sample(self)

        # Initialize world/run kinematics
        self._init(condition)

        world = self._world[condition]
        hyper = self._hyperparams

        # Joint and end-effector readings from the world data.
        state = world.get_data()
        new_sample.set(JOINT_ANGLES, state['qpos'].flatten(), t=0)
        new_sample.set(JOINT_VELOCITIES, state['qvel'].flatten(), t=0)
        site_points = state['site_xpos'].flatten()
        new_sample.set(END_EFFECTOR_POINTS, site_points, t=0)
        new_sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(site_points), t=0)

        if END_EFFECTOR_POINTS_NO_TARGET in hyper['obs_include']:
            new_sample.set(END_EFFECTOR_POINTS_NO_TARGET,
                           np.delete(site_points, hyper['target_idx']), t=0)
            new_sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET,
                           np.delete(np.zeros_like(site_points), hyper['target_idx']), t=0)

        # Each site contributes three consecutive rows to the stacked Jacobian.
        num_coords = site_points.shape[0]
        jacobian = np.zeros([num_coords, self._model[condition]['nq']])
        for site in range(num_coords // 3):
            row = 3 * site
            jacobian[row:row + 3, :] = world.get_jac_site(site)
        new_sample.set(END_EFFECTOR_POINT_JACOBIANS, jacobian, t=0)

        # Render the initial image; it is saved to the sample's meta data.
        world.plot(hyper['x0'][condition])
        width = hyper['image_width']
        height = hyper['image_height']
        rendered = world.get_image_scaled(width, height)
        # mjcpy image shape is [height, width, channels]; dim-shuffle it for
        # later conv-net processing, and flatten for storage.
        flat_image = np.transpose(rendered["img"], (2, 1, 0)).flatten()

        # If the initial image is an observation, replicate it for each time step.
        if CONTEXT_IMAGE in self.obs_data_types:
            context = np.tile(flat_image, (self.T, 1))
        else:
            context = flat_image
        new_sample.set(CONTEXT_IMAGE, context, t=None)

        image_size = [hyper['image_channels'], width, height]
        new_sample.set(CONTEXT_IMAGE_SIZE, np.array(image_size), t=None)

        # Subsequent images are only saved if image is part of the observation.
        if RGB_IMAGE not in self.obs_data_types:
            return new_sample

        new_sample.set(RGB_IMAGE, flat_image, t=0)
        new_sample.set(RGB_IMAGE_SIZE, list(image_size), t=None)
        if IMAGE_FEAT in self.obs_data_types:
            raise ValueError('Image features should not be in observation, just state')
        if feature_fn is not None:
            # Assumes that the rest of the sample has been populated.
            features = feature_fn(new_sample.get_obs())
        else:
            # TODO - need better solution than setting this to 0.
            features = np.zeros((hyper['sensor_dims'][IMAGE_FEAT],))
        new_sample.set(IMAGE_FEAT, features, t=0)
        return new_sample
Beispiel #28
0
    def _eval_cost(self, cond, prev_cost=False):
        """
        Evaluate costs for all samples for a condition.

        Evaluates the cost and its first/second derivatives on every sample
        (optionally preceded by synthetic samples), averages the resulting
        constant, linear and quadratic terms over the samples and stores
        them on the condition's trajectory info.
        Args:
            cond: Condition to evaluate cost on.
            prev_cost: Whether or not to use previous_cost (for ioc
                stepadjust). When set, results are written to
                ``prevcost_traj_info`` instead of ``traj_info`` and
                ``self.cur[cond].cs`` is left untouched.
        """
        # Constants.
        T, dX, dU = self.T, self.dX, self.dU

        # Optionally prepend synN synthetic samples to the real ones; they
        # occupy indices [0, synN) of all_samples.
        synN = self._hyperparams['synthetic_cost_samples']
        if synN > 0:
            # Synthetic samples reuse the agent of the first real sample.
            agent = self.cur[cond].sample_list.get_samples()[0].agent
            X, U, _ = self._traj_samples(cond, synN)
            syn_samples = []
            for i in range(synN):
                sample = Sample(agent)
                sample.set_XU(X[i, :, :], U[i, :, :])
                syn_samples.append(sample)
            all_samples = SampleList(syn_samples +
                self.cur[cond].sample_list.get_samples())
        else:
          all_samples = self.cur[cond].sample_list
        N = len(all_samples)

        # Compute cost.
        cs = np.zeros((N, T))  # Cost value per sample and time step.
        cc = np.zeros((N, T))  # Constant term of the expansion.
        cv = np.zeros((N, T, dX+dU))  # Linear term.
        Cm = np.zeros((N, T, dX+dU, dX+dU))  # Quadratic term.
        if self._hyperparams['ioc']:
            # Ground-truth cost; only rows n >= synN are filled in below.
            cgt = np.zeros((N, T))
        for n in range(N):
            sample = all_samples[n]
            # Get costs.
            if prev_cost:
              l, lx, lu, lxx, luu, lux = self.previous_cost[cond].eval(sample)
            else:
              l, lx, lu, lxx, luu, lux = self.cost[cond].eval(sample)
            # Compute the ground truth cost
            # (skipped for the synthetic samples at indices below synN).
            if self._hyperparams['ioc'] and n >= synN:
                l_gt, _, _, _, _, _ = self.gt_cost[cond].eval(sample)
                cgt[n, :] = l_gt
            # Both start from the evaluated cost; cc is adjusted further below.
            cc[n, :] = l
            cs[n, :] = l

            # Assemble matrix and vector.
            # cv stacks [lx, lu]; Cm is the block matrix
            # [[lxx, lux^T], [lux, luu]] per time step.
            cv[n, :, :] = np.c_[lx, lu]
            Cm[n, :, :, :] = np.concatenate(
                (np.c_[lxx, np.transpose(lux, [0, 2, 1])], np.c_[lux, luu]),
                axis=1
            )

            # Adjust for expanding cost around a sample.
            # NOTE(review): presumably this re-expresses the expansion in
            # terms of the absolute (X, U) rather than the offset from the
            # sample -- confirm against the cost function's convention.
            X = sample.get_X()
            U = sample.get_U()
            yhat = np.c_[X, U]
            rdiff = -yhat
            rdiff_expand = np.expand_dims(rdiff, axis=2)
            cv_update = np.sum(Cm[n, :, :, :] * rdiff_expand, axis=1)
            cc[n, :] += np.sum(rdiff * cv[n, :, :], axis=1) + 0.5 * \
                    np.sum(rdiff * cv_update, axis=1)
            cv[n, :, :] += cv_update

        # Fill in cost estimate.
        if prev_cost:
          # The previous-cost trajectory info borrows dynamics and the
          # initial state distribution from the current trajectory info.
          traj_info = self.cur[cond].prevcost_traj_info
          traj_info.dynamics = self.cur[cond].traj_info.dynamics
          traj_info.x0sigma = self.cur[cond].traj_info.x0sigma
          traj_info.x0mu = self.cur[cond].traj_info.x0mu
        else:
          traj_info = self.cur[cond].traj_info
          self.cur[cond].cs = cs[synN:]  # True value of cost.
        # Average each term over all samples (synthetic ones included).
        traj_info.cc = np.mean(cc, 0)  # Constant term (scalar).
        traj_info.cv = np.mean(cv, 0)  # Linear term (vector).
        traj_info.Cm = np.mean(Cm, 0)  # Quadratic term (matrix).

        if self._hyperparams['ioc']:
            # Keep the ground-truth cost for the real samples only.
            self.cur[cond].cgt = cgt[synN:]