def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T, dU, dX = sample.T, sample.dU, sample.dX
        orig_A, orig_b = self._hyperparams['A'], self._hyperparams['b']

        # Discretize waypoint time steps.
        waypoint_step = np.ceil(T * self._hyperparams['waypoint_time'])
        A = np.zeros((T, dX + dU, dX + dU))
        b = np.zeros((T, dX + dU))

        if not isinstance(self._hyperparams['ramp_option'], list):
            self._hyperparams['ramp_option'] = [
                self._hyperparams['ramp_option'] for _ in waypoint_step
            ]

        # Set up time-varying matrix and vector.
        start = 0
        for i in range(len(waypoint_step)):
            wpm = get_ramp_multiplier(self._hyperparams['ramp_option'][i],
                                      waypoint_step[i] - start)
            for t in range(start, int(waypoint_step[i])):
                A[t, :, :] = wpm[t - start] * orig_A[i, :, :]
                b[t, :] = wpm[t - start] * orig_b[i, :]
            start = int(waypoint_step[i])

        # Evaluate distance function.
        XU = np.concatenate([sample.get_X(), sample.get_U()], axis=1)
        dist = np.zeros((T, dX + dU))
        for t in range(T):
            dist[t, :] = A[t, :, :].dot(XU[t, :]) + b[t, :]

        ix, iu = slice(dX), slice(dX, dX + dU)
        # Compute the loss.
        Al, Aly, Alyy = self._evalloss(dist)
        Alx = Aly[:, ix]
        Alu = Aly[:, iu]
        Alxx = Alyy[:, ix, ix]
        Aluu = Alyy[:, iu, iu]
        Alux = Alyy[:, ix, iu]

        # Compute state derivatives and value. Loss is the same.
        l = Al
        # First derivative given by A'*Aly.
        # Second derivative given by A'*Alyy*A.
        lx, lu = np.zeros((T, dX)), np.zeros((T, dU))
        lxx, luu = np.zeros((T, dX, dX)), np.zeros((T, dU, dU))
        lux = np.zeros((T, dU, dX))
        for t in range(T):
            lx[t, :] = A[t, ix, ix].T.dot(Alx[t, :])
            lu[t, :] = A[t, iu, iu].T.dot(Alu[t, :])
            lxx[t, :, :] = A[t, ix, ix].T.dot(Alxx[t, :, :]).dot(A[t, ix, ix])
            luu[t, :, :] = A[t, iu, iu].T.dot(Aluu[t, :, :]).dot(A[t, iu, iu])
            lux[t, :, :] = A[t, ix, iu].T.dot(Alux[t, :, :]).dot(A[t, iu, ix])

        return l, lx, lu, lxx, luu, lux
Example #2
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T, dU, dX = sample.T, sample.dU, sample.dX
        orig_A, orig_b = self._hyperparams['A'], self._hyperparams['b']

        # Discretize waypoint time steps.
        waypoint_step = np.ceil(T * self._hyperparams['waypoint_time'])
        A = np.zeros((T, dX+dU, dX+dU))
        b = np.zeros((T, dX+dU))

        if not isinstance(self._hyperparams['ramp_option'], list):
            self._hyperparams['ramp_option'] = [
                self._hyperparams['ramp_option'] for _ in waypoint_step
            ]

        # Set up time-varying matrix and vector.
        start = 0
        for i in range(len(waypoint_step)):
            wpm = get_ramp_multiplier(self._hyperparams['ramp_option'][i],
                                      waypoint_step[i] - start)
            for t in range(start, int(waypoint_step[i])):
                A[t, :, :] = wpm[t-start] * orig_A[i, :, :]
                b[t, :] = wpm[t-start] * orig_b[i, :]
            start = int(waypoint_step[i])

        # Evaluate distance function.
        XU = np.concatenate([sample.get_X(), sample.get_U()], axis=1)
        dist = np.zeros((T, dX+dU))
        for t in range(T):
            dist[t, :] = A[t, :, :].dot(XU[t, :]) + b[t, :]

        ix, iu = slice(dX), slice(dX, dX+dU)
        # Compute the loss.
        Al, Aly, Alyy = self._evalloss(dist)
        Alx = Aly[:, ix]
        Alu = Aly[:, iu]
        Alxx = Alyy[:, ix, ix]
        Aluu = Alyy[:, iu, iu]
        Alux = Alyy[:, ix, iu]

        # Compute state derivatives and value. Loss is the same.
        l = Al
        # First derivative given by A'*Aly.
        # Second derivative given by A'*Alyy*A.
        lx, lu = np.zeros((T, dX)), np.zeros((T, dU))
        lxx, luu = np.zeros((T, dX, dX)), np.zeros((T, dU, dU))
        lux = np.zeros((T, dU, dX))
        for t in range(T):
            lx[t, :] = A[t, ix, ix].T.dot(Alx[t, :])
            lu[t, :] = A[t, iu, iu].T.dot(Alu[t, :])
            lxx[t, :, :] = A[t, ix, ix].T.dot(Alxx[t, :, :]).dot(A[t, ix, ix])
            luu[t, :, :] = A[t, iu, iu].T.dot(Aluu[t, :, :]).dot(A[t, iu, iu])
            lux[t, :, :] = A[t, ix, iu].T.dot(Alux[t, :, :]).dot(A[t, iu, ix])

        return l, lx, lu, lxx, luu, lux
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        for data_type in self._hyperparams['data_types']:
            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            tgt = config['target_state']
            x = sample.get(data_type)

            if 'average' in config:
                x = np.mean(x.reshape((T, ) + config['average']), axis=1)
                _, dim_sensor = x.shape
                num_sensor = config['average'][0]
                l = x.dot(np.array(wp).T)
                ls = np.tile(np.array(wp), [T, num_sensor]) / num_sensor
                lss = np.zeros(
                    (T, dim_sensor * num_sensor, dim_sensor * num_sensor))
            else:
                _, dim_sensor = x.shape

                wpm = get_ramp_multiplier(self._hyperparams['ramp_option'],
                                          T,
                                          wp_final_multiplier=self.
                                          _hyperparams['wp_final_multiplier'])
                wp = wp * np.expand_dims(wpm, axis=-1)
                # Compute state penalty.
                #print(x.shape)
                dist = x - tgt

                # Evaluate penalty term.
                l, ls, lss = evall1l2term(
                    wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
                    np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                    self._hyperparams['l1'], self._hyperparams['l2'],
                    self._hyperparams['alpha'])

            final_l += l

            sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
            sample.agent.pack_data_x(final_lxx,
                                     lss,
                                     data_types=[data_type, data_type])
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #4
0
    def eval(self, sample, obj_val_only = False):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX
        
        cur_fcn = sample.agent.fcns[self.cur_cond_idx]['fcn_obj']
        
        final_l = np.zeros(T)
        
        if not obj_val_only:
            final_lu = np.zeros((T, Du))
            final_lx = np.zeros((T, Dx))
            final_luu = np.zeros((T, Du, Du))
            final_lxx = np.zeros((T, Dx, Dx))
            final_lux = np.zeros((T, Du, Dx))

        x = sample.get(CUR_LOC)
        _, dim = x.shape
        
        # Time step-specific weights
        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier'], 
            wp_custom=self._hyperparams['wp_custom'] if 'wp_custom' in self._hyperparams else None
        )
        
        if not obj_val_only:
            ls = np.empty((T, dim))
            lss = np.empty((T, dim, dim))
        
        cur_fcn.new_sample(batch_size="all")    # Get noiseless gradient
        for t in range(T):
            final_l[t] = cur_fcn.evaluate(x[t,:][:,None])
            if not obj_val_only:
                ls[t,:] = cur_fcn.grad(x[t,:][:,None])[:,0]
                lss[t,:,:] = cur_fcn.hess(x[t,:][:,None])
        
        final_l = final_l * wpm
        
        if not obj_val_only:
            ls = ls * wpm[:,None]
            lss = lss * wpm[:,None,None]
        
            # Equivalent to final_lx[:,sensor_start_idx:sensor_end_idx] = ls
            sample.agent.pack_data_x(final_lx, ls, data_types=[CUR_LOC])
            # Equivalent to final_lxx[:,sensor_start_idx:sensor_end_idx,sensor_start_idx:sensor_end_idx] = lss
            sample.agent.pack_data_x(final_lxx, lss, data_types=[CUR_LOC, CUR_LOC])
        
        if obj_val_only:
            return (final_l,)
        else:
            return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #5
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        dX = sample.dX
        dU = sample.dU

        # Initialize terms.
        l = np.zeros(T)
        lu = np.zeros((T, dU))
        lx = np.zeros((T, dX))
        luu = np.zeros((T, dU, dU))
        lxx = np.zeros((T, dX, dX))
        lux = np.zeros((T, dU, dX))

        wp = self._hyperparams['wp']

        obs = sample.get(self._hyperparams['obstacle_type'])
        x = sample.get(self._hyperparams['position_type'])
        pot = sample.get(self._hyperparams['potential_type'])

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier']
        )
        wp = wp * np.expand_dims(wpm, axis=-1)

        # Compute state penalty.
        obs_dist = x - obs

        sigma_square = self._hyperparams['sigma_square']
        p_collision = np.exp(- np.sum(obs_dist ** 2, axis=1) / sigma_square)
        l_collision = p_collision * self._hyperparams['wp_col'] * wpm

        pot_prev = np.zeros_like(pot[:, 0])
        pot_prev[0] = 0 # Penalty for first
        pot_prev[1:] = pot[:-1,0]
        p_survivability = 1 - p_collision
        l_progress = p_survivability * (pot[:, 0] - pot_prev) * self._hyperparams['wp_nf'] * wpm

        l = l_collision + l_progress

        # Evaluate penalty term.
        # l, ls, lss = evalhinglel2loss(
        #     wp, dist, self._hyperparams['d_safe'], self._hyperparams['l2'],
        # )
        #
        # # Add to current terms.
        # sample.agent.pack_data_x(lx, ls, data_types=[data_type])
        # sample.agent.pack_data_x(lxx, lss,
        #                          data_types=[data_type, data_type])

        return l, lx, lu, lxx, luu, lux
Example #6
0
    def eval(self, sample):
        """
        Evaluate forward kinematics (end-effector penalties) cost.
        Temporary note: This implements the 'joint' penalty type from
            the matlab code, with the velocity/velocity diff/etc.
            penalties removed. (use CostState instead)
        Args:
            sample: A single sample.
        """
        T = sample.T
        dX = sample.dX
        dU = sample.dU

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier']
        )
        wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1)

        # Initialize terms.
        l = np.zeros(T)
        lu = np.zeros((T, dU))
        lx = np.zeros((T, dX))
        luu = np.zeros((T, dU, dU))
        lxx = np.zeros((T, dX, dX))
        lux = np.zeros((T, dU, dX))

        # Choose target.
        tgt = self._hyperparams['target_end_effector']
        pt = sample.get(END_EFFECTOR_POINTS)
        dist = pt - tgt
        # TODO - These should be partially zeros so we're not double
        #        counting.
        #        (see pts_jacobian_only in matlab costinfos code)
        jx = sample.get(END_EFFECTOR_POINT_JACOBIANS)
        print "tgt: ", tgt.shape, \
            "pt: ", pt.shape, \
            "jx: ", jx.shape

        # Evaluate penalty term. Use estimated Jacobians and no higher
        # order terms.
        jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2]))
        # evall1l2term or evallogl2term
        l, ls, lss = self._hyperparams['evalnorm'](
            wp, dist, jx, jxx_zeros, self._hyperparams['l1'],
            self._hyperparams['l2'], self._hyperparams['alpha']
        )
        # Add to current terms.
        sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES])
        sample.agent.pack_data_x(lxx, lss,
                                 data_types=[JOINT_ANGLES, JOINT_ANGLES])

        return l, lx, lu, lxx, luu, lux
Example #7
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        for data_type in self._hyperparams['data_types']:
            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            tgt = config['target_state']
            x = sample.get(data_type)
            _, dim_sensor = x.shape

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'],
                T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
            wp = wp * np.expand_dims(wpm, axis=-1)

            # Compute state penalty
            # dist = x[8] - tgt[2]
            dist = x - tgt
            dist[:, :8] = 0.
            dist[:, 9:12] = 0
            # print('dist', dist)

            # Evaluate penalty term
            l, ls, lss = evall1l2term(
                wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                self._hyperparams['l1'], self._hyperparams['l2'],
                self._hyperparams['alpha'])

            final_l += l

            sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
            sample.agent.pack_data_x(final_lxx,
                                     lss,
                                     data_types=[data_type, data_type])

        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #8
0
    def eval(self, sample):
        """
        Evaluate forward kinematics (end-effector penalties) cost.
        Temporary note: This implements the 'joint' penalty type from
            the matlab code, with the velocity/velocity diff/etc.
            penalties removed. (use CostState instead)
        Args:
            sample: A single sample.
        """
        T = sample.T
        dX = sample.dX
        dU = sample.dU

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier']
        )
        wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1)

        # Initialize terms.
        l = np.zeros(T)
        lu = np.zeros((T, dU))
        lx = np.zeros((T, dX))
        luu = np.zeros((T, dU, dU))
        lxx = np.zeros((T, dX, dX))
        lux = np.zeros((T, dU, dX))

        # Choose target.
        tgt = self._hyperparams['target_end_effector']
        pt = sample.get(END_EFFECTOR_POINTS)
        dist = pt - tgt
        # TODO - These should be partially zeros so we're not double
        #        counting.
        #        (see pts_jacobian_only in matlab costinfos code)
        jx = sample.get(END_EFFECTOR_POINT_JACOBIANS)

        # Evaluate penalty term. Use estimated Jacobians and no higher
        # order terms.
        jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2]))
        l, ls, lss = self._hyperparams['evalnorm'](
            wp, dist, jx, jxx_zeros, self._hyperparams['l1'],
            self._hyperparams['l2'], self._hyperparams['alpha']
        )
        # Add to current terms.
        sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES])
        sample.agent.pack_data_x(lxx, lss,
                                 data_types=[JOINT_ANGLES, JOINT_ANGLES])

        return l, lx, lu, lxx, luu, lux
Example #9
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        for data_type in self._hyperparams['data_types']:
            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            tgt = config['target_state']
            x = sample.get(data_type)
            _, dim_sensor = x.shape

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'], T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier']
            )
            wp = wp * np.expand_dims(wpm, axis=-1)
            # Compute state penalty.
            dist = x - tgt

            # Evaluate penalty term.
            l, ls, lss = evall1l2term(
                wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                self._hyperparams['l1'], self._hyperparams['l2'],
                self._hyperparams['alpha']
            )

            final_l += l

            sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
            sample.agent.pack_data_x(final_lxx, lss,
                                     data_types=[data_type, data_type])
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #10
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        dX = sample.dX
        dU = sample.dU

        # Initialize terms.
        l = np.zeros(T)
        lu = np.zeros((T, dU))
        lx = np.zeros((T, dX))
        luu = np.zeros((T, dU, dU))
        lxx = np.zeros((T, dX, dX))
        lux = np.zeros((T, dU, dX))

        data_type = self._hyperparams['position_type']

        wp = self._hyperparams['wp']
        obs = sample.get(self._hyperparams['obstacle_type'])
        x = sample.get(data_type)

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'],
            T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
        wp = wp * np.expand_dims(wpm, axis=-1)
        # Compute state penalty.
        dist = x - obs

        # Evaluate penalty term.
        l, ls, lss = evalhinglel2loss(
            wp,
            dist,
            self._hyperparams['d_safe'],
            self._hyperparams['l2'],
        )

        # Add to current terms.
        sample.agent.pack_data_x(lx, ls, data_types=[data_type])
        sample.agent.pack_data_x(lxx, lss, data_types=[data_type, data_type])

        return l, lx, lu, lxx, luu, lux
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        tt_total = 0
        for data_type in self._hyperparams['data_types']:
            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            tgt = config['target_state']
            max_distance = config['max_distance']
            outside_cost = config['outside_cost']
            inside_cost = config['inside_cost']
            x = sample.get(data_type)
            _, dim_sensor = x.shape

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'], T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier']
            )
            wp = wp * np.expand_dims(wpm, axis=-1)

            # Compute binary region penalty.
            dist = np.abs(x - tgt)
            for t in xrange(T):
                # If at least one of the coordinates is outside of 
                # the region assign outside_cost, otherwise inside_cost.
                if np.sum(dist[t]) > max_distance:
                    final_l[t] += outside_cost
                else:
                    final_l[t] += inside_cost 

        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
    def eval(self, sample):
        T = sample.T
        dX = sample.dX
        dU = sample.dU

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'], T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier']
        )
        wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1)

        l = np.zeros(T)
        lu = np.zeros((T, dU))
        lx = np.zeros((T, dX))
        luu = np.zeros((T, dU, dU))
        lxx = np.zeros((T, dX, dX))
        lux = np.zeros((T, dU, dX))

        pt = sample.get(END_EFFECTOR_POINTS)

        # The following is hard-coded: determines position of gripper and block
        # and computes the distance between them
        pt_ee_l = pt[:, 0:3]
        pt_ee_r = pt[:, 3:6]
        pt_ee_avg = 0.5 * (pt_ee_r + pt_ee_l)
        pt_block = pt[:, 6:9]
        dist = pt_ee_avg - pt_block

        wp = np.ones((T,3))
        jx = sample.get(END_EFFECTOR_POINT_JACOBIANS)

        # Also hard-coded is this Jacobian calculation
        jx_1 = 0.5 * (jx[:, 0:3, :] + jx[:, 3:6, :]) - jx[:, 6:9, :]

        jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2]))
        l, ls, lss = self._hyperparams['evalnorm'](
            wp, dist, jx_1, jxx_zeros, self._hyperparams['l1'],
            self._hyperparams['l2'], self._hyperparams['alpha']
        )

        sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES])
        sample.agent.pack_data_x(lxx, lss,
                                 data_types=[JOINT_ANGLES, JOINT_ANGLES])

        return l, lx, lu, lxx, luu, lux
Example #13
0
    def eval_mu(self, mu, T, Du, Dx):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        for data_type in self._hyperparams['data_types']:

            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            tgt = config['target_state']
            x = mu[:, 0:6]
            _, dim_sensor = x.shape

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'],
                T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
            wp = wp * np.expand_dims(wpm, axis=-1)
            # Compute state penalty.
            dist = x - tgt

            # Evaluate penalty term.
            l, ls, lss = evall1l2term(
                wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                self._hyperparams['l1'], self._hyperparams['l2'],
                self._hyperparams['alpha'])

            final_l += l
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #14
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T, dU, dX = sample.T, sample.dU, sample.dX

        # Discretize waypoint time steps.
        waypoint_step = np.ceil(
            T * self._hyperparams['waypoint_time'])  # ex. [8., 20.]

        if not isinstance(self._hyperparams['ramp_option'], list):
            self._hyperparams['ramp_option'] = [
                self._hyperparams['ramp_option'] for _ in waypoint_step
            ]

        final_l = np.zeros(T)
        final_lu = np.zeros((T, dU))
        final_lx = np.zeros((T, dX))
        final_luu = np.zeros((T, dU, dU))
        final_lxx = np.zeros((T, dX, dX))
        final_lux = np.zeros((T, dU, dX))

        for data_type in self._hyperparams['data_types']:
            config = self._hyperparams['data_types'][data_type]

            start = 0
            for i in range(len(waypoint_step)):
                wp = config[i]['wp']
                tgt = config[i]['target_state']
                x = sample.get(data_type)
                ##print "\noriginal x:\n", x
                _, dim_sensor = x.shape

                wpm = get_ramp_multiplier(self._hyperparams['ramp_option'][i],
                                          int(waypoint_step[i] - start))
                wp = wp * np.expand_dims(wpm, axis=-1)

                x = x[start:int(waypoint_step[i]), :]
                ##print "modified x:\n", x
                ##print "target:\n", tgt

                # Compute state penalty.
                dist = x - tgt
                ##print "dist:\n", dist

                # Evaluate penalty term.
                l, ls, lss = evall1l2term(
                    wp, dist,
                    np.tile(np.eye(dim_sensor),
                            [int(waypoint_step[i] - start), 1, 1]),
                    np.zeros(
                        (int(waypoint_step[i] - start), dim_sensor, dim_sensor,
                         dim_sensor)), self._hyperparams['l1'],
                    self._hyperparams['l2'], self._hyperparams['alpha'])

                final_l[start:int(waypoint_step[i])] = l
                final_lx[start:int(waypoint_step[i]), 0:7] = ls
                final_lxx[start:int(waypoint_step[i]), 0:7, 0:7] = lss

                start = int(waypoint_step[i])
        ##print "Exit on cost lin wp ksu"
        ##exit()
        #return l, lx, lu, lxx, luu, lux
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #15
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        for data_type in self._hyperparams['data_types']:
            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            tgt = config['target_state']
            # print("tgt for cost_state evaluation", tgt)

            # Modified by RH
            map_size = config['map_size']
           
            
            # TODO
            # if agent is not bus, tgt = config['target_state']
            # if agent is bus, update the target_state for each round of gpsMain.run()
            if map_size:
                # it's AgentBus
                target_state = config['target_state']
                tgt = [target_state[0]-map_size[1]/2, map_size[0]/2-target_state[1], target_state[2]]
                
            x = sample.get(data_type)
            _, dim_sensor = x.shape

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'], T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier']
            )
            wp = wp * np.expand_dims(wpm, axis=-1)
            # Compute state penalty.
            dist = x - tgt

            # Evaluate penalty term.
            l, ls, lss = evall1l2term(
                wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                self._hyperparams['l1'], self._hyperparams['l2'],
                self._hyperparams['alpha']
            )

            final_l += l

            sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
            sample.agent.pack_data_x(final_lxx, lss,
                                     data_types=[data_type, data_type])
        # print("new tgt", tgt)
        # print("cost_state", final_l)
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #16
0
    def eval(self, sample):
        """
        Evaluate cost function and derivatives on a sample.
        Args:
            sample:  A single sample
        """
        T = sample.T
        Du = sample.dU
        Dx = sample.dX

        final_l = np.zeros(T)
        final_lu = np.zeros((T, Du))
        final_lx = np.zeros((T, Dx))
        final_luu = np.zeros((T, Du, Du))
        final_lxx = np.zeros((T, Dx, Dx))
        final_lux = np.zeros((T, Du, Dx))

        for data_type in self._hyperparams['data_types']:
            # print("data_type", data_type)
            config = self._hyperparams['data_types'][data_type]
            wp = config['wp']
            x = sample.get(data_type)
            _, dim_sensor = x.shape
            # print("x in cost_collision", x.shape)
            # print(x)

            wpm = get_ramp_multiplier(
                self._hyperparams['ramp_option'],
                T,
                wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
            wp = wp * np.expand_dims(wpm, axis=-1)
            # Compute state penalty.

            # TODO
            # create a state map with all polygons represented as 1
            # draw a corresponding box to represent the bus, and then use the dot multiplication of overlapping to indicate collision loss
            map_size = config['map_size']
            map_state = config['map_state']
            # print("map_size for collision", map_size)

            target_state = config['target_state']
            # print(target_state)
            # print("is target on road", map_state[ int(target_state[1]), int(target_state[0])])
            target_state = [
                target_state[0] - map_size[1] / 2,
                map_size[0] / 2 - target_state[1], target_state[2]
            ]
            # print("target for cost_collision")
            # print(target_state)

            dist = np.zeros(x.shape)

            for i in range(len(x)):
                # find the rectangle bus, given the center position
                x1 = int(BUS_LENGTH / 2 * np.cos(x[i][2]) +
                         BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0])
                x2 = int(BUS_LENGTH / 2 * np.cos(x[i][2]) -
                         BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0])
                x3 = int(-BUS_LENGTH / 2 * np.cos(x[i][2]) +
                         BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0])
                x4 = int(-BUS_LENGTH / 2 * np.cos(x[i][2]) -
                         BUS_WIDTH / 2 * np.sin(x[i][2]) + x[i][0])
                y1 = int(BUS_LENGTH / 2 * np.sin(x[i][2]) +
                         BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1])
                y2 = int(BUS_LENGTH / 2 * np.sin(x[i][2]) -
                         BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1])
                y3 = int(-BUS_LENGTH / 2 * np.sin(x[i][2]) +
                         BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1])
                y4 = int(-BUS_LENGTH / 2 * np.sin(x[i][2]) -
                         BUS_WIDTH / 2 * np.cos(x[i][2]) + x[i][1])
                xmin = min(x1, x2, x3, x4)
                xmax = max(x1, x2, x3, x4)
                ymin = min(y1, y2, y3, y4)
                ymax = max(y1, y2, y3, y4)
                # simplify the overlapping as the sum of four endpoints of a bounding box
                # print(xmin, xmax, ymin, ymax) # which are based on box2D coords
                xmin = xmin + map_size[1] / 2
                xmax = xmax + map_size[1] / 2
                ymin = map_size[0] / 2 - ymin
                ymax = map_size[0] / 2 - ymax
                # print(xmin, xmax, ymin, ymax)
                # print(map_state[xmin, ymin], map_state[xmin, ymax], map_state[xmax, ymin], map_state[xmax, ymax] )
                dist_temp = map_state[ymin, xmin] + map_state[
                    ymax, xmin] + map_state[ymin,
                                            xmax] + map_state[ymax,
                                                              xmax] - 4 * ROAD
                # print("dist", dist_temp)
                dist[i] = [dist_temp, dist_temp, 0]

            # Evaluate penalty term.
            l, ls, lss = evall1l2term(
                wp, dist, np.tile(np.eye(dim_sensor), [T, 1, 1]),
                np.zeros((T, dim_sensor, dim_sensor, dim_sensor)),
                self._hyperparams['l1'], self._hyperparams['l2'],
                self._hyperparams['alpha'])

            final_l += l

            sample.agent.pack_data_x(final_lx, ls, data_types=[data_type])
            sample.agent.pack_data_x(final_lxx,
                                     lss,
                                     data_types=[data_type, data_type])
        # print("return collision_cost")
        # print(final_l, final_lx, final_lu, final_lxx, final_luu, final_lux)
        # print("dsit_temp", dist_temp)
        # print("cost_collisoion", final_l[0])
        return final_l, final_lx, final_lu, final_lxx, final_luu, final_lux
Example #17
0
    def eval(self, sample):
        """
        Evaluate forward kinematics (end-effector penalties) cost.
        Temporary note: This implements the 'joint' penalty type from
            the matlab code, with the velocity/velocity diff/etc.
            penalties removed. (use CostState instead)
        Args:
            sample: A single sample.
        """
        T = sample.T
        dX = sample.dX
        dU = sample.dU

        wpm = get_ramp_multiplier(
            self._hyperparams['ramp_option'],
            T,
            wp_final_multiplier=self._hyperparams['wp_final_multiplier'])
        wp = self._hyperparams['wp'] * np.expand_dims(wpm, axis=-1)

        # Initialize terms.
        l = np.zeros(T)
        lu = np.zeros((T, dU))
        lx = np.zeros((T, dX))
        luu = np.zeros((T, dU, dU))
        lxx = np.zeros((T, dX, dX))
        lux = np.zeros((T, dU, dX))

        # Choose target.
        tgt = self._hyperparams['target_end_effector']
        pt = sample.get(END_EFFECTOR_POINTS)
        dist_shape = pt.shape
        if tgt.ndim > 1:
            #multi-goal, expand pt in goal dimension and tgt in time dimension for broadcast
            pt = np.expand_dims(pt, axis=0)
            tgt = np.expand_dims(tgt, axis=1)
            dist = pt - tgt  #goals*time*dX
            goal_idx = np.argmin(np.sum(np.power(dist, 2), axis=-1), axis=0)
            dist = dist[goal_idx, range(T)]
        else:
            dist = pt - tgt
        assert dist.shape == dist_shape

        # TODO - These should be partially zeros so we're not double
        #        counting.
        #        (see pts_jacobian_only in matlab costinfos code)
        jx = sample.get(END_EFFECTOR_POINT_JACOBIANS)

        # Evaluate penalty term. Use estimated Jacobians and no higher
        # order terms.
        jxx_zeros = np.zeros((T, dist.shape[1], jx.shape[2], jx.shape[2]))
        l, ls, lss = self._hyperparams['evalnorm'](wp, dist, jx, jxx_zeros,
                                                   self._hyperparams['l1'],
                                                   self._hyperparams['l2'],
                                                   self._hyperparams['alpha'])
        # Add to current terms.
        sample.agent.pack_data_x(lx, ls, data_types=[JOINT_ANGLES])
        sample.agent.pack_data_x(lxx,
                                 lss,
                                 data_types=[JOINT_ANGLES, JOINT_ANGLES])

        return l, lx, lu, lxx, luu, lux