예제 #1
0
    def fit(self, X, U):
        """
        Fit time-varying dynamics to sampled trajectories.

        For every time step t in [0, T-2], the rows (x_t, u_t, x_{t+1}) of
        all samples are stacked and handed, together with the prior returned
        by self.prior.eval, to gauss_fit_joint_prior. The fitted terms are
        stored on self and returned. The entries for the last time step
        (T-1) are left at zero, since the loop stops at T-2.

        Args:
            X: States, indexed as (N, T, dX).
            U: Actions, indexed as (N, T, dU).
        Returns:
            Tuple (Fm, fv, dyn_covar) with shapes (T, dX, dX+dU), (T, dX)
            and (T, dX, dX).
        Raises:
            ValueError: If exactly one sample is given (N == 1).
        """
        N, T, dX = X.shape
        dU = U.shape[2]

        if N == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        self.Fm = np.zeros([T, dX, dX+dU])
        self.fv = np.zeros([T, dX])
        self.dyn_covar = np.zeros([T, dX, dX])

        # Index range of the regression inputs (state, action) inside Ys.
        it = slice(dX+dU)
        # Fit dynamics with least squares regression; all samples get the
        # same weight.
        dwts = (1.0 / N) * np.ones(N)
        for t in range(T - 1):
            # One row per sample: [x_t, u_t, x_{t+1}].
            Ys = np.c_[X[:, t, :], U[:, t, :], X[:, t+1, :]]
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.prior.eval(dX, dU, Ys)
            # Regularization is applied to the (state, action) block only.
            # NOTE(review): if 'regularization' is a scalar it is written to
            # every entry of that block, not just the diagonal -- confirm
            # this is intended.
            sig_reg = np.zeros((dX+dU+dX, dX+dU+dX))
            sig_reg[it, it] = self._hyperparams['regularization']
            Fm, fv, dyn_covar = gauss_fit_joint_prior(Ys,
                        mu0, Phi, mm, n0, dwts, dX+dU, dX, sig_reg)
            self.Fm[t, :, :] = Fm
            self.fv[t, :] = fv
            self.dyn_covar[t, :, :] = dyn_covar
        return self.Fm, self.fv, self.dyn_covar
예제 #2
0
    def fit(self, X, U):
        """
        Fit time-varying dynamics to sampled trajectories.

        For every time step t in [0, T-2], the rows (x_t, u_t, x_{t+1}) of
        all samples are stacked and handed, together with the prior returned
        by self.prior.eval, to gauss_fit_joint_prior. The fitted terms are
        stored on self and returned. The entries for the last time step
        (T-1) are left at zero, since the loop stops at T-2.

        Args:
            X: States, indexed as (N, T, dX).
            U: Actions, indexed as (N, T, dU).
        Returns:
            Tuple (Fm, fv, dyn_covar) with shapes (T, dX, dX+dU), (T, dX)
            and (T, dX, dX).
        Raises:
            ValueError: If exactly one sample is given (N == 1).
        """
        N, T, dX = X.shape
        dU = U.shape[2]

        if N == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        self.Fm = np.zeros([T, dX, dX+dU])
        self.fv = np.zeros([T, dX])
        self.dyn_covar = np.zeros([T, dX, dX])

        # Index range of the regression inputs (state, action) inside Ys.
        it = slice(dX+dU)
        # Fit dynamics with least squares regression; all samples get the
        # same weight.
        dwts = (1.0 / N) * np.ones(N)
        for t in range(T - 1):
            # One row per sample: [x_t, u_t, x_{t+1}].
            Ys = np.c_[X[:, t, :], U[:, t, :], X[:, t+1, :]]
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.prior.eval(dX, dU, Ys)
            # Regularization is applied to the (state, action) block only.
            # NOTE(review): if 'regularization' is a scalar it is written to
            # every entry of that block, not just the diagonal -- confirm
            # this is intended.
            sig_reg = np.zeros((dX+dU+dX, dX+dU+dX))
            sig_reg[it, it] = self._hyperparams['regularization']
            Fm, fv, dyn_covar = gauss_fit_joint_prior(Ys,
                        mu0, Phi, mm, n0, dwts, dX+dU, dX, sig_reg)
            self.Fm[t, :, :] = Fm
            self.fv[t, :] = fv
            self.dyn_covar[t, :, :] = dyn_covar
        return self.Fm, self.fv, self.dyn_covar
예제 #3
0
 def _update_policy_fit(self, m, init=False):
     """
     Refit the per-time-step linearization of the policy around the
     current samples of one condition and store it in that condition's
     pol_info (pol_mu, pol_sig, pol_K, pol_k, pol_S, chol_pol_S).
     Args:
         m: Condition
         init: Whether this is the initial fitting of the policy.
     """
     dX, dU, T = self.dX, self.dU, self.T
     # Gather the samples of this condition and the policy's output on them.
     cond = self.cur[m]
     sample_list = cond.sample_list
     N = len(sample_list)
     pol_info = cond.pol_info
     X = sample_list.get_X()
     observations = sample_list.get_obs().copy()
     pol_mu, pol_sig = self.policy_opt.prob(observations)[:2]
     pol_info.pol_mu = pol_mu
     pol_info.pol_sig = pol_sig
     # Update policy prior: the current samples are only passed on the
     # initial fit, otherwise an empty sample list is used in their place.
     prior_samples = sample_list if init else SampleList([])
     pol_info.policy_prior.update(
         prior_samples, self.policy_opt,
         SampleList(pol_info.policy_samples)
     )
     # Average the policy covariances over samples. This is not really
     # correct, but it works fine so long as the policy covariance
     # doesn't depend on state.
     mean_sig = np.mean(pol_sig, axis=0)
     # Estimate the policy linearization at each time step.
     for t in range(T):
         # Uniform sample weights and the stacked (state, action mean) data.
         weights = (1.0 / N) * np.ones(N)
         states_t = X[:, t, :]
         means_t = pol_mu[:, t, :]
         joint = np.concatenate((states_t, means_t), axis=1)
         # Obtain Normal-inverse-Wishart prior.
         mu0, Phi, mm, n0 = pol_info.policy_prior.eval(states_t, means_t)
         # Only the first time step gets a small regularizer on the state
         # block of the covariance.
         sig_reg = np.zeros((dX+dU, dX+dU))
         if t == 0:
             sig_reg[:dX, :dX] = 1e-8 * np.eye(dX)
         # Perform computation.
         K_t, k_t, S_t = gauss_fit_joint_prior(
             joint, mu0, Phi, mm, n0, weights, dX, dU, sig_reg
         )
         S_t += mean_sig[t, :, :]
         pol_info.pol_K[t, :, :] = K_t
         pol_info.pol_k[t, :] = k_t
         # Factor first so that nothing is stored if the factorization fails.
         chol_t = sp.linalg.cholesky(S_t)
         pol_info.pol_S[t, :, :] = S_t
         pol_info.chol_pol_S[t, :, :] = chol_t
예제 #4
0
 def _update_policy_fit(self, m, init=False):
     """
     Refit the per-time-step linearization of the policy around the
     current samples of one condition and store it in that condition's
     pol_info (pol_mu, pol_sig, pol_K, pol_k, pol_S, chol_pol_S).
     Args:
         m: Condition
         init: Whether this is the initial fitting of the policy.
     """
     dX, dU, T = self.dX, self.dU, self.T
     # Gather the samples of this condition and the policy's output on them.
     cond = self.cur[m]
     sample_list = cond.sample_list
     N = len(sample_list)
     pol_info = cond.pol_info
     X = sample_list.get_X()
     observations = sample_list.get_obs().copy()
     pol_mu, pol_sig = self.policy_opt.prob(observations)[:2]
     pol_info.pol_mu = pol_mu
     pol_info.pol_sig = pol_sig
     # Update policy prior: the current samples are only passed on the
     # initial fit, otherwise an empty sample list is used in their place.
     prior_samples = sample_list if init else SampleList([])
     pol_info.policy_prior.update(
         prior_samples, self.policy_opt,
         SampleList(pol_info.policy_samples)
     )
     # Average the policy covariances over samples. This is not really
     # correct, but it works fine so long as the policy covariance
     # doesn't depend on state.
     mean_sig = np.mean(pol_sig, axis=0)
     # Estimate the policy linearization at each time step.
     for t in range(T):
         # Uniform sample weights and the stacked (state, action mean) data.
         weights = (1.0 / N) * np.ones(N)
         states_t = X[:, t, :]
         means_t = pol_mu[:, t, :]
         joint = np.concatenate((states_t, means_t), axis=1)
         # Obtain Normal-inverse-Wishart prior.
         mu0, Phi, mm, n0 = pol_info.policy_prior.eval(states_t, means_t)
         # Only the first time step gets a small regularizer on the state
         # block of the covariance.
         sig_reg = np.zeros((dX+dU, dX+dU))
         if t == 0:
             sig_reg[:dX, :dX] = 1e-8 * np.eye(dX)
         # Perform computation.
         K_t, k_t, S_t = gauss_fit_joint_prior(
             joint, mu0, Phi, mm, n0, weights, dX, dU, sig_reg
         )
         S_t += mean_sig[t, :, :]
         pol_info.pol_K[t, :, :] = K_t
         pol_info.pol_k[t, :] = k_t
         # Factor first so that nothing is stored if the factorization fails.
         chol_t = sp.linalg.cholesky(S_t)
         pol_info.pol_S[t, :, :] = S_t
         pol_info.chol_pol_S[t, :, :] = chol_t
예제 #5
0
    def fit(self, X, pol_mu, pol_sig):
        """
        Compute a per-time-step linearization of the policy from samples.

        Args:
            X: Samples (N, T, dX)
            pol_mu: Policy means (N, T, dU)
            pol_sig: Policy covariances. They are averaged over the sample
                axis and added to the (T, dU, dU) result, so presumably
                (N, T, dU, dU) -- TODO confirm against callers.
        Returns:
            Tuple (pol_K, pol_k, pol_S) with shapes (T, dU, dX), (T, dU)
            and (T, dU, dU).
        Raises:
            ValueError: If exactly one sample is given (N == 1).
        """
        num_samples, horizon, dX = X.shape
        dU = pol_mu.shape[2]
        if num_samples == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        # Average the covariances over samples. (Per the original author
        # this is only correct because the policy doesn't depend on state.)
        mean_sig = np.mean(pol_sig, axis=0)

        # Output buffers, one slice per time step.
        gains = np.zeros([horizon, dU, dX])
        offsets = np.zeros([horizon, dU])
        covars = np.zeros([horizon, dU, dU])

        # Least squares regression with uniform sample weights.
        weights = (1.0 / num_samples) * np.ones(num_samples)
        for t in range(horizon):
            states_t = X[:, t, :]
            means_t = pol_mu[:, t, :]
            joint = np.concatenate([states_t, means_t], axis=1)
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.eval(states_t, means_t)
            # Regularize the diagonal of the state block; the first time
            # step uses its own value.
            sig_reg = np.zeros((dX + dU, dX + dU))
            reg_value = (self._init_sig_reg if t == 0
                         else self._subsequent_sig_reg)
            np.fill_diagonal(sig_reg[:dX, :dX], reg_value)
            K_t, k_t, S_t = gauss_fit_joint_prior(
                joint, mu0, Phi, mm, n0, weights, dX, dU, sig_reg)
            gains[t, :, :] = K_t
            offsets[t, :] = k_t
            covars[t, :, :] = S_t
        covars += mean_sig
        return gains, offsets, covars
예제 #6
0
파일: policy_prior.py 프로젝트: cbfinn/gps
    def fit(self, X, pol_mu, pol_sig):
        """
        Fit policy linearization.

        Args:
            X: Samples (N, T, dX)
            pol_mu: Policy means (N, T, dU)
            pol_sig: Policy covariances. They are averaged over the sample
                axis and added to the (T, dU, dU) result, so presumably
                (N, T, dU, dU) -- TODO confirm against callers.
        Returns:
            Tuple (pol_K, pol_k, pol_S) with shapes (T, dU, dX), (T, dU)
            and (T, dU, dU).
        Raises:
            ValueError: If exactly one sample is given (N == 1).
        """
        N, T, dX = X.shape
        dU = pol_mu.shape[2]
        if N == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        # Collapse policy covariances. (This is only correct because
        # the policy doesn't depend on state).
        pol_sig = np.mean(pol_sig, axis=0)

        # Allocate.
        pol_K = np.zeros([T, dU, dX])
        pol_k = np.zeros([T, dU])
        pol_S = np.zeros([T, dU, dU])

        # Fit policy linearization with least squares regression.
        dwts = (1.0 / N) * np.ones(N)
        for t in range(T):
            Ts = X[:, t, :]
            Ps = pol_mu[:, t, :]
            Ys = np.concatenate([Ts, Ps], axis=1)
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.eval(Ts, Ps)
            sig_reg = np.zeros((dX+dU, dX+dU))
            # Slightly regularize on first timestep. Only the diagonal of
            # the state block is touched; assigning the bare scalar would
            # broadcast 1e-8 into every entry of the block, including the
            # off-diagonal ones.
            if t == 0:
                sig_reg[:dX, :dX] = 1e-8 * np.eye(dX)
            pol_K[t, :, :], pol_k[t, :], pol_S[t, :, :] = \
                    gauss_fit_joint_prior(Ys,
                            mu0, Phi, mm, n0, dwts, dX, dU, sig_reg)
        pol_S += pol_sig
        return pol_K, pol_k, pol_S
예제 #7
0
    def fit_delta(self, X, U):
        """
        Fit time-varying dynamics by regressing on state differences.

        For every time step t in [0, T-2], the rows
        (x_t, u_t, x_{t+1} - x_t) of all samples are stacked and handed,
        together with the prior returned by self.prior.eval, to
        gauss_fit_joint_prior. Because the regression target is the state
        change, an identity on the state columns is added to the fitted
        matrix before it is stored. The entries for the last time step
        (T-1) are left at zero, since the loop stops at T-2.

        Args:
            X: States, indexed as (N, T, dX).
            U: Actions, indexed as (N, T, dU).
        Returns:
            Tuple (Fm, fv, dyn_covar) with shapes (T, dX, dX+dU), (T, dX)
            and (T, dX, dX).
        Raises:
            ValueError: If exactly one sample is given (N == 1).
        """
        N, T, dX = X.shape
        dU = U.shape[2]

        if N == 1:
            raise ValueError("Cannot fit dynamics on 1 sample")

        # X_delta[:, t, :] = X[:, t, :] - X[:, t-1, :]; index 0 stays zero.
        X_delta = np.zeros((N, T, dX))
        X_delta[:, 1:, :] = X[:, 1:, :] - X[:, :-1, :]

        self.Fm = np.zeros([T, dX, dX + dU])
        self.fv = np.zeros([T, dX])
        self.dyn_covar = np.zeros([T, dX, dX])
        # Identity on the state columns, zeros on the action columns.
        Fm_delta = np.eye(dX, dX + dU)

        # Index range of the regression inputs (state, action) inside Ys.
        it = slice(dX + dU)
        # Fit dynamics with least squares regression; all samples get the
        # same weight.
        dwts = (1.0 / N) * np.ones(N)
        for t in range(T - 1):
            # One row per sample: [x_t, u_t, x_{t+1} - x_t].
            Ys = np.c_[X[:, t, :], U[:, t, :], X_delta[:, t + 1, :]]
            # Obtain Normal-inverse-Wishart prior.
            mu0, Phi, mm, n0 = self.prior.eval(dX, dU, Ys)
            # Regularization is applied to the (state, action) block only.
            # NOTE(review): if 'regularization' is a scalar it is written to
            # every entry of that block, not just the diagonal -- confirm
            # this is intended.
            sig_reg = np.zeros((dX + dU + dX, dX + dU + dX))
            sig_reg[it, it] = self._hyperparams['regularization']
            Fm, fv, dyn_covar = gauss_fit_joint_prior(Ys, mu0, Phi, mm, n0,
                                                      dwts, dX + dU, dX,
                                                      sig_reg)
            # Turn the fitted delta model back into a next-state model.
            self.Fm[t, :, :] = Fm + Fm_delta
            self.fv[t, :] = fv
            self.dyn_covar[t, :, :] = dyn_covar
        return self.Fm, self.fv, self.dyn_covar