Example #1
def build_dataset(idims=9,odims=6,angi=[],f=test_func1,n_train=500,n_test=50, input_noise=0.01, output_noise=0.01,rand_seed=None):
    if rand_seed is not None:
        np.random.seed(rand_seed)
    #  ================== train dataset ==================
    # sample training points
    x_train = 15*(np.random.rand(n_train,idims) - 0.25)
    # generate the output at the training points
    y_train = np.empty((n_train,odims))
    for i in range(odims):
        y_train[:,i] =  (i+1)*f(x_train) + output_noise*(np.random.randn(n_train))
    x_train = utils.gTrig_np(x_train, angi)
    
    #  ================== test  dataset ==================
    # generate testing points
    #kk = input_noise*convolve2d(np.array([[1,2,3,2,1]]),np.array([[1,2,3,2,1]]).T)/9.0;
    #s_test = convolve2d(np.eye(idims),kk,'same')
    s_test = input_noise*np.eye(idims)
    s_test = np.tile(s_test,(n_test,1)).reshape(n_test,idims,idims)
    x_test = 120*(np.random.rand(n_test,idims) - 0.5)
    # generate the output at the test points
    y_test = np.empty((n_test,odims))
    for i in range(odims):
        y_test[:,i] =  (i+1)*f(x_test)
    if len(angi)>0:
        x_test,s_test = utils.gTrig2_np(x_test,s_test, angi, idims)

    return (x_train,y_train),(x_test,y_test,s_test)
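A minimal usage sketch, assuming NumPy is imported as np, that utils.gTrig_np returns its input unchanged when angi is empty, and that f is any callable mapping an (n, idims) array to an (n,) vector; the toy function below only stands in for the snippet's default test_func1, which is not shown here:

import numpy as np

def toy_func(x):
    # maps each (idims,)-row to a scalar; stand-in for test_func1
    return np.sin(x).sum(axis=1)

(x_tr, y_tr), (x_te, y_te, s_te) = build_dataset(
    idims=4, odims=2, angi=[], f=toy_func,
    n_train=200, n_test=20, rand_seed=0)
print(x_tr.shape, y_tr.shape)   # (200, 4) (200, 2)
print(x_te.shape, s_te.shape)   # (20, 4) (20, 4, 4)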
Example #2
    def train_inverse_dynamics(self, deltas=True):
        utils.print_with_stamp('Training inverse dynamics model', self.name)

        X = []
        Y = []
        n_episodes = len(self.experience.states)

        if n_episodes > 0:
            # construct training dataset
            for i in range(self.next_episode_inv, n_episodes):
                x = np.array(self.experience.states[i])
                u = np.array(self.experience.actions[i])

                # inputs are consecutive state pairs <x_t, x_{t+1}>, or <x_t, x_t - x_{t+1}> when deltas=True
                x_ = utils.gTrig_np(x, self.angle_idims)
                if deltas:
                    X.append(np.hstack((x_[:-1], x_[:-1] - x_[1:])))
                else:
                    X.append(np.hstack((x_[:-1], x_[1:])))
                # outputs are the actions that produced the input state transition
                Y.append(u[:-1])

            self.next_episode_inv = n_episodes
            X = np.vstack(X)
            Y = np.vstack(Y)

            # get distribution of initial states
            x0 = np.array([x[0] for x in self.experience.states])
            if n_episodes > 1:
                self.mx0.set_value(x0.mean(0).astype('float64'))
                self.Sx0.set_value(np.cov(x0.T).astype('float64'))
            else:
                self.mx0.set_value(x0.astype('float64').flatten())
                self.Sx0.set_value(1e-2 * np.eye(x0.size).astype('float64'))

            # append data to the dynamics model
            self.inverse_dynamics_model.append_dataset(X, Y)
        else:
            x0 = np.array(self.plant.x0, dtype='float64').squeeze()
            S0 = np.array(self.plant.S0, dtype='float64').squeeze()
            self.mx0.set_value(x0)
            self.Sx0.set_value(S0)

        utils.print_with_stamp(
            'Dataset size:: Inputs: [ %s ], Targets: [ %s ]  ' %
            (self.inverse_dynamics_model.X.get_value().shape,
             self.inverse_dynamics_model.Y.get_value().shape), self.name)
        if self.inverse_dynamics_model.should_recompile:
            # reinitialize log likelihood
            self.inverse_dynamics_model.init_loss()

        self.inverse_dynamics_model.train()
        utils.print_with_stamp('Done training inverse dynamics model',
                               self.name)
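The same pairing logic can be reproduced outside the class; a minimal NumPy sketch (with a hypothetical trajectory and no angle dimensions) of how the (X, Y) pairs for one episode are built when deltas=True:

import numpy as np

# hypothetical trajectory: H states of dimension D and H actions of dimension U
H, D, U = 6, 3, 1
x = np.random.randn(H, D)
u = np.random.randn(H, U)

# deltas=True: inputs are <x_t, x_t - x_{t+1}>, targets are the actions u_t
X = np.hstack((x[:-1], x[:-1] - x[1:]))   # shape (H-1, 2*D)
Y = u[:-1]                                # shape (H-1, U)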
Example #3
    def get_state(self, noisy=True):
        state = self.state

        if noisy and self.noise_dist is not None:
            # noisy state measurement
            state += self.noise_dist.sample(1).flatten()

        if self.angle_dims:
            # convert angle dimensions to complex representation
            state = gTrig_np(state, self.angle_dims).flatten()
        return state.flatten(), self.t
Example #4
def distance_based_cost(mx,
                        Sx,
                        target,
                        Q,
                        cw=[1],
                        expl=None,
                        angle_dims=[],
                        loss_func=quadratic_saturating_loss,
                        *args,
                        **kwargs):
    '''
        Loss function that depends on a quadratic function of state
    '''
    if isinstance(target, list) or isinstance(target, tuple):
        target = np.array(target)

    # convert angle dimensions
    if angle_dims:
        if isinstance(target, np.ndarray):
            target = utils.gTrig_np(target, angle_dims).flatten()
        else:
            target = utils.gTrig(target, angle_dims).flatten()
        mx, Sx = convert_angle_dimensions(mx, Sx, angle_dims)

    Q = Q.astype(theano.config.floatX)
    target = target.astype(theano.config.floatX)

    if not isinstance(cw, list):
        cw = [cw]
    if Sx is None:
        # deterministic case
        cost = []
        # total cost is the sum of costs with different widths
        # TODO this can be vectorized
        for c in cw:
            cost_c = loss_func(mx, None, target, Q / c**2, *args, **kwargs)
            cost.append(cost_c)
        return sum(cost) / len(cw)
    else:
        M_cost = []
        S_cost = []
        # total cost is the sum of costs with different widths
        # TODO this can be vectorized
        for c in cw:
            m_cost, s_cost = loss_func(mx, Sx, target, Q / c**2, *args,
                                       **kwargs)
            # add UCB  exploration term
            if expl is not None and expl != 0.0:
                m_cost += expl * tt.sqrt(s_cost)
            M_cost.append(m_cost)
            S_cost.append(s_cost)
        return sum(M_cost) / len(cw), sum(S_cost) / (len(cw)**2)
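For reference, the default quadratic_saturating_loss is not shown in this section; assuming it computes the PILCO-style saturating cost 1 - exp(-0.5 (m - target)^T Q (m - target)), the deterministic branch above averages it over the widths in cw, roughly as in this NumPy sketch:

import numpy as np

def saturating_loss_np(mx, target, Q):
    # assumed form: 1 - exp(-0.5 * (mx - target)^T Q (mx - target))
    d = mx - target
    return 1.0 - np.exp(-0.5 * d.dot(Q).dot(d))

mx = np.array([0.5, -0.2])
target = np.zeros(2)
Q = np.eye(2)
cw = [1, 2]
# sum of costs with different widths, divided by the number of widths
cost = sum(saturating_loss_np(mx, target, Q / c**2) for c in cw) / len(cw)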
Example #5
    def train_adjustment(self, deltas=True):
        n_source_traj = 5
        n_target_traj = 3
        total_trajectories = len(self.source_experience.states)
        X = []
        Y = []
        Y_var = []

        # from source trajectories
        for i in range(max(0, total_trajectories - n_source_traj),
                       total_trajectories):
            self.inverse_dynamics_model.update()
            # for every state transition in the source experience, use the target inverse dynamics
            # to find an action that would produce the desired transition
            x = np.array(self.source_experience.states[i])
            x_s = utils.gTrig_np(x, self.angle_idims)
            # source transitions
            if deltas:
                t_s = np.hstack((x_s[:-1], x_s[:-1] - x_s[1:]))
            else:
                t_s = np.hstack((x_s[:-1], x_s[1:]))

            # source actions
            u_s = np.array(self.source_experience.actions[i])

            # target actions
            u_t = []
            Su_t = []
            for t_s_i in t_s:
                # get prediction from inverse dynamics model
                u, Su, Cu = self.inverse_dynamics_model.predict(t_s_i)
                u_t.append(np.random.multivariate_normal(u, Su))
                #u_t.append(u)
                Su_t.append(np.diag(np.maximum(Su, 1e-9)))

            u_t = np.stack(u_t)
            Su_t = np.stack(Su_t)

            if self.policy.use_control_input:
                X.append(np.hstack([x_s[1:-1], u_s[1:-1]]))
            else:
                X.append(x_s[:-1])

            Y.append(u_t[1:] - u_s[1:-1])
            Y_var.append(Su_t[1:])

        X = np.vstack(X)
        Y = np.vstack(Y)
        Y_var = np.vstack(Y_var)
        self.policy.adjustment_model.set_dataset(X, Y, Y_var=Y_var)
        self.policy.adjustment_model.train()
        self.policy.adjustment_model.update()
Example #6
    def init_params(self, compile_funcs=False):
        utils.print_with_stamp('Initializing parameters', self.name)

        # init inputs
        inputs_ = self.state0_dist.sample(self.n_inducing)
        inputs = utils.gTrig_np(inputs_, self.angle_dims)

        # set the initial hyperparameters (1 for linear dimensions, 0.7 for
        # angular), stored in unconstrained form via the inverse softplus
        l0 = np.hstack([
            np.ones(inputs_.shape[1] - len(self.angle_dims)),
            0.7 * np.ones(2 * len(self.angle_dims)), 1, 0.01
        ])

        l0 = np.tile(l0, (self.maxU.size, 1)).astype(floatX)
        l0 = np.log(np.exp(l0, dtype=floatX) - 1.0)

        # init policy targets close to zero
        mu = np.zeros((self.maxU.size, ))
        Su = 0.1 * np.eye(self.maxU.size)
        targets = utils.distributions.Gaussian(mu, Su).sample(self.n_inducing)
        targets = targets.reshape((self.n_inducing, self.maxU.size))

        self.trained = False

        # set the parameters
        self.N = inputs.shape[0]
        self.D = inputs.shape[1]
        self.E = targets.shape[1]

        self.set_params({
            'X': inputs.astype(floatX),
            'Y': targets.astype(floatX)
        })
        self.set_params({'unconstrained_hyp': l0.astype(floatX)})
        eps = np.finfo(np.__dict__[floatX]).eps
        self.hyp = tt.nnet.softplus(self.unconstrained_hyp) + eps

        # don't optimize the signal and noise variances
        self.hyp = tt.concatenate([
            self.hyp[:, :-2],
            theano.gradient.disconnected_grad(self.hyp[:, -2:])
        ],
                                  axis=np.array(1, dtype='int64'))

        # call init loss to initialize the intermediate shared variables
        super(RBFGP, self).get_loss(cache_intermediate=False)

        # init the prediction function
        self.evaluate(np.zeros((self.D, )))
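The two lines that build l0 apply an inverse softplus so that tt.nnet.softplus(unconstrained_hyp) recovers the intended initial values; a small NumPy check of that round trip (a sketch, not part of the original class):

import numpy as np

def softplus(z):
    return np.log1p(np.exp(z))

l0 = np.array([1.0, 0.7, 1.0, 0.01])   # intended initial hyperparameters
u0 = np.log(np.exp(l0) - 1.0)          # inverse softplus, as in init_params
print(np.allclose(softplus(u0), l0))   # True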
Example #7
def preprocess_angles(x_t, angle_dims=[]):
    x_t_ = utils.gTrig_np(x_t[None, :], angle_dims).flatten()
    return x_t_
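Every example in this section funnels states through utils.gTrig_np. Presumably it replaces each angle dimension with its (sin, cos) pair while keeping the remaining dimensions; the NumPy sketch below is an assumption about that behaviour, not the library's actual implementation:

import numpy as np

def gtrig_np_sketch(x, angle_dims):
    # x: (n, D) array; angle_dims: column indices holding angles
    x = np.atleast_2d(x)
    others = [i for i in range(x.shape[1]) if i not in angle_dims]
    ang = x[:, angle_dims]
    # non-angle dimensions first, then sin and cos of every angle dimension
    return np.concatenate([x[:, others], np.sin(ang), np.cos(ang)], axis=1)

# a 3D state whose last dimension is an angle becomes 4D
print(gtrig_np_sketch(np.array([[0.1, -0.3, np.pi / 2]]), [2]).shape)  # (1, 4)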
Example #8
    def get_dynmodel_dataset(self, deltas=True, filter_episodes=None,
                             angle_dims=None, x_steps=1,
                             u_steps=1, output_steps=1, return_costs=False,
                             stack=False):
        '''
        Returns a dataset where the inputs are state_actions and the outputs
        are next steps.
        Parameters:
        -----------
        deltas: whether to return changes in state
                (x_t - x_{t-1}, x_{t-1} - x_{t-2}, ...)
                or future states
                (x_t, x_{t-1}, x_{t-2}, ...), in the output
        filter_episodes: list of episode indices from which to extract data.
                         if the list is empty or undefined (equal to None),
                         data is extracted from all episodes
        angle_dims: indices of input state dimensions to linearize, by
                    converting to complex
                    representation \theta => (sin(\theta), cos(\theta))
        x_steps: how many past states to concatenate as input
        u_steps: how many past actions to concatenate as input
        output_steps: how many steps in the future to concatenate as output
        return_costs: whether to append the cost feedback to the output
        stack: whether to stack or concatenate the multi step data
        Returns:
        --------
        X: if stack is False, X is a numpy array of shape
           [n, x_steps*D + u_steps*U], where n is the number of data samples,
           D the (angle-expanded) state dimension and U the action dimension.
           if stack is True, the shape of X is [n, x_steps, D + U]
        Y: the corresponding targets; next states or state deltas (depending
           on the deltas flag), with costs appended when return_costs is True
        '''
        filter_episodes = filter_episodes or []
        angle_dims = angle_dims or []
        inputs, targets = [], []
        join = np.stack if stack else np.concatenate
        if stack:
            # ignore the u_steps parameter
            u_steps = x_steps
            # output steps
            output_steps = x_steps + output_steps - 1

        if not isinstance(filter_episodes, list):
            filter_episodes = [filter_episodes]
        if len(filter_episodes) < 1:
            # use all data
            filter_episodes = list(range(self.n_episodes()))
        for epi in filter_episodes:
            if len(self.states[epi]) == 0:
                continue
            # get state action pairs for current episode
            states, actions = np.array(
                self.states[epi]), np.array(self.actions[epi])
            # convert input angle dimensions to complex representation
            states_ = utils.gTrig_np(np.array(states), angle_dims)
            # pad with initial state for the first x_steps timesteps
            states_ = np.concatenate([states_[[0]*(x_steps-1)], states_])
            # get input states up to x_steps in the past.

            states_ = join(
                [states_[i:i-x_steps-(output_steps-1), :]
                 for i in range(x_steps)],
                axis=1)
            # same for actions (u_steps in the past, pad with zeros for the
            # first u_steps)
            actions_ = np.concatenate(
                [np.zeros((u_steps-1, actions.shape[1])), actions])
            actions_ = join(
                [actions_[i:i-u_steps-(output_steps-1), :]
                 for i in range(u_steps)],
                axis=1)

            # create input vector
            inp = np.concatenate([states_, actions_], axis=-1)

            # get output states up to output_steps in the future
            H = states.shape[0]
            ostates = join(
                [states[i:H-(output_steps-i-1), :]
                 for i in range(output_steps)],
                axis=1)

            #  create output vector
            tgt = (ostates[1:, :] - ostates[:-1, :]
                   if deltas else ostates[1:, :])

            # append costs if requested
            if return_costs:
                costs = np.array(self.costs[epi])[:, None]
                ocosts = join(
                    [costs[i:H-(output_steps-i-1), :]
                     for i in range(output_steps)],
                    axis=1)
                tgt = np.concatenate([tgt, ocosts[1:, :]], axis=-1)

            inputs.append(inp)
            targets.append(tgt)

        ret = np.concatenate(inputs), np.concatenate(targets)
        return ret
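A hedged usage sketch, assuming experience is an instance of this dataset class with at least one recorded episode (the keyword values are illustrative only):

# hypothetical call on an experience dataset with recorded episodes
X, Y = experience.get_dynmodel_dataset(
    deltas=True,       # targets are state differences x_{t+1} - x_t
    angle_dims=[2],    # expand the third state dimension into (sin, cos)
    x_steps=1, u_steps=1, output_steps=1)
# X: [n, D_expanded + U] state-action inputs; Y: [n, D] state deltas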
Example #9
def gTrig(state, angle_dims=[]):
    return utils.gTrig_np(state, angle_dims).flatten()