コード例 #1
0
def convert_angle_dimensions(mx, Sx, angle_dims=None):
    ''' Converts the angle dimensions of a state into their complex
        representation, using the helpers in utils.

        When Sx is None, only the mean is converted, via utils.gTrig. A
        1-dimensional mx is temporarily promoted to a single-row matrix for
        that call, and the result is flattened back to a vector. Otherwise,
        mean and covariance are converted jointly via utils.gTrig2.

        @param mx state mean (or a plain state vector when Sx is None)
        @param Sx state covariance, or None if only mx should be converted
        @param angle_dims indices of the dimensions that are angles. None
               (the default) is treated as an empty list, i.e. no angle
               dimensions
        @return the tuple (mxa, Sxa); Sxa is None whenever Sx is None
    '''
    # a None sentinel is used instead of a mutable [] default, which would be
    # a single list object shared by every call
    if angle_dims is None:
        angle_dims = []

    if Sx is None:
        # utils.gTrig is called with a 2-dimensional input, so a lone vector
        # is wrapped as a single row first
        flatten = mx.ndim == 1
        if flatten:
            mx = mx[None, :]
        mxa = utils.gTrig(mx, angle_dims)
        if flatten:
            # since we are dealing with one input vector at a time
            mxa = mxa.flatten()
        Sxa = None
    else:
        # angle dimensions are removed, and their complex
        # representation is appended. Only the first two outputs of gTrig2
        # (mean and covariance) are kept
        mxa, Sxa = utils.gTrig2(mx, Sx, angle_dims)[:2]

    return mxa, Sxa
コード例 #2
0
def _input_output_covariance(model, S_in, C):
    ''' Returns the input-output covariance for the output C of
        model.predict, given the input covariance S_in.

        SSGP and BNN return C as the input-output covariance. All the
        others do it as (input covariance)^-1 dot (input-output covariance),
        so C is multiplied by S_in in that case.
    '''
    if isinstance(model, (regression.SSGP, regression.BNN)):
        return C
    return S_in.dot(C)


def propagate_belief(mx, Sx, policy, dynmodel, angle_dims=None):
    ''' Given the input variables mx (tt.vector) and Sx (tt.matrix),
        representing the mean and variance of the system's state x, this
        function returns the next state distribution. This is done by:
        1) converting the angle dimensions to their complex representation
        2) evaluating the current policy
        3) using the dynamics model to estimate the change in state
        4) adding the predicted change to the current state distribution,
           including the covariance between the state and its change.

        This implementation is based on Theano, thus all operations are assumed
        to be symbolic; i.e. we are constructing a computation graph.

        @param mx mean of state distribution
        @param Sx variance of state distribution
        @param policy Interface to the policy operations, compatible with
               moment matching
        @param dynmodel dynamics model compatible with moment matching
        @param angle_dims indices of the state dimensions that are angles.
               None is treated as no angle dimensions; lists and tuples are
               converted to an int32 numpy array
        @return the tuple ([mx_next, Sx_next], updates), where mx_next and
                Sx_next are the mean and variance of the next state, and
                updates is an empty theano.updates.OrderedUpdates
    '''
    if angle_dims is None:
        angle_dims = []
    if isinstance(angle_dims, (list, tuple)):
        angle_dims = np.array(angle_dims, dtype=np.int32)
    D = mx.size

    # convert angles from input distribution to their complex representation
    mxa, Sxa, Ca = utils.gTrig2(mx, Sx, angle_dims)

    # compute distribution of control signal
    mu, Su, Cu = policy.predict(mxa, Sxa)

    # compute state control joint distribution
    mxu = tt.concatenate([mxa, mu])
    # q is the covariance between the (converted) state and the control
    q = _input_output_covariance(policy, Sxa, Cu)
    Sxu_up = tt.concatenate([Sxa, q], axis=1)
    Sxu_lo = tt.concatenate([q.T, Su], axis=1)
    Sxu = tt.concatenate([Sxu_up, Sxu_lo], axis=0)  # [D+U]x[D+U]

    # predict the change in state given current state-action
    m_deltax, S_deltax, C_deltax = dynmodel.predict(mxu, Sxu)

    # compute the successor state distribution
    mx_next = mx + m_deltax

    # covariance between the state-control input and the change in state
    Sxu_deltax = _input_output_covariance(dynmodel, Sxu, C_deltax)

    # indices of the dimensions that do not appear in angle_dims
    idx = tt.arange(D)
    non_angle_dims = (1 - tt.eq(idx, angle_dims[:, None])).prod(0).nonzero()[0]
    Da = D + angle_dims.size
    Dna = D - angle_dims.size
    # this contains the covariance between the previous state (with angles
    # as [sin,cos]), and the next state (with angles in radians)
    Sxa_deltax = Sxu_deltax[:Da]
    # first come the non angle dimensions  [D-len(angle_dims)] x [D]
    sxna_deltax = Sxa_deltax[:Dna]
    # then angles as [sin,cos]             [2*len(angle_dims)] x [D]
    sxsc_deltax = Sxa_deltax[Dna:]
    # here we undo the [sin,cos] parametrization for the angle dimensions
    Sx_sc = Sx.dot(Ca)[angle_dims]
    Sa = Sxa[Dna:, Dna:]
    sxa_deltax = Sx_sc.dot(tt.slinalg.solve(Sa, sxsc_deltax))
    # now we create Sx_deltax and fill it with the appropriate values
    # (i.e. in the correct order)
    Sx_deltax = tt.zeros((D, D))
    Sx_deltax = tt.set_subtensor(Sx_deltax[non_angle_dims, :], sxna_deltax)
    Sx_deltax = tt.set_subtensor(Sx_deltax[angle_dims, :], sxa_deltax)

    Sx_next = Sx + S_deltax + Sx_deltax + Sx_deltax.T

    # no updates are collected from the models here; an empty updates
    # dictionary is returned alongside the next state distribution
    updates = theano.updates.OrderedUpdates()

    return [mx_next, Sx_next], updates