def convert_angle_dimensions(mx, Sx, angle_dims=None):
    '''
    Converts the angle dimensions of a state (or state distribution) using
    the helpers in utils.

    When Sx is None, mx is passed through utils.gTrig. A one-dimensional mx
    is temporarily promoted to a single-row matrix for that call and the
    result is flattened again afterwards. When Sx is given, mx and Sx are
    passed through utils.gTrig2 and only its first two outputs are kept;
    there, angle dimensions are removed and their complex representation
    is appended.

    @param mx state, or mean of the state distribution
    @param Sx variance of the state distribution, or None when mx is a
              deterministic input
    @param angle_dims indices of the angle dimensions; None (the default)
                      is treated as an empty list
    @return the tuple (mxa, Sxa); Sxa is None whenever Sx is None
    '''
    # a None sentinel avoids sharing one mutable list object across calls
    if angle_dims is None:
        angle_dims = []

    if Sx is not None:
        # angle dimensions are removed, and their complex
        # representation is appended
        mxa, Sxa = utils.gTrig2(mx, Sx, angle_dims)[:2]
        return mxa, Sxa

    # no covariance given: only the input itself is converted
    flatten = mx.ndim == 1
    if flatten:
        # gTrig is called with a matrix, so promote the single vector to
        # a one-row matrix
        mx = mx[None, :]
    mxa = utils.gTrig(mx, angle_dims)
    if flatten:
        # since we are dealing with one input vector at a time
        mxa = mxa.flatten()
    return mxa, None
def propagate_belief(mx, Sx, policy, dynmodel, angle_dims=None):
    '''
    Builds the one-step propagation of the state belief Normal(mx, Sx)
    through the policy and the dynamics model, returning the belief over
    the next state.

    This is done by:
        1) converting the angle dimensions of the belief with utils.gTrig2
        2) evaluating the current policy on the converted belief
        3) assembling the joint state-control distribution and using the
           dynamics model to predict the change in state
        4) combining the predicted change with the input belief

    No cost is evaluated by this function.

    This implementation is based on Theano, thus all operations are
    assumed to be symbolic; i.e. we are constructing a computation graph.

    @param mx mean of state distribution (tt.vector)
    @param Sx variance of state distribution (tt.matrix)
    @param policy Interface to the policy operations, compatible with
                  moment matching; its predict method must return a
                  (mean, variance, covariance term) triple
    @param dynmodel dynamics model compatible with moment matching; its
                    predict method must return a
                    (mean, variance, covariance term) triple
    @param angle_dims indices of the angle dimensions of the state. None
                      is treated as an empty list; lists and tuples are
                      converted to an int32 numpy array
    @return the tuple ([mx_next, Sx_next], updates), where updates is a
            newly created, empty theano.updates.OrderedUpdates
    '''
    if angle_dims is None:
        angle_dims = []
    if isinstance(angle_dims, (list, tuple)):
        angle_dims = np.array(angle_dims, dtype=np.int32)
    n_dims = mx.size

    # convert angles from input distribution to their complex representation
    m_trig, S_trig, C_trig = utils.gTrig2(mx, Sx, angle_dims)

    # compute distribution of control signal
    m_ctrl, S_ctrl, C_ctrl = policy.predict(m_trig, S_trig)

    # compute state control joint distribution
    m_joint = tt.concatenate([m_trig, m_ctrl])
    policy_gives_covariance = (
        isinstance(policy, regression.SSGP)
        or isinstance(policy, regression.BNN))
    # for SSGP and BNN policies the third output is used as is; for any
    # other policy it is premultiplied by the input covariance
    S_cross = C_ctrl if policy_gives_covariance else S_trig.dot(C_ctrl)
    joint_top = tt.concatenate([S_trig, S_cross], axis=1)
    joint_bottom = tt.concatenate([S_cross.T, S_ctrl], axis=1)
    S_joint = tt.concatenate([joint_top, joint_bottom], axis=0)  # [D+U]x[D+U]

    # predict the change in state given current state-action
    # C_delta = inv (S_joint) dot S_joint_delta
    m_delta, S_delta, C_delta = dynmodel.predict(m_joint, S_joint)

    # compute the successor state distribution
    mx_next = mx + m_delta

    # SSGP and BNN return C_delta as the input-output covariance. All the
    # others do it as (input covariance)^-1 dot (input-output covariance)
    dynmodel_gives_covariance = (
        isinstance(dynmodel, regression.SSGP)
        or isinstance(dynmodel, regression.BNN))
    S_joint_delta = (
        C_delta if dynmodel_gives_covariance else S_joint.dot(C_delta))

    # indices of the state dimensions that are not listed in angle_dims
    all_dims = tt.arange(n_dims)
    matches_angle = tt.eq(all_dims, angle_dims[:, None])
    non_angle_dims = (1 - matches_angle).prod(0).nonzero()[0]
    n_angles = angle_dims.size
    n_trig = n_dims + n_angles
    n_plain = n_dims - n_angles

    # this contains the covariance between the previous state (with angles
    # as [sin,cos]), and the next state (with angles in radians)
    S_trig_delta = S_joint_delta[:n_trig]
    # first come the non angle dimensions [D-len(angle_dims)] x [D]
    S_plain_delta = S_trig_delta[:n_plain]
    # then angles as [sin,cos] [2*len(angle_dims)] x [D]
    S_sincos_delta = S_trig_delta[n_plain:]

    # here we undo the [sin,cos] parametrization for the angle dimensions
    S_angle_sincos = Sx.dot(C_trig)[angle_dims]
    S_sincos = S_trig[n_plain:, n_plain:]
    S_angle_delta = S_angle_sincos.dot(
        tt.slinalg.solve(S_sincos, S_sincos_delta))

    # now we create the state/change covariance and fill it with the
    # appropriate values (i.e. in the correct order)
    S_state_delta = tt.zeros((n_dims, n_dims))
    S_state_delta = tt.set_subtensor(
        S_state_delta[non_angle_dims, :], S_plain_delta)
    S_state_delta = tt.set_subtensor(
        S_state_delta[angle_dims, :], S_angle_delta)

    Sx_next = Sx + S_delta + S_state_delta + S_state_delta.T

    # nothing is collected from the policy or the dynamics model here; an
    # empty updates dictionary is always returned
    updates = theano.updates.OrderedUpdates()
    return [mx_next, Sx_next], updates