Example #1
def Tfft2old(a):
	""" assumes len(a.shape)==2, does not assert this """
	# requires numpy as np, theano.tensor as T, and theano.tensor.fft as Tfft;
	# Tfftshift is presumably a Theano analogue of np.fft.fftshift defined elsewhere in the module
	s = a.shape[1]
	S = s//2+1                     # number of non-redundant columns returned by the real FFT
	aa = T.stack([a], axis=0)      # add a leading batch dimension, as Theano's rfft expects
	AA = Tfft.rfft(aa)
	# Theano's rfft returns real and imaginary parts in a trailing dimension of size 2
	B = AA[..., 0] + 1.j * AA[..., 1]
	# rfft only stores the left half of the spectrum; the right half follows from the
	# conjugate symmetry of the FFT of a real input
	C = B[0, ...]                  # drop the batch dimension
	CC = C[:, 1:S-1]               # columns that have a mirrored counterpart
	# the concatenate-based reconstruction below did not work correctly in practice
	return T.concatenate([C, T.conj(CC[:, ::-1])], axis=1)
	# unreachable draft of the set_subtensor-based reconstruction later used in Tfft2
	A = T.zeros_like(a)
	Afront = A[:, :S]
	Aback = A[:, S:]
	A = T.set_subtensor(Afront, B)
	A = T.set_subtensor(Aback, T.conj(Tfftshift(A))[:, S:])
	return A
Example #2
    def get_updates(self, learning_rate):
        # hand-written complex gradient-descent expression; kept for reference but not used below
        dW = self.W + learning_rate * self.negative_log_likelihood() * T.conj(self.input)
        # build (parameter, new_value) pairs from the symbolic gradients of the cost
        gparams = T.grad(self.cost(), self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))
        return updates
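A hedged sketch of how an update list like the one returned above is typically consumed; the variables here are illustrative placeholders, not taken from the snippet's class:

import numpy as np
import theano
import theano.tensor as T
# Each (shared_variable, new_expression) pair is passed to theano.function via `updates`,
# so every call to train_step also applies one gradient-descent step in place.
w = theano.shared(np.zeros(3), name='w')
x = T.dvector('x')
cost = T.sqr(T.dot(w, x))
updates = [(w, w - 0.01 * T.grad(cost, w))]
train_step = theano.function([x], cost, updates=updates)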
Example #3
 def resfunc(i, xvec, y, h1, h2, h3vec, U1, U2, U3ten, conjU1, conjU2,
             conjU3ten):
     # g, s, t2, t3, m, and p0 are taken from the enclosing scope, not passed as arguments
     # exponential dephasing factors for the three delay intervals
     deph1 = TT.exp(-g * (t2 - y))
     deph2 = TT.exp(-g * (t3 - t2))
     deph3 = TT.exp(-g * (xvec[i] - t3))
     # Gaussian inhomogeneous-broadening factors
     inhom14 = TT.exp(-s * ((xvec[i] - t3 + t2 - y)**2))
     inhom23 = TT.exp(-s * (((xvec[i] - t3) - (t2 - y))**2))
     # state after the first interaction, with m applied on the left of p0 (used by r1, r4)
     # or on the right of p0 (used by r2, r3)
     r14a = (TT.dot(U1, TT.dot(m, TT.dot(p0, conjU1)))) * deph1
     r23a = (TT.dot(U1, TT.dot(p0, TT.dot(m, conjU1)))) * deph1
     r1 = TTnlinalg.trace(
         TT.dot(m, ((TT.dot(
             U3ten[:, :, i],
             TT.dot(
                 m,
                 TT.dot((
                     (TT.dot(U2, TT.dot(m, TT.dot(r14a, conjU2)))) *
                     deph2), conjU3ten[:, :, i])))) * deph3))) * inhom14
     r2 = (TTnlinalg.trace(
         TT.dot(m, ((TT.dot(
             U3ten[:, :, i],
             TT.dot(((TT.dot(U2, TT.dot(m, TT.dot(r23a, conjU2)))) *
                     deph2), TT.dot(m, conjU3ten[:, :, i])))) *
                    deph3)))) * inhom23
     r3 = (TTnlinalg.trace(
         TT.dot(m, ((TT.dot(
             U3ten[:, :, i],
             TT.dot(
                 m,
                 TT.dot(((TT.dot(U2, TT.dot(r23a, TT.dot(m, conjU2)))) *
                         deph2), conjU3ten[:, :, i])))) *
                    deph3)))) * inhom23
     r4 = (TTnlinalg.trace(
         TT.dot(m, ((TT.dot(
             U3ten[:, :, i],
             TT.dot(((TT.dot(U2, TT.dot(r14a, TT.dot(m, conjU2)))) *
                     deph2), TT.dot(m, conjU3ten[:, :, i])))) *
                    deph3)))) * inhom14
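     # note: (1j * 1j * 1j) == -1j, and r - conj(r) == 2j * Im(r), so the combination below is real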
     return (1j * 1j * 1j) * h1 * h2 * h3vec[i] * (
         r1 + r2 + r3 + r4 - TT.conj(r1) - TT.conj(r2) - TT.conj(r3) -
         TT.conj(r4))
Example #4
def Tfft2(a):
	""" assumes len(a.shape)==2, does not assert this """
	s = a.shape[0]                 # note: rfft truncates the last axis, so this assumes a square input
	S = s//2+1
	aa = a.reshape((1, a.shape[0], a.shape[1]))   # add a leading batch dimension for Theano's rfft
	AA = Tfft.rfft(aa)
	# Theano's rfft returns real and imaginary parts in a trailing dimension of size 2
	B = AA[..., 0] + 1.j * AA[..., 1]
	C = B[0, ...]                  # drop the batch dimension
	CC = C[:, 1:S-1]               # unused here; leftover from the concatenate approach in Tfft2old
	# write the stored left half of the spectrum, then rebuild the right half
	# from conjugate symmetry via an fftshift
	A = T.zeros_like(a, dtype=C.dtype)   # complex output array; zeros_like(a) alone would be real-valued
	Afront = A[:, :S]
	A = T.set_subtensor(Afront, C)
	Aback = A[:, S:]
	A = T.set_subtensor(Aback, T.conj(Tfftshift(A))[:, S:])
	return A
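The conjugate-symmetry step in both Tfft2old and Tfft2 relies on the Hermitian symmetry of the Fourier transform of a real signal; below is a minimal, Theano-free NumPy check of that identity (a 1-D example for illustration, not part of the original code):

import numpy as np
# For a real signal x of length N, the full FFT satisfies X[k] == conj(X[N - k]),
# so np.fft.rfft keeps only the first N//2 + 1 bins and the rest can be rebuilt
# by conjugate reversal (even N assumed here).
x = np.random.randn(8)
X = np.fft.fft(x)
R = np.fft.rfft(x)                                   # bins 0 .. N//2
rebuilt = np.concatenate([R, np.conj(R[1:-1])[::-1]])
assert np.allclose(X, rebuilt)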
Example #5
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.get_variable_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        self.weights = accumulators
        self.updates = []

        lr = self.lr
        if self.inital_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
            self.updates.append(K.update_add(self.iterations, 1))

        for param, grad, accum, shape in zip(params, grads, accumulators, shapes):

            if ('natGrad' in param.name):
                if ('natGradRMS' in param.name):
                    # apply RMSprop rule to gradient before natural gradient step
                    new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                    self.updates.append(K.update(accum, new_accum))
                    grad = grad / (K.sqrt(new_accum) + self.epsilon)
                elif ('unitaryAug' in param.name):
                    # we don't care about the accumulated RMS for the natural gradient step
                    self.updates.append(K.update(accum, accum))

                # do a natural gradient step
                if ('unitaryAug' in param.name):
                    # unitary natural gradient step on augmented ReIm matrix
                    j = K.cast(1j, 'complex64')
                    # split the real-composite parameter and gradient into complex blocks X = A + jB, G = C + jD
                    A = K.cast(K.transpose(param[:shape[1] // 2, :shape[1] // 2]), 'complex64')
                    B = K.cast(K.transpose(param[:shape[1] // 2, shape[1] // 2:]), 'complex64')
                    X = A + j * B
                    C = K.cast(K.transpose(grad[:shape[1] // 2, :shape[1] // 2]), 'complex64')
                    D = K.cast(K.transpose(grad[:shape[1] // 2, shape[1] // 2:]), 'complex64')

                    """
                    Build skew-Hermitian matrix A from equation (8) of
                    GX^H = CA^T + DB^T + jDA^T - jCB^T.
                    """
                    GXH = K.dot(C, K.transpose(A)) + K.dot(D, K.transpose(B)) \
                          + j * K.dot(D, K.transpose(A)) - j * K.dot(C, K.transpose(B))
                    Askew = GXH - K.transpose(T.conj(GXH))
                    I = K.eye(shape[1] / 2)
                    two = K.cast(2, 'complex64')
                    CayleyDenom = I + (self.lr_natGrad / two) * Askew
                    CayleyNumer = I - (self.lr_natGrad / two) * Askew

                    # Multiplicative gradient step along Stiefel manifold equation.
                    Xnew = K.dot(K.dot(T.nlinalg.matrix_inverse(CayleyDenom), CayleyNumer), X)

                    # Convert to ReIm augmented form.
                    XnewRe = K.transpose(T.real(Xnew))
                    XnewIm = K.transpose(T.imag(Xnew))
                    new_param = K.concatenate((K.concatenate((XnewRe, XnewIm), axis=1), K.concatenate(((-1) * XnewIm, XnewRe), axis=1)),axis=0)
                else:
                    # Do the usual RMSprop update using lr_natGrad as learning rate.
                    # Update Accumulator.
                    new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                    self.updates.append(K.update(accum, new_accum))
                    new_param = param - self.lr_natGrad * grad / (K.sqrt(new_accum) + self.epsilon)
            else:
                # Do the usual RMSprop update.
                # Update accumulator.
                new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                self.updates.append(K.update(accum, new_accum))
                new_param = param - lr * grad / (K.sqrt(new_accum) + self.epsilon)

            # Apply Constraints.
            if param in constraints:
                c = constraints[param]
                new_param = c(new_param)
            self.updates.append(K.update(param, new_param))
        return self.updates
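The Cayley step above keeps the parameter exactly unitary: for a skew-Hermitian Askew, the matrix (I + c*Askew)^-1 (I - c*Askew) is itself unitary, so multiplying a unitary X by it stays on the manifold. A minimal NumPy sketch of that property (illustrative only, independent of the Keras code):

import numpy as np
# With A skew-Hermitian (A^H == -A), (I + (lr/2) A)^-1 (I - (lr/2) A) is unitary,
# so W_new = Cayley(A) @ W remains unitary.
rng = np.random.default_rng(0)
n, lr = 4, 0.1
M = rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
A = M - M.conj().T                       # skew-Hermitian matrix
W, _ = np.linalg.qr(M)                   # some unitary starting point
I = np.eye(n)
W_new = np.linalg.inv(I + (lr / 2) * A) @ (I - (lr / 2) * A) @ W
assert np.allclose(W_new.conj().T @ W_new, I)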
Example #6
 def step(X):
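     # C is captured from the enclosing scope, not passed as an argument of step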
     return T.square(T.conj(X)) + C
Example #7
def rms_prop(learning_rate, parameters, gradients, idx_project=None):
    rmsprop = [
        theano.shared(1e-3 * np.ones_like(p.get_value())) for p in parameters
    ]

    if idx_project is not None:
        # we will use projected gradient on the Stiefel manifold on these parameters
        # we will assume these parameters are unitary matrices in real-composite form
        parameters_proj = [parameters[i] for i in idx_project]
        gradients_proj = [gradients[i] for i in idx_project]
        sizes_proj = [p.shape for p in parameters_proj]
        # compute gradient in tangent space of Stiefel manifold (see Lemma 4 of [Tagare 2011])
        # X = A+jB
        Aall = [
            T.cast(T.transpose(p[:s[0] // 2, :s[0] // 2]), 'complex64')
            for s, p in zip(sizes_proj, parameters_proj)
        ]
        Ball = [
            T.cast(T.transpose(p[:s[0] // 2, s[0] // 2:]), 'complex64')
            for s, p in zip(sizes_proj, parameters_proj)
        ]
        # G = C+jD
        Call = [
            T.cast(T.transpose(g[:s[0] // 2, :s[0] // 2]), 'complex64')
            for s, g in zip(sizes_proj, gradients_proj)
        ]
        Dall = [
            T.cast(T.transpose(g[:s[0] // 2, s[0] // 2:]), 'complex64')
            for s, g in zip(sizes_proj, gradients_proj)
        ]
        # GX^H = CA^T + DB^T + jDA^T -jCB^T
        GXHall = [T.dot(C,T.transpose(A)) + T.dot(D,T.transpose(B))  \
               + T.cast(1j,'complex64')*T.dot(D,T.transpose(A)) - T.cast(1j,'complex64')*T.dot(C,T.transpose(B)) \
               for A, B, C, D in zip(Aall, Ball, Call, Dall)]
        Xall = [A + T.cast(1j, 'complex64') * B for A, B in zip(Aall, Ball)]
        ## Gt = (GX^H - XG^H)X
        #Gtall = [T.dot(GXH - T.transpose(T.conj(GXH)),X) for GXH, X in zip(GXHall,Xall)]
        # compute Cayley transform, which is curve of steepest descent (see section 4 of [Tagare 2011])
        Wall = [GXH - T.transpose(T.conj(GXH)) for GXH in GXHall]
        Iall = [T.identity_like(W) for W in Wall]
        W2pall = [
            I + (learning_rate / T.cast(2, 'complex64')) * W
            for I, W in zip(Iall, Wall)
        ]
        W2mall = [
            I - (learning_rate / T.cast(2, 'complex64')) * W
            for I, W in zip(Iall, Wall)
        ]
        if (learning_rate > 0.0):
            Gtall = [
                T.dot(T.dot(T.nlinalg.matrix_inverse(W2p), W2m), X)
                for W2p, W2m, X in zip(W2pall, W2mall, Xall)
            ]
        else:
            Gtall = [X for X in Xall]
        # perform transposes to prepare for converting back to transposed real-composite form
        GtallRe = [T.transpose(T.real(Gt)) for Gt in Gtall]
        GtallIm = [T.transpose(T.imag(Gt)) for Gt in Gtall]
        # convert back to real-composite form:
        gradients_tang = [
            T.concatenate([
                T.concatenate([GtRe, GtIm], axis=1),
                T.concatenate([(-1) * GtIm, GtRe], axis=1)
            ],
                          axis=0) for GtRe, GtIm in zip(GtallRe, GtallIm)
        ]

    new_rmsprop = [
        0.9 * vel + 0.1 * (g**2) for vel, g in zip(rmsprop, gradients)
    ]

    updates1 = list(zip(rmsprop, new_rmsprop))
    updates2 = [(p, p - learning_rate * g / T.sqrt(rms))
                for p, g, rms in zip(parameters, gradients, new_rmsprop)]
    if idx_project is not None:
        # project back on to the Stiefel manifold using SVD
        # see 3.3 of [Absil and Malick 2012]
        def proj_stiefel(X):
            # projects a square transposed real-composite form matrix X onto the Stiefel manifold
            n = X.shape[0]
            # X=A+jB
            A = T.transpose(X[:n // 2, :n // 2])
            B = T.transpose(X[:n // 2, n // 2:])
            U, S, V = T.nlinalg.svd(A + T.cast(1j, 'complex64') * B)
            W = T.dot(U, V)
            # convert back to transposed real-composite form
            WRe = T.transpose(T.real(W))
            WIm = T.transpose(T.imag(W))
            Wrc = T.concatenate([
                T.concatenate([WRe, WIm], axis=0),
                T.concatenate([(-1) * WIm, WRe], axis=0)
            ],
                                axis=1)
            return Wrc

        new_rmsprop_proj = [new_rmsprop[i] for i in idx_project]
        #updates2_proj=[(p,proj_stiefel(p - learning_rate * g )) for
        #               p, g, rms in zip(parameters_proj,gradients_tang, new_rmsprop_proj)]
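        # proj_stiefel is left unused here: gradients_tang already holds the Cayley-updated
        # parameters in real-composite form, so they are assigned directly as the new values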
        updates2_proj = [(p, g) for p, g, rms in zip(
            parameters_proj, gradients_tang, new_rmsprop_proj)]
        for i in range(len(updates2_proj)):
            updates2[idx_project[i]] = updates2_proj[i]

    updates = updates1 + updates2

    return updates, rmsprop
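The proj_stiefel helper projects via the SVD: the closest unitary matrix to X = U S V^H in the Frobenius norm is U V^H. A minimal NumPy sketch of that projection (illustrative, independent of the Theano graph):

import numpy as np
# SVD projection onto the unitary group: drop the singular values of X = U S V^H and keep U V^H.
rng = np.random.default_rng(1)
X = rng.standard_normal((4, 4)) + 1j * rng.standard_normal((4, 4))
U, S, Vh = np.linalg.svd(X)
W = U @ Vh                       # nearest unitary matrix to X in Frobenius norm
assert np.allclose(W.conj().T @ W, np.eye(4))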
Example #8
 def negative_log_likelihood(self):
     return T.mean( T.real((self.y_pred - self.y) * T.conj(self.y_pred - self.y)) )
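For a complex residual z, the product z * conj(z) equals |z|**2, so the return value above is the mean squared magnitude of the prediction error. A quick NumPy check of that identity:

import numpy as np
# |z|^2 == z * conj(z) for complex z, so T.real above only strips a zero imaginary part.
z = np.array([1 + 2j, -0.5 + 0.25j, 3 - 1j])
assert np.allclose(np.real(z * np.conj(z)), np.abs(z) ** 2)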