def Tfft2old(a):
    """ assumes len(a.shape)==2, does not assert this """
    # Assumes: import theano.tensor as T; from theano.tensor import fft as Tfft
    #A = np.zeros_like(a,dtype=np.complex_)
    s = a.shape[1]
    S = s // 2 + 1
    aa = T.stack([a], axis=0)
    AA = Tfft.rfft(aa)
    # Theano rfft stores the real and imaginary parts in a trailing dimension of size 2.
    B = AA[..., 0] + 1.j * AA[..., 1]
    #return AA[0,...,0]
    #A[:,:S] = B  # copy left half to right half
    #A[:,S:] = np.conj(np.fliplr(A[:,1:S-1]))
    #below no worky
    C = B[0, ...]
    CC = C[:, 1:S - 1]
    # Rebuild the right half of the spectrum from the Hermitian symmetry of a real input.
    return T.concatenate([C, T.conj(CC[:, ::-1])], axis=1)
    # Everything below is unreachable; kept from the original draft.
    A = T.zeros_like(a)
    #Alookup = theano.shared(A)
    Afront = A[:, :S]
    Aback = A[:, S:]
    A = T.set_subtensor(Afront, B)
    A = T.set_subtensor(Aback, T.conj(Tfftshift(A))[:, S:])
    #A[:,:S] = B
    #A[:,S:] = T.conj(npfft.fftshift(A))[:,S:]
    return A

def get_updates(self, learning_rate):
    # Specify the expression used to compute the weight update, with complex gradient descent.
    # Note: dW is computed but not used below; the updates are built from T.grad instead.
    dW = self.W + learning_rate * self.negative_log_likelihood() * T.conj(self.input)
    gparams = T.grad(self.cost(), self.params)
    # Generate the list of (parameter, new value) update pairs.
    updates = []
    for param, gparam in zip(self.params, gparams):
        updates.append((param, param - learning_rate * gparam))
    return updates

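# Hedged usage sketch (not from the original source): the (param, new_value) pairs
# returned by get_updates are intended for theano.function's `updates` argument.
# `model`, `x`, and `y` are hypothetical names standing in for the class instance that
# defines get_updates/cost above and its symbolic inputs.
import theano

def make_train_fn(model, x, y, learning_rate=0.01):
    """Compile a training step that applies the gradient-descent updates."""
    updates = model.get_updates(learning_rate)
    return theano.function(inputs=[x, y], outputs=model.cost(), updates=updates)
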
def resfunc(i, xvec, y, h1, h2, h3vec, U1, U2, U3ten, conjU1, conjU2, conjU3ten):
    # Assumes: TT = theano.tensor, TTnlinalg = theano.tensor.nlinalg;
    # g, s, t2, t3, m, and p0 are taken from the enclosing scope.
    deph1 = TT.exp(-g * (t2 - y))
    deph2 = TT.exp(-g * (t3 - t2))
    deph3 = TT.exp(-g * (xvec[i] - t3))
    inhom14 = TT.exp(-s * ((xvec[i] - t3 + t2 - y) ** 2))
    inhom23 = TT.exp(-s * (((xvec[i] - t3) - (t2 - y)) ** 2))
    r14a = (TT.dot(U1, TT.dot(m, TT.dot(p0, conjU1)))) * deph1
    r23a = (TT.dot(U1, TT.dot(p0, TT.dot(m, conjU1)))) * deph1
    r1 = TTnlinalg.trace(
        TT.dot(m, (TT.dot(
            U3ten[:, :, i],
            TT.dot(
                m,
                TT.dot(((TT.dot(U2, TT.dot(m, TT.dot(r14a, conjU2)))) * deph2),
                       conjU3ten[:, :, i]))) * deph3))) * inhom14
    r2 = (TTnlinalg.trace(
        TT.dot(m, (TT.dot(
            U3ten[:, :, i],
            TT.dot(((TT.dot(U2, TT.dot(m, TT.dot(r23a, conjU2)))) * deph2),
                   TT.dot(m, conjU3ten[:, :, i]))) * deph3)))) * inhom23
    r3 = (TTnlinalg.trace(
        TT.dot(m, (TT.dot(
            U3ten[:, :, i],
            TT.dot(
                m,
                TT.dot(((TT.dot(U2, TT.dot(r23a, TT.dot(m, conjU2)))) * deph2),
                       conjU3ten[:, :, i]))) * deph3)))) * inhom23
    r4 = (TTnlinalg.trace(
        TT.dot(m, (TT.dot(
            U3ten[:, :, i],
            TT.dot(((TT.dot(U2, TT.dot(r14a, TT.dot(m, conjU2)))) * deph2),
                   TT.dot(m, conjU3ten[:, :, i]))) * deph3)))) * inhom14
    # (1j * 1j * 1j) == -1j
    return (1j * 1j * 1j) * h1 * h2 * h3vec[i] * (
        r1 + r2 + r3 + r4 - TT.conj(r1) - TT.conj(r2) - TT.conj(r3) - TT.conj(r4))

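# Hedged usage sketch (not from the original source): resfunc is written as a per-index
# step, so one plausible way to evaluate it for every element of xvec is theano.scan,
# with i as the sequence and the remaining arguments passed as non_sequences. All tensor
# names below are assumptions standing in for the caller's actual variables.
import theano
import theano.tensor as TT

def scan_response(xvec, y, h1, h2, h3vec, U1, U2, U3ten, conjU1, conjU2, conjU3ten):
    responses, _ = theano.scan(
        fn=resfunc,
        sequences=TT.arange(xvec.shape[0]),
        non_sequences=[xvec, y, h1, h2, h3vec,
                       U1, U2, U3ten, conjU1, conjU2, conjU3ten])
    return responses
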
def Tfft2(a):
    """ assumes len(a.shape)==2, does not assert this """
    # Assumes: import theano.tensor as T; from theano.tensor import fft as Tfft
    #A = np.zeros_like(a,dtype=np.complex_)
    s = a.shape[0]
    S = s // 2 + 1
    #aa = T.stack([a],axis=0)
    aa = a.reshape((1, a.shape[0], a.shape[1]))
    AA = Tfft.rfft(aa)
    # Theano rfft stores the real and imaginary parts in a trailing dimension of size 2.
    B = AA[..., 0] + 1.j * AA[..., 1]
    # Get the first output (drop the batch dimension added above).
    C = B[0, ...]
    CC = C[:, 1:S - 1]
    A = T.zeros_like(a)
    # Left half of the spectrum comes straight from rfft; the right half follows from
    # the Hermitian symmetry of a real-valued input.
    Afront = A[:, :S]
    A = T.set_subtensor(Afront, C)
    Aback = A[:, S:]
    A = T.set_subtensor(Aback, T.conj(Tfftshift(A))[:, S:])
    return A

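# Hedged usage sketch (not from the original source): compile Tfft2 into a callable and
# compare it against numpy's 2-D FFT as a sanity check. Tfftshift is assumed to be a
# Theano analogue of np.fft.fftshift defined elsewhere in this codebase; the square
# random input is an illustrative assumption.
import numpy as np
import theano
import theano.tensor as T

def check_Tfft2(n=8):
    x = T.matrix('x')
    fft2_fn = theano.function([x], Tfft2(x))
    a = np.random.randn(n, n).astype(theano.config.floatX)
    approx = fft2_fn(a)
    exact = np.fft.fft2(a)
    # Maximum absolute deviation from the reference transform.
    return np.max(np.abs(approx - exact))
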
def get_updates(self, params, constraints, loss):
    # Assumes: from keras import backend as K; import theano.tensor as T (Theano backend).
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = []

    lr = self.lr
    if self.inital_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))
    self.updates.append(K.update_add(self.iterations, 1))

    for param, grad, accum, shape in zip(params, grads, accumulators, shapes):
        if ('natGrad' in param.name):
            if ('natGradRMS' in param.name):
                # Apply the RMSprop rule to the gradient before the natural gradient step.
                new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                self.updates.append(K.update(accum, new_accum))
                grad = grad / (K.sqrt(new_accum) + self.epsilon)
            elif ('unitaryAug' in param.name):
                # We don't care about the accumulated RMS for the natural gradient step.
                self.updates.append(K.update(accum, accum))

            # Do a natural gradient step.
            if ('unitaryAug' in param.name):
                # Unitary natural gradient step on the augmented [Re, Im] matrix.
                j = K.cast(1j, 'complex64')
                A = K.cast(K.transpose(param[:shape[1] // 2, :shape[1] // 2]), 'complex64')
                B = K.cast(K.transpose(param[:shape[1] // 2, shape[1] // 2:]), 'complex64')
                X = A + j * B
                C = K.cast(K.transpose(grad[:shape[1] // 2, :shape[1] // 2]), 'complex64')
                D = K.cast(K.transpose(grad[:shape[1] // 2, shape[1] // 2:]), 'complex64')
                # Build the skew-Hermitian matrix Askew from equation (8):
                #   GX^H = CA^T + DB^T + jDA^T - jCB^T
                GXH = K.dot(C, K.transpose(A)) + K.dot(D, K.transpose(B)) \
                    + j * K.dot(D, K.transpose(A)) - j * K.dot(C, K.transpose(B))
                Askew = GXH - K.transpose(T.conj(GXH))
                I = K.eye(shape[1] // 2)
                two = K.cast(2, 'complex64')
                CayleyDenom = I + (self.lr_natGrad / two) * Askew
                CayleyNumer = I - (self.lr_natGrad / two) * Askew
                # Multiplicative gradient step along the Stiefel manifold (Cayley transform).
                Xnew = K.dot(K.dot(T.nlinalg.matrix_inverse(CayleyDenom), CayleyNumer), X)
                # Convert back to the [Re, Im] augmented form.
                XnewRe = K.transpose(T.real(Xnew))
                XnewIm = K.transpose(T.imag(Xnew))
                new_param = K.concatenate(
                    (K.concatenate((XnewRe, XnewIm), axis=1),
                     K.concatenate(((-1) * XnewIm, XnewRe), axis=1)), axis=0)
            else:
                # Do the usual RMSprop update, using lr_natGrad as the learning rate.
                # Update the accumulator.
                new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                self.updates.append(K.update(accum, new_accum))
                new_param = param - self.lr_natGrad * grad / (K.sqrt(new_accum) + self.epsilon)
        else:
            # Do the usual RMSprop update.
            # Update the accumulator.
            new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
            self.updates.append(K.update(accum, new_accum))
            new_param = param - lr * grad / (K.sqrt(new_accum) + self.epsilon)

        # Apply constraints.
        if param in constraints:
            c = constraints[param]
            new_param = c(new_param)
        self.updates.append(K.update(param, new_param))
    return self.updates

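# Hedged illustration (not from the original source): a small numpy check of the Cayley
# step used above. For unitary X and skew-Hermitian A, the update
#   X_new = (I + (lr/2) A)^{-1} (I - (lr/2) A) X
# stays on the unitary group, which is why the optimizer can take multiplicative steps
# without re-orthogonalizing. Matrix size and learning rate below are arbitrary choices.
import numpy as np

def cayley_step(X, A, lr):
    n = X.shape[0]
    I = np.eye(n, dtype=X.dtype)
    # Solve (I + (lr/2) A) X_new = (I - (lr/2) A) X instead of forming the inverse.
    return np.linalg.solve(I + (lr / 2.0) * A, (I - (lr / 2.0) * A) @ X)

def demo_cayley(n=4, lr=0.1, seed=0):
    rng = np.random.default_rng(seed)
    # Random unitary X from a QR factorization.
    Q, _ = np.linalg.qr(rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n)))
    # Random skew-Hermitian A (playing the role of Askew above).
    M = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))
    A = M - M.conj().T
    Xnew = cayley_step(Q, A, lr)
    # Should be ~0: Xnew is still unitary.
    return np.max(np.abs(Xnew.conj().T @ Xnew - np.eye(n)))
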
def step(X):
    return T.square(T.conj(X)) + C

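# Hedged usage sketch (not from the original source): step() has the shape of a
# theano.scan body, repeatedly applying X <- conj(X)^2 + C, with C captured from the
# enclosing scope (assumed here to be a constant or shared variable). Z0 and n_steps
# are illustrative assumptions.
import theano
import theano.tensor as T

Z0 = T.zmatrix('Z0')  # complex starting point
values, scan_updates = theano.scan(fn=step, outputs_info=Z0, n_steps=10)
iterate = theano.function([Z0], values[-1], updates=scan_updates)
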
def rms_prop(learning_rate, parameters, gradients, idx_project=None):
    # Assumes: import numpy as np; import theano; import theano.tensor as T
    rmsprop = [
        theano.shared(1e-3 * np.ones_like(p.get_value())) for p in parameters
    ]

    if idx_project is not None:
        # We will use projected gradient on the Stiefel manifold for these parameters.
        # We assume these parameters are unitary matrices in real-composite form.
        parameters_proj = [parameters[i] for i in idx_project]
        gradients_proj = [gradients[i] for i in idx_project]
        sizes_proj = [p.shape for p in parameters_proj]

        # Compute the gradient in the tangent space of the Stiefel manifold
        # (see Lemma 4 of [Tagare 2011]).
        # X = A + jB
        Aall = [
            T.cast(T.transpose(p[:s[0] // 2, :s[0] // 2]), 'complex64')
            for s, p in zip(sizes_proj, parameters_proj)
        ]
        Ball = [
            T.cast(T.transpose(p[:s[0] // 2, s[0] // 2:]), 'complex64')
            for s, p in zip(sizes_proj, parameters_proj)
        ]
        # G = C + jD
        Call = [
            T.cast(T.transpose(g[:s[0] // 2, :s[0] // 2]), 'complex64')
            for s, g in zip(sizes_proj, gradients_proj)
        ]
        Dall = [
            T.cast(T.transpose(g[:s[0] // 2, s[0] // 2:]), 'complex64')
            for s, g in zip(sizes_proj, gradients_proj)
        ]
        # GX^H = CA^T + DB^T + jDA^T - jCB^T
        GXHall = [T.dot(C, T.transpose(A)) + T.dot(D, T.transpose(B))
                  + T.cast(1j, 'complex64') * T.dot(D, T.transpose(A))
                  - T.cast(1j, 'complex64') * T.dot(C, T.transpose(B))
                  for A, B, C, D in zip(Aall, Ball, Call, Dall)]
        Xall = [A + T.cast(1j, 'complex64') * B for A, B in zip(Aall, Ball)]
        ## Gt = (GX^H - XG^H)X
        #Gtall = [T.dot(GXH - T.transpose(T.conj(GXH)),X) for GXH, X in zip(GXHall,Xall)]

        # Compute the Cayley transform, which is the curve of steepest descent
        # (see section 4 of [Tagare 2011]).
        Wall = [GXH - T.transpose(T.conj(GXH)) for GXH in GXHall]
        Iall = [T.identity_like(W) for W in Wall]
        W2pall = [
            I + (learning_rate / T.cast(2, 'complex64')) * W
            for I, W in zip(Iall, Wall)
        ]
        W2mall = [
            I - (learning_rate / T.cast(2, 'complex64')) * W
            for I, W in zip(Iall, Wall)
        ]
        if (learning_rate > 0.0):
            Gtall = [
                T.dot(T.dot(T.nlinalg.matrix_inverse(W2p), W2m), X)
                for W2p, W2m, X in zip(W2pall, W2mall, Xall)
            ]
        else:
            Gtall = [X for X in Xall]

        # Transpose to prepare for converting back to transposed real-composite form.
        GtallRe = [T.transpose(T.real(Gt)) for Gt in Gtall]
        GtallIm = [T.transpose(T.imag(Gt)) for Gt in Gtall]
        # Convert back to real-composite form.
        gradients_tang = [
            T.concatenate([
                T.concatenate([GtRe, GtIm], axis=1),
                T.concatenate([(-1) * GtIm, GtRe], axis=1)
            ], axis=0) for GtRe, GtIm in zip(GtallRe, GtallIm)
        ]

    new_rmsprop = [
        0.9 * vel + 0.1 * (g ** 2) for vel, g in zip(rmsprop, gradients)
    ]
    updates1 = list(zip(rmsprop, new_rmsprop))
    updates2 = [(p, p - learning_rate * g / T.sqrt(rms))
                for p, g, rms in zip(parameters, gradients, new_rmsprop)]

    if idx_project is not None:
        # Project back onto the Stiefel manifold using the SVD
        # (see 3.3 of [Absil and Malick 2012]).
        def proj_stiefel(X):
            # Projects a square transposed real-composite form matrix X onto the
            # Stiefel manifold.
            n = X.shape[0]
            # X = A + jB
            A = T.transpose(X[:n // 2, :n // 2])
            B = T.transpose(X[:n // 2, n // 2:])
            U, S, V = T.nlinalg.svd(A + T.cast(1j, 'complex64') * B)
            W = T.dot(U, V)
            # Convert back to transposed real-composite form.
            WRe = T.transpose(T.real(W))
            WIm = T.transpose(T.imag(W))
            Wrc = T.concatenate([
                T.concatenate([WRe, WIm], axis=0),
                T.concatenate([(-1) * WIm, WRe], axis=0)
            ], axis=1)
            return Wrc

        new_rmsprop_proj = [new_rmsprop[i] for i in idx_project]
        #updates2_proj=[(p,proj_stiefel(p - learning_rate * g )) for
        #               p, g, rms in zip(parameters_proj,gradients_tang, new_rmsprop_proj)]
        updates2_proj = [(p, g) for p, g, rms in zip(
            parameters_proj, gradients_tang, new_rmsprop_proj)]
        for i in range(len(updates2_proj)):
            updates2[idx_project[i]] = updates2_proj[i]

    updates = updates1 + updates2
    return updates, rmsprop

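# Hedged usage sketch (not from the original source): wiring rms_prop into a compiled
# Theano training step. `loss`, `params`, `x`, and `y` are placeholder names; idx_project
# would list the indices of the real-composite unitary parameters, as assumed above.
import theano
import theano.tensor as T

def compile_train_step(loss, params, x, y, learning_rate=1e-3, idx_project=None):
    gradients = [T.grad(loss, p) for p in params]
    updates, rms_state = rms_prop(learning_rate, params, gradients,
                                  idx_project=idx_project)
    return theano.function([x, y], loss, updates=updates)
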
def negative_log_likelihood(self):
    # Mean squared modulus of the complex prediction error: mean(|y_pred - y|^2).
    return T.mean(
        T.real((self.y_pred - self.y) * T.conj(self.y_pred - self.y)))

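# Hedged illustration (not from the original source): the expression above equals
# np.mean(np.abs(y_pred - y)**2) for complex residuals. A quick numpy check of that
# identity, with random data as an arbitrary example:
import numpy as np

def check_complex_mse(seed=0, n=100):
    rng = np.random.default_rng(seed)
    y_pred = rng.normal(size=n) + 1j * rng.normal(size=n)
    y = rng.normal(size=n) + 1j * rng.normal(size=n)
    e = y_pred - y
    lhs = np.mean(np.real(e * np.conj(e)))
    rhs = np.mean(np.abs(e) ** 2)
    return np.isclose(lhs, rhs)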