def test_on_real_input(self):
    x = dvector()
    rng = np.random.RandomState(23)
    xval = rng.randn(10)
    assert np.all(0 == theano.function([x], imag(x))(xval))
    assert np.all(xval == theano.function([x], real(x))(xval))

    x = imatrix()
    xval = np.asarray(rng.randn(3, 3) * 100, dtype="int32")
    assert np.all(0 == theano.function([x], imag(x))(xval))
    assert np.all(xval == theano.function([x], real(x))(xval))
def test_basic(self):
    x = zvector()
    rng = np.random.RandomState(23)
    xval = np.asarray([complex(rng.randn(), rng.randn()) for i in range(10)])
    assert np.all(xval.real == theano.function([x], real(x))(xval))
    assert np.all(xval.imag == theano.function([x], imag(x))(xval))
def Tifft2(A):
    s = A.shape[1]
    S = s // 2 + 1
    #B = T.tensor3()  # np.zeros((s,S,2),dtype=np.float_)
    #B[:,:,0] = T.real(A)
    #B[:,:,1] = T.imag(A)
    B = T.zeros((1, A.shape[1], S, 2))
    Breal = B[0, :, :, 0]
    B = T.set_subtensor(Breal, T.real(A[:, :S]))
    Bimag = B[0, :, :, 1]
    B = T.set_subtensor(Bimag, T.imag(A[:, :S]))
    #B = T.stack([T.real(A[:,:S]),T.imag(A[:,:S])],axis=2)
    #BB = T.stack([B],axis=0)
    return Tfft.irfft(B)[0, :, :]  # ,A.shape)
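# Hedged NumPy sketch (not from the original code): Tifft2 above assumes a
# theano.tensor.fft-style packing in which the non-redundant half-spectrum is
# stored with real and imaginary parts stacked in a trailing axis of size 2.
# The equivalent round trip with NumPy (names here are illustrative only):
import numpy as np

x = np.random.RandomState(0).randn(8, 8)            # real 2-D signal
A = np.fft.rfft2(x)                                  # half-spectrum, shape (8, 5)
B = np.stack([A.real, A.imag], axis=-1)              # (8, 5, 2) real/imag packing
x_rec = np.fft.irfft2(B[..., 0] + 1j * B[..., 1], s=x.shape)
assert np.allclose(x, x_rec)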
def test_complex(self):
    rng = np.random.RandomState(2333)
    m = fmatrix()
    c = complex(m[0], m[1])
    assert c.type == cvector
    r, i = [real(c), imag(c)]
    assert r.type == fvector
    assert i.type == fvector
    f = theano.function([m], [r, i])

    mval = np.asarray(rng.randn(2, 5), dtype="float32")
    rval, ival = f(mval)
    assert np.all(rval == mval[0]), (rval, mval[0])
    assert np.all(ival == mval[1]), (ival, mval[1])
def proj_stiefel(X):
    # projects a square transposed real-composite form matrix X onto the
    # Stiefel manifold
    n = X.shape[0]
    # X = A + jB
    A = T.transpose(X[:n // 2, :n // 2])
    B = T.transpose(X[:n // 2, n // 2:])
    U, S, V = T.nlinalg.svd(A + T.cast(1j, 'complex64') * B)
    W = T.dot(U, V)
    # convert back to transposed real-composite form
    WRe = T.transpose(T.real(W))
    WIm = T.transpose(T.imag(W))
    Wrc = T.concatenate([T.concatenate([WRe, WIm], axis=0),
                         T.concatenate([(-1) * WIm, WRe], axis=0)],
                        axis=1)
    return Wrc
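# Hedged NumPy sketch (not from the original code): illustrates the
# "transposed real-composite" convention used by proj_stiefel above, where a
# complex matrix is stored as a real block matrix of twice the size, and
# checks that the SVD-based projection returns an orthogonal matrix (i.e. a
# unitary matrix in composite form).  proj_stiefel_np is a hypothetical
# NumPy mirror of the Theano function.
import numpy as np

def proj_stiefel_np(X):
    n = X.shape[0]
    # recover the complex matrix A + jB from the composite blocks
    A = X[:n // 2, :n // 2].T
    B = X[:n // 2, n // 2:].T
    U, _, Vh = np.linalg.svd(A + 1j * B)
    W = U @ Vh                       # nearest unitary matrix (polar factor)
    WRe, WIm = W.real.T, W.imag.T
    # rebuild the composite form exactly as the Theano code does
    return np.concatenate([np.concatenate([WRe, WIm], axis=0),
                           np.concatenate([-WIm, WRe], axis=0)], axis=1)

X = np.random.RandomState(0).randn(8, 8)     # arbitrary 8x8 composite input
Wrc = proj_stiefel_np(X)
assert np.allclose(Wrc.T @ Wrc, np.eye(8))   # projected matrix is orthogonal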
def __init__(self, input, n_in, n_out):
    """ Initialize the parameters of the logistic regression

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """
    # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
    self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                             dtype=theano.config.floatX),
                           name='W', borrow=True)
    # initialize the biases b as a vector of n_out 0s
    self.b = theano.shared(value=numpy.zeros((n_out,),
                                             dtype=theano.config.floatX),
                           name='b', borrow=True)

    # compute vector of class-membership probabilities in symbolic form
    L = T.dot(input, self.W)
    LL = L + self.b
    self.p_y_given_x = T.nnet.softmax(T.real(LL))

    # compute prediction as class whose probability is maximal in
    # symbolic form
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    # parameters of the model
    self.params = [self.W, self.b]
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    accumulators = [K.zeros(shape) for shape in shapes]
    self.weights = accumulators
    self.updates = []

    lr = self.lr
    if self.inital_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))
        self.updates.append(K.update_add(self.iterations, 1))

    for param, grad, accum, shape in zip(params, grads, accumulators, shapes):
        if 'natGrad' in param.name:
            if 'natGradRMS' in param.name:
                # apply RMSprop rule to gradient before natural gradient step
                new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                self.updates.append(K.update(accum, new_accum))
                grad = grad / (K.sqrt(new_accum) + self.epsilon)
            elif 'unitaryAug' in param.name:
                # we don't care about the accumulated RMS for the natural gradient step
                self.updates.append(K.update(accum, accum))

            # do a natural gradient step
            if 'unitaryAug' in param.name:
                # unitary natural gradient step on augmented ReIm matrix
                j = K.cast(1j, 'complex64')
                A = K.cast(K.transpose(param[:shape[1] // 2, :shape[1] // 2]), 'complex64')
                B = K.cast(K.transpose(param[:shape[1] // 2, shape[1] // 2:]), 'complex64')
                X = A + j * B
                C = K.cast(K.transpose(grad[:shape[1] // 2, :shape[1] // 2]), 'complex64')
                D = K.cast(K.transpose(grad[:shape[1] // 2, shape[1] // 2:]), 'complex64')
                # Build skew-Hermitian matrix A from equation (8) of
                # GX^H = CA^T + DB^T + jDA^T - jCB^T.
                GXH = K.dot(C, K.transpose(A)) + K.dot(D, K.transpose(B)) \
                    + j * K.dot(D, K.transpose(A)) - j * K.dot(C, K.transpose(B))
                Askew = GXH - K.transpose(T.conj(GXH))
                I = K.eye(shape[1] // 2)
                two = K.cast(2, 'complex64')
                CayleyDenom = I + (self.lr_natGrad / two) * Askew
                CayleyNumer = I - (self.lr_natGrad / two) * Askew
                # Multiplicative gradient step along the Stiefel manifold (Cayley transform).
                Xnew = K.dot(K.dot(T.nlinalg.matrix_inverse(CayleyDenom), CayleyNumer), X)
                # Convert back to ReIm augmented form.
                XnewRe = K.transpose(T.real(Xnew))
                XnewIm = K.transpose(T.imag(Xnew))
                new_param = K.concatenate((K.concatenate((XnewRe, XnewIm), axis=1),
                                           K.concatenate(((-1) * XnewIm, XnewRe), axis=1)),
                                          axis=0)
            else:
                # Do the usual RMSprop update using lr_natGrad as learning rate.
                # Update accumulator.
                new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
                self.updates.append(K.update(accum, new_accum))
                new_param = param - self.lr_natGrad * grad / (K.sqrt(new_accum) + self.epsilon)
        else:
            # Do the usual RMSprop update.
            # Update accumulator.
            new_accum = self.rho * accum + (1. - self.rho) * K.square(grad)
            self.updates.append(K.update(accum, new_accum))
            new_param = param - lr * grad / (K.sqrt(new_accum) + self.epsilon)

        # Apply constraints.
        if param in constraints:
            c = constraints[param]
            new_param = c(new_param)

        self.updates.append(K.update(param, new_param))
    return self.updates
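# Hedged NumPy sketch (not from the original optimizer): the 'unitaryAug'
# branch above takes a Cayley-transform step with the skew-Hermitian matrix
# Askew = G X^H - (G X^H)^H, which maps a unitary X to another unitary matrix.
# All names below (lr, X, G) are illustrative stand-ins.
import numpy as np

rng = np.random.RandomState(1)
n, lr = 4, 0.1
X, _ = np.linalg.qr(rng.randn(n, n) + 1j * rng.randn(n, n))   # unitary start
G = rng.randn(n, n) + 1j * rng.randn(n, n)                    # arbitrary gradient

GXH = G @ X.conj().T
Askew = GXH - GXH.conj().T                                    # skew-Hermitian
I = np.eye(n)
Xnew = np.linalg.inv(I + (lr / 2) * Askew) @ (I - (lr / 2) * Askew) @ X

assert np.allclose(Askew.conj().T, -Askew)
assert np.allclose(Xnew.conj().T @ Xnew, I)    # the step stays unitary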
def real(x):
    '''Grabs the real part of a complex tensor.'''
    return T.real(x)
def build_func():
    slices = T.cmatrix()
    S = T.cmatrix()
    envelope = T.dvector()
    ctf = T.dmatrix()
    d = T.cmatrix()
    logW_S = T.dvector()
    logW_I = T.dvector()
    logW_R = T.dvector()
    div_in = T.dscalar()
    sigma2_coloured = T.dvector()

    cproj = slices[:, np.newaxis, :] * ctf  # r * i * t
    cim = S[:, np.newaxis, :] * d  # s * i * t

    correlation_I = T.real(cproj[:, np.newaxis, :, :]) * T.real(cim) \
        + T.imag(cproj[:, np.newaxis, :, :]) * T.imag(cim)  # r * s * i * t
    power_I = T.real(cproj) ** 2 + T.imag(cproj) ** 2  # r * i * t

    g_I = envelope * cproj[:, np.newaxis, :, :] - cim  # r * s * i * t
    sigma2_I = T.real(g_I) ** 2 + T.imag(g_I) ** 2  # r * s * i * t
    g_I *= ctf  # r * s * i * t

    tmp = T.sum(sigma2_I / sigma2_coloured, axis=-1)  # r * s * i
    e_I = div_in * tmp + logW_I  # r * s * i

    etmp = my_logsumexp_theano(e_I)  # r * s
    e_S = etmp + logW_S  # r * s

    tmp = logW_S + logW_R[:, np.newaxis]  # r * s
    phitmp = T.exp(e_I - etmp[:, :, np.newaxis])  # r * s * i
    I_tmp = tmp[:, :, np.newaxis] + e_I

    correlation_S = T.sum(phitmp[:, :, :, np.newaxis] * correlation_I, axis=2)  # r * s * t
    power_S = T.sum(phitmp[:, :, :, np.newaxis] * power_I[:, np.newaxis, :, :], axis=2)  # r * s * t
    sigma2_S = T.sum(phitmp[:, :, :, np.newaxis] * sigma2_I, axis=2)  # r * s * t
    g_S = T.sum(phitmp[:, :, :, np.newaxis] * g_I, axis=2)  # r * s * t

    etmp = my_logsumexp_theano(e_S)  # r
    e_R = etmp + logW_R  # r

    tmp = logW_R  # r
    phitmp = T.exp(e_S - etmp[:, np.newaxis])  # r * s
    S_tmp = tmp[:, np.newaxis] + e_S

    correlation_R = T.sum(phitmp[:, :, np.newaxis] * correlation_S, axis=1)  # r * t
    power_R = T.sum(phitmp[:, :, np.newaxis] * power_S, axis=1)  # r * t
    sigma2_R = T.sum(phitmp[:, :, np.newaxis] * sigma2_S, axis=1)  # r * t
    g = T.sum(phitmp[:, :, np.newaxis] * g_S, axis=1)  # r * t

    tmp = -2.0 * div_in
    nttmp = tmp * envelope / sigma2_coloured

    e = my_logsumexp_theano(e_R)
    lse_in = -e

    # Noise estimate
    phitmp = e_R - e
    R_tmp = phitmp
    phitmp = T.exp(phitmp)
    sigma2_est = T.dot(phitmp, sigma2_R)
    correlation = T.dot(phitmp, correlation_R)
    power = T.dot(phitmp, power_R)

    global func
    func = theano.function(
        inputs=[slices, S, envelope, ctf, d, logW_S, logW_I, logW_R,
                div_in, sigma2_coloured],
        outputs=[g, I_tmp, S_tmp, R_tmp, sigma2_est, correlation, power,
                 nttmp, lse_in, phitmp])
def f(m):
    c = complex(m[0], m[1])
    return 0.5 * real(c) + 0.9 * imag(c)
def f(m):
    c = complex_from_polar(abs(m[0]), m[1])
    return 0.5 * real(c) + 0.9 * imag(c)
def rms_prop(learning_rate, parameters, gradients, idx_project=None):
    rmsprop = [theano.shared(1e-3 * np.ones_like(p.get_value()))
               for p in parameters]

    if idx_project is not None:
        # we will use projected gradient on the Stiefel manifold on these parameters;
        # we will assume these parameters are unitary matrices in real-composite form
        parameters_proj = [parameters[i] for i in idx_project]
        gradients_proj = [gradients[i] for i in idx_project]
        sizes_proj = [p.shape for p in parameters_proj]

        # compute gradient in tangent space of Stiefel manifold
        # (see Lemma 4 of [Tagare 2011])
        # X = A + jB
        Aall = [T.cast(T.transpose(p[:s[0] // 2, :s[0] // 2]), 'complex64')
                for s, p in zip(sizes_proj, parameters_proj)]
        Ball = [T.cast(T.transpose(p[:s[0] // 2, s[0] // 2:]), 'complex64')
                for s, p in zip(sizes_proj, parameters_proj)]
        # G = C + jD
        Call = [T.cast(T.transpose(g[:s[0] // 2, :s[0] // 2]), 'complex64')
                for s, g in zip(sizes_proj, gradients_proj)]
        Dall = [T.cast(T.transpose(g[:s[0] // 2, s[0] // 2:]), 'complex64')
                for s, g in zip(sizes_proj, gradients_proj)]
        # GX^H = CA^T + DB^T + jDA^T - jCB^T
        GXHall = [T.dot(C, T.transpose(A)) + T.dot(D, T.transpose(B))
                  + T.cast(1j, 'complex64') * T.dot(D, T.transpose(A))
                  - T.cast(1j, 'complex64') * T.dot(C, T.transpose(B))
                  for A, B, C, D in zip(Aall, Ball, Call, Dall)]
        Xall = [A + T.cast(1j, 'complex64') * B for A, B in zip(Aall, Ball)]
        ## Gt = (GX^H - XG^H)X
        #Gtall = [T.dot(GXH - T.transpose(T.conj(GXH)), X) for GXH, X in zip(GXHall, Xall)]

        # compute the Cayley transform, which is a curve of steepest descent
        # (see section 4 of [Tagare 2011])
        Wall = [GXH - T.transpose(T.conj(GXH)) for GXH in GXHall]
        Iall = [T.identity_like(W) for W in Wall]
        W2pall = [I + (learning_rate / T.cast(2, 'complex64')) * W
                  for I, W in zip(Iall, Wall)]
        W2mall = [I - (learning_rate / T.cast(2, 'complex64')) * W
                  for I, W in zip(Iall, Wall)]
        if learning_rate > 0.0:
            Gtall = [T.dot(T.dot(T.nlinalg.matrix_inverse(W2p), W2m), X)
                     for W2p, W2m, X in zip(W2pall, W2mall, Xall)]
        else:
            Gtall = [X for X in Xall]

        # perform transposes to prepare for converting back to transposed
        # real-composite form
        GtallRe = [T.transpose(T.real(Gt)) for Gt in Gtall]
        GtallIm = [T.transpose(T.imag(Gt)) for Gt in Gtall]
        # convert back to real-composite form:
        gradients_tang = [T.concatenate([T.concatenate([GtRe, GtIm], axis=1),
                                         T.concatenate([(-1) * GtIm, GtRe], axis=1)],
                                        axis=0)
                          for GtRe, GtIm in zip(GtallRe, GtallIm)]

    new_rmsprop = [0.9 * vel + 0.1 * (g ** 2)
                   for vel, g in zip(rmsprop, gradients)]

    updates1 = list(zip(rmsprop, new_rmsprop))
    updates2 = [(p, p - learning_rate * g / T.sqrt(rms))
                for p, g, rms in zip(parameters, gradients, new_rmsprop)]

    if idx_project is not None:
        # project back onto the Stiefel manifold using the SVD
        # (see section 3.3 of [Absil and Malick 2012])
        def proj_stiefel(X):
            # projects a square transposed real-composite form matrix X
            # onto the Stiefel manifold
            n = X.shape[0]
            # X = A + jB
            A = T.transpose(X[:n // 2, :n // 2])
            B = T.transpose(X[:n // 2, n // 2:])
            U, S, V = T.nlinalg.svd(A + T.cast(1j, 'complex64') * B)
            W = T.dot(U, V)
            # convert back to transposed real-composite form
            WRe = T.transpose(T.real(W))
            WIm = T.transpose(T.imag(W))
            Wrc = T.concatenate([T.concatenate([WRe, WIm], axis=0),
                                 T.concatenate([(-1) * WIm, WRe], axis=0)],
                                axis=1)
            return Wrc

        new_rmsprop_proj = [new_rmsprop[i] for i in idx_project]
        #updates2_proj = [(p, proj_stiefel(p - learning_rate * g)) for
        #                 p, g, rms in zip(parameters_proj, gradients_tang, new_rmsprop_proj)]
        updates2_proj = [(p, g) for p, g, rms in zip(parameters_proj,
                                                     gradients_tang,
                                                     new_rmsprop_proj)]
        for i in range(len(updates2_proj)):
            updates2[idx_project[i]] = updates2_proj[i]

    updates = updates1 + updates2
    return updates, rmsprop
def negative_log_likelihood(self):
    return T.mean(
        T.real((self.y_pred - self.y) * T.conj(self.y_pred - self.y))
    )
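# Hedged NumPy note (not from the original class): for a complex residual
# e = y_pred - y, real(e * conj(e)) equals |e|**2, so the expression above is
# the mean squared modulus of the prediction error.
import numpy as np
e = np.array([1 + 2j, -3j, 0.5 + 0j])
assert np.allclose(np.real(e * np.conj(e)), np.abs(e) ** 2)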