def ion_O2_7319A_b_flux_log(self, emis_ratio, cHbeta, flambda, abund, ftau, O3, T_high):
    """Return log10 of a two-term flux model for this line.

    The first term is ``10 ** (abund + emis_ratio - flambda * cHbeta - 12)``.
    The second term swaps ``abund + emis_ratio`` for
    ``O3 + 0.9712758 + log10((T_high / 10000) ** 0.44)``.
    NOTE(review): the second term is presumably a recombination
    contribution scaled by the O3 abundance -- confirm against the model
    reference.

    ``ftau`` is accepted but not used by this method.
    """
    # Both terms carry the same ``flambda * cHbeta`` correction.
    extinction = flambda * cHbeta
    temperature_term = tt.log10(tt.power(T_high / 10000.0, 0.44))

    first_exponent = abund + emis_ratio - extinction - 12
    second_exponent = O3 + 0.9712758 + temperature_term - extinction - 12

    return tt.log10(tt.power(10, first_exponent) + tt.power(10, second_exponent))
def __init__(self, layers, alpha=0.3, C=0.0, rho=0.05, beta=0.1):
    """Build the Theano graph and compile predict/train functions.

    :param layers: sequence of exactly three layer sizes
                   (input, hidden, output); anything else raises ValueError
    :param alpha: step size used in the parameter updates
    :param C: coefficient of the squared-weight penalty
    :param rho: target value used in the KL sparsity term
    :param beta: weight of the KL sparsity term in the loss
    """
    if len(layers) != 3:
        raise ValueError("Not an autoencoder")

    n_in, n_hidden, n_out = layers[0], layers[1], layers[2]

    inputs = T.dmatrix('x')
    targets = T.dmatrix('y')

    # Creation order (w1, w2, b1, b2) is preserved on purpose:
    # NOTE(review): get_weights presumably draws random values, so the
    # call order would affect the initial weights -- confirm.
    w1 = shared(get_weights((n_hidden, n_in)), name="w1")
    w2 = shared(get_weights((n_out, n_hidden)), name="w2")
    b1 = shared(get_weights((n_hidden, 1)), name="b1")
    b2 = shared(get_weights((n_out, 1)), name="b2")

    def logistic(pre_activation):
        return 1.0 / (1 + T.exp(-pre_activation))

    # Samples are laid out as columns inside the network.
    visible = inputs.T
    hidden = logistic(T.dot(w1, visible) + b1.repeat(visible.shape[1], axis=1))
    output = logistic(T.dot(w2, hidden) + b2.repeat(hidden.shape[1], axis=1))

    self._predict = function([inputs], [output])

    # NOTE(review): dividing by 0.5 doubles the squared error; a
    # half-squared-error cost would multiply by 0.5 instead -- confirm
    # which one is intended.
    reconstruction = T.sum((output - targets.T) ** 2) / 0.5
    weight_penalty = C / 2.0 * T.sum([(w1 ** 2).sum(), (w2 ** 2).sum()])

    # KL divergence term (base-10 logs) between rho and the mean hidden
    # activation of each unit.
    rhohat = hidden.sum(axis=1) / hidden.shape[1]
    kl_divergence = (rho * T.log10(rho / rhohat)) + \
        (1 - rho) * T.log10((1 - rho) / (1 - rhohat))

    loss = reconstruction + weight_penalty + beta * kl_divergence.sum()

    trainable = [w1, w2, b1, b2]
    gradients = T.grad(loss, trainable)
    n_rows = inputs.shape[0]
    updates = [(param, param - alpha * grad / n_rows)
               for param, grad in zip(trainable, gradients)]

    self._train = function([inputs, targets], [loss], updates=updates)
    self.w1 = w1
def theano_fft(x):
    """Return a scaled log-power spectrum for each row of ``x``.

    Each row is zero-padded to ``NFFT`` columns and transformed with an
    orthonormal real FFT. The first ``fbins`` bins are kept, converted to
    squared magnitude, floored at 1e-9, mapped with
    ``(20 / 90) * log10(p) + 1`` and multiplied by 0.1.
    No analysis window is applied.
    """
    # Zero-pad every row up to the FFT length.
    padded = T.zeros((x.shape[0], NFFT))
    padded = T.set_subtensor(padded[:, :x.shape[1]], x)

    # The last axis of the rfft output holds (real, imag).
    spectrum = fft.rfft(padded, norm='ortho')[:, :fbins]
    power = spectrum[:, :, 0] ** 2 + spectrum[:, :, 1] ** 2

    # Floor at 1e-9 so the logarithm stays finite.
    floored = T.maximum(power, 1e-9)

    # Power 1.0 maps to 1 and the 1e-9 floor maps to -1, before the
    # final 0.1 weighting.
    return 0.1 * ((20.0 / 90.0) * T.log10(floored) + 1.0)
def learn(self, X, Y):
    """Train for ``self.Size_Epoch`` epochs and track loss and PSNR.

    After every epoch the first 32 samples of ``Y`` are passed through
    ``self.predict``, rescaled with ``(out + 1) * (255 / 2)`` and compared
    with the inputs using a compiled PSNR function. Samples 0, 1 and 31
    are scored and their mean is recorded.

    :param X: array passed to ``self.batch_gen`` together with ``Y``
    :param Y: array indexed with four axes; its first 32 entries are the
              fixed evaluation set
    :return: ``(losses, psnr_means)``, two lists with one entry per epoch
    """
    Image_Test_PR = Y[0:32, :, :, :].astype('float32')

    X_d = T.tensor4()
    X_g = T.tensor4()
    # PSNR = 10 * log10(max(X_g)^2 / mean((X_d - X_g)^2))
    psnr_expr = 10 * T.log10(
        (T.square(T.max(X_g).astype('float32')) /
         T.mean(T.square(X_d - X_g).astype('float32'))))
    psnr_fn = theano.function([X_d, X_g], psnr_expr)

    n_samples = X.shape[0]
    train_batches = self.batch_gen(X, Y, n_samples)
    N_BATCHES = n_samples // self.Batch_Size

    Epoch_train_loss_AE = []
    Epoch_train_SN_Ratio_AE = []

    def _psnr_of(sample, reconstruction):
        # Score one evaluation sample; float() replaces the removed
        # ``np.float`` alias.
        return float(psnr_fn(Image_Test_PR[sample:sample + 1, :, :, :],
                             reconstruction[sample:sample + 1, :, :, :]))

    for nepoch in range(self.Size_Epoch):
        train_loss = 0
        # NOTE(review): the loop runs ``self.Batch_Size`` steps but the
        # average below divides by ``N_BATCHES``; kept as in the original
        # because changing it alters the number of training steps.
        for _ in range(self.Batch_Size):
            X_b, Y_b = next(train_batches)
            Rec_Y_b = self.generate_fn_X(X_b)
            # Rescale targets with y / (255 / 2) - 1.
            Y_b = Y_b / (255 / 2)
            Y_b = Y_b - 1
            step_out = self.train(Rec_Y_b, Y_b)
            train_loss += step_out[0]

        # Evaluate on the fixed set with the weights reached this epoch.
        Rec_Y_b_Test = self.predict(Image_Test_PR[0:32, :, :, :])
        Temp = (Rec_Y_b_Test + 1) * (255 / 2)

        P_Signal_Noise_R6 = _psnr_of(0, Temp)
        P_Signal_Noise_R7 = _psnr_of(1, Temp)
        P_Signal_Noise_R8 = _psnr_of(31, Temp)

        # Fix: the original summed R7 twice and left R8 out of the mean.
        Temp0 = (P_Signal_Noise_R6 + P_Signal_Noise_R7 + P_Signal_Noise_R8) / 3

        train_loss /= N_BATCHES
        Epoch_train_loss_AE.append(train_loss)
        Epoch_train_SN_Ratio_AE.append(Temp0)
        print(P_Signal_Noise_R6, P_Signal_Noise_R7, P_Signal_Noise_R8, Temp0)
        print("Epoch {} average loss = {}".format(nepoch, train_loss))

    return Epoch_train_loss_AE, Epoch_train_SN_Ratio_AE
def corO2_7319_emisTensor(self, emis_ratio, cHbeta, flambda, O2_abund, O3_abund, Te_high):
    """Return the sum of two power-of-ten flux terms (linear scale).

    The first term is ``10 ** (O2_abund + emis_ratio - flambda * cHbeta - 12)``.
    The second term is
    ``10 ** (O3_abund + 0.9712758487381 + log10((Te_high / 10000) ** 0.44)
    - flambda * cHbeta - 12)``.
    NOTE(review): the second term is presumably a recombination
    correction -- confirm against the model reference.
    """
    extinction = flambda * cHbeta
    temperature_term = tt.log10(tt.pow(Te_high / 10000.0, 0.44))

    direct_term = tt.pow(10, O2_abund + emis_ratio - extinction - 12)
    correction_term = tt.pow(
        10, O3_abund + 0.9712758487381 + temperature_term - extinction - 12)

    return direct_term + correction_term
def Recon_KL_loss_batch(self, train_x, batch_size):
    """Compile a function that evaluates one mini-batch selected by index.

    The compiled function takes an integer batch index, substitutes
    ``train_x[index * batch_size:(index + 1) * batch_size]`` for ``self.x``
    and returns ``[self.recon, self.lamda * log10(self.KL + 1)]``.
    """
    index = T.lscalar('index')
    start = index * batch_size
    stop = start + batch_size

    scaled_kl = self.lamda * T.log10(self.KL + 1)

    return theano.function(
        [index],
        outputs=[self.recon, scaled_kl],
        givens={self.x: train_x[start:stop]})
def distmod_constant_curve(Om, Ok, h0, z):
    """
    Distance modulus for a curved universe with a cosmological constant

    :param Om: matter content
    :param Ok: curvature
    :param h0: hubble constant
    :param z: redshift
    :return: theano array of dist. mods.
    """
    # Hubble distance
    hubble_dist = sol * 1.e-3 / h0

    # Comoving distance
    comoving = hubble_dist * gauss_kronrod(
        integrand_constant_curve, z, parameters=[Om, Ok])

    root_abs_ok = T.sqrt(T.abs_(Ok))
    curvature_scale = (1 + z) * hubble_dist / root_abs_ok
    curvature_arg = root_abs_ok * comoving / hubble_dist

    # Zero-valued placeholder with the same shape as the result.
    nothing = 0. * (1 + z) * comoving

    # Exactly one of the three conditions holds, so summing the three
    # selections picks the matching luminosity distance.
    flat_part = ifelse(T.eq(Ok, 0.), (1 + z) * comoving, nothing)
    positive_part = ifelse(
        T.gt(Ok, 0), curvature_scale * T.sinh(curvature_arg), nothing)
    negative_part = ifelse(
        T.lt(Ok, 0), curvature_scale * T.sin(curvature_arg), nothing)

    lum_dist = flat_part + positive_part + negative_part
    return 5. * T.log10(lum_dist) + 25.  # dist mod
def log_lda(theta, phi, value, rowsums, sumall, phiAmbient=None):
    """Return a weighted sum of two normalised log-likelihood terms.

    ``value`` is indexed as three columns: column 0 selects rows of
    ``theta``, column 1 selects rows of ``phi.T`` and column 2 multiplies
    the per-entry log-sum-exp. When ``phiAmbient`` is given it is appended
    to ``phi`` along axis 0 first.

    The result is
    ``1e6 * sum(ll) / sumall + 3e3 * sum(ambientll) / ambientll.shape[1]``.
    """
    if phiAmbient is not None:
        phi = tt.concatenate([phi, phiAmbient], axis=0)

    theta_rows = value[:, 0].astype('int32')
    phi_rows = value[:, 1].astype('int32')
    weights = value[:, 2]

    log_theta = tt.log(theta[theta_rows] + 1e-10)
    log_phi = tt.log(phi.T[phi_rows] + 1e-10)
    ll = weights * pm.math.logsumexp(log_theta + log_phi, axis=1).ravel()

    # The ambient term uses the last column of theta.
    last_theta = theta[:, theta.shape[1] - 1]
    ambientll = ambient(tt.log10((rowsums + 1e-10) * (last_theta + 1e-10)))

    ll_total = tt.sum(ll)
    ambient_total = tt.sum(ambientll)

    # Debug Print ops. NOTE(review): their outputs are discarded, so they
    # are probably never part of the evaluated graph -- confirm.
    tt.printing.Print('l')(ambient_total)
    tt.printing.Print('l')(ll_total)
    tt.printing.Print('l')(sumall)
    tt.printing.Print('l')(ambientll.shape[1])
    tt.printing.Print('sub')(ll_total - sumall)
    tt.printing.Print('sub')(ambient_total - rowsums.shape[1])
    tt.printing.Print('div')(ll_total / sumall)
    tt.printing.Print('div')(ambient_total / rowsums.shape[1])

    # Earlier variants returned the unweighted sum of both terms, or
    # scaled the ambient sum by sumall / rowsums.shape[1].
    return 1e6 * (ll_total / sumall) + (ambient_total / ambientll.shape[1]) * 3e3
def distmod_constant_flat(Om, h0, z):
    """
    Distance modulus for a flat universe with a cosmological constant

    :param Om: matter content
    :param h0: hubble constant
    :param z: redshift
    :return: theano array of dist. mods.
    """
    # Hubble distance
    hubble_dist = sol * 1.e-3 / h0

    # Comoving distance: Hubble distance times the quadrature result
    integral = gauss_kronrod(integrand_constant_flat, z, parameters=[Om])
    comoving = hubble_dist * integral

    # Luminosity distance
    lum_dist = (1 + z) * comoving

    return 5. * T.log10(lum_dist) + 25.  # dist mod.
def compile_train(self, *args):
    """Compile one training function per keyword dictionary in ``args``.

    Each positional argument is a dict unpacked into ``theano.function``;
    the compiled functions are appended to ``self.compiled_train_fn_list``.
    When ``self.monitor_grad`` is set, ``self.get_norm`` is also compiled.
    It returns the sum and the maximum of the base-10 logs of the L2 norms
    of ``self.grads``.
    """
    if self.verbose:
        print('compiling training function...')

    import theano

    for fn_kwargs in args:
        compiled = theano.function(**fn_kwargs)
        self.compiled_train_fn_list.append(compiled)

    if not self.monitor_grad:
        return

    import theano.tensor as T

    log_norms = T.log10([grad.norm(L=2) for grad in self.grads])
    self.get_norm = theano.function(
        [self.subb_ind],
        [T.sum(log_norms), T.max(log_norms)],
        givens=[(self.x, self.shared_x_slice),
                (self.y, self.shared_y_slice)])
def distmod_constant_flat(Om, h0, z):
    """
    Distance modulus for a flat universe with a cosmological constant

    :param Om: matter content
    :param h0: hubble constant
    :param z: redshift
    :return: theano array of dist. mods.
    """
    # Hubble distance
    d_hubble = sol * 1.0e-3 / h0

    # Comoving distance from the numerical integral over redshift
    d_comoving = d_hubble * gauss_kronrod(
        integrand_constant_flat, z, parameters=[Om])

    # Luminosity distance
    d_lum = (1 + z) * d_comoving

    modulus = 5.0 * T.log10(d_lum) + 25.0
    return modulus
def distmod_constant_curve(Om, Ok, h0, z):
    """
    Distance modulus for a curved universe with a cosmological constant

    :param Om: matter content
    :param Ok: curvature
    :param h0: hubble constant
    :param z: redshift
    :return: theano array of dist. mods.
    """
    # Hubble distance
    d_hubble = sol * 1.0e-3 / h0

    # Comoving distance
    d_comoving = d_hubble * gauss_kronrod(
        integrand_constant_curve, z, parameters=[Om, Ok])

    # Pre-compute the square root of |Ok| and the terms built from it
    sqrt_ok = T.sqrt(T.abs_(Ok))
    prefactor = (1 + z) * d_hubble / sqrt_ok
    angle = sqrt_ok * d_comoving / d_hubble

    # Value contributed by a branch whose condition is false
    zero_term = 0.0 * (1 + z) * d_comoving

    # Each (condition, value) pair contributes its value only when the
    # condition holds, and the three conditions are mutually exclusive.
    d_lum = ifelse(T.eq(Ok, 0.0), (1 + z) * d_comoving, zero_term)
    d_lum = d_lum + ifelse(T.gt(Ok, 0), prefactor * T.sinh(angle), zero_term)
    d_lum = d_lum + ifelse(T.lt(Ok, 0), prefactor * T.sin(angle), zero_term)

    return 5.0 * T.log10(d_lum) + 25.0  # dist mod
def emisEquation_HI_pm(temp_range, den_range, a, b, c):
    """Evaluate ``a + b * ln(T) + c * log10(T) ** 2`` symbolically.

    :param temp_range: temperature value(s); may be a symbolic tensor
    :param den_range: accepted but not used by this equation
    :param a, b, c: fit coefficients
    :return: symbolic tensor with the evaluated expression

    The natural-log term previously used ``np.log``, which cannot be
    applied to a symbolic tensor; ``tt.log`` computes the same natural
    logarithm and keeps the whole expression symbolic.
    NOTE(review): the mix of natural log and log10 is preserved as written
    -- confirm that the ``b`` term is not meant to use log10 as well.
    """
    log10_temp = tt.log10(temp_range)
    return a + b * tt.log(temp_range) + c * log10_temp * log10_temp
def emisEquation_TeDe_pm(temp_range, den_range, a, b, c, d, e):
    """Evaluate ``a + b / T + c * log10(T) + log10(1 + e * n)``.

    ``T`` is ``temp_range`` and ``n`` is ``den_range``. The coefficient
    ``d`` is accepted but not used.
    """
    temperature_part = a + b / temp_range + c * tt.log10(temp_range)
    density_part = tt.log10(1 + e * den_range)
    return temperature_part + density_part
def emisEquation_Te_pm(temp_range, den_range, a, b, c):
    """Evaluate ``a + b / T + c * log10(T)`` with ``T = temp_range``.

    ``den_range`` is accepted but not used.
    """
    inverse_term = b / temp_range
    log_term = c * tt.log10(temp_range)
    return a + inverse_term + log_term
def train_SRCNN(train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y, n_train_batches, n_valid_batches, n_test_batches, n_epochs, batch_size, learning_rate, upsampling_factor=4):
    """Build a three-layer convolutional network, compile it and train it.

    Inputs and targets are flat matrices reshaped to
    ``(batch_size, 3, 33, 33)``. Upsampling is expected to have been done
    during preprocessing; ``upsampling_factor`` is accepted but not used.

    :return: ``(validate_model, test_model)`` compiled Theano functions,
             each mapping a batch index to
             ``[cost, MSE_per_pixel, psnr, reconstructed images]``
    """
    x = T.matrix('x')
    y = T.matrix('y')

    theano.config.optimizer = 'fast_compile'
    print("theano optimizer: " + str(theano.config.optimizer))

    rng = np.random.RandomState(11111)
    index = T.lscalar()
    as_floatX = np.cast[theano.config.floatX]

    side = 33
    reshaped_input = x.reshape((batch_size, 3, side, side))
    reshaped_gt = y.reshape((batch_size, 3, side, side))

    # Filter sizes of the three convolutions
    f1, f2, f3 = 9, 1, 5
    output_len = side - f1 - f2 - f3 + 3

    # Patch extraction
    conv1 = Conv_Layer_ReLU(rng, reshaped_input,
                            image_shape=(batch_size, 3, side, side),
                            filter_shape=(64, 3, f1, f1))
    conv1_len = side - f1 + 1

    # Non-linear mapping
    conv2 = Conv_Layer_ReLU(rng, conv1.output,
                            image_shape=(batch_size, 64, conv1_len, conv1_len),
                            filter_shape=(32, 64, f2, f2))
    conv2_len = conv1_len - f2 + 1

    # Reconstruction
    conv3 = Conv_Layer_None(rng, conv2.output,
                            image_shape=(batch_size, 32, conv2_len, conv2_len),
                            filter_shape=(3, 32, f3, f3))
    model_output = conv3.output

    # NOTE(review): the original comment describes a centre crop, but this
    # slice takes the top-left ``output_len`` pixels -- confirm whether the
    # ground truth is pre-aligned.
    sub_y = reshaped_gt[:, :, :output_len, :output_len]
    cost = 1.0 / batch_size * T.sum((sub_y - model_output) ** 2)

    params = conv3.params + conv2.params + conv1.params

    # Adam-style moment accumulators (no bias correction). The step
    # applied to a parameter is the one stored by the previous call.
    beta1 = theano.shared(as_floatX(0.9), name='beta1')
    beta2 = theano.shared(as_floatX(0.999), name='beta2')
    eps = theano.shared(as_floatX(1e-8), name='eps')
    one = as_floatX(1.)

    def _zeros_like(shared_param):
        return theano.shared(shared_param.get_value() * as_floatX(0.))

    updates = []
    for param in params:
        step = _zeros_like(param)
        first_moment = _zeros_like(param)
        second_moment = _zeros_like(param)
        grad = T.grad(cost, param)
        updates.extend([
            (param, param - learning_rate * step),
            (step, first_moment / (T.sqrt(second_moment) + eps)),
            (first_moment, beta1 * first_moment + (one - beta1) * grad),
            (second_moment,
             beta2 * second_moment + (one - beta2) * T.sqr(grad)),
        ])

    # PSNR of a patch is based on the per-pixel colour-space MSE
    MSE_per_pixel = cost / (output_len * output_len * 3)
    psnr = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel)
    reconstucted_imgs = model_output

    def _minibatch(set_x, set_y):
        # Substitute the index-th mini-batch for the symbolic inputs.
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {x: set_x[start:stop], y: set_y[start:stop]}

    eval_outputs = [cost, MSE_per_pixel, psnr, reconstucted_imgs]
    test_model = theano.function(
        [index], eval_outputs, givens=_minibatch(test_set_x, test_set_y))
    validate_model = theano.function(
        [index], eval_outputs, givens=_minibatch(valid_set_x, valid_set_y))
    train_model = theano.function(
        [index], [cost, MSE_per_pixel, psnr],
        updates=updates, givens=_minibatch(train_set_x, train_set_y))

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, output_len, verbose=True)

    return validate_model, test_model
def sump(p_b, stop_t):
    """Return the sum of base-10 logs of the first ``stop_t`` entries of ``p_b``."""
    leading = p_b[:stop_t]
    return T.sum(T.log10(leading))
# Build the generator and obtain the discriminator outputs for generated
# samples (output_G_Train) and for the X_d input (output_D_Train).
size_dist = (BATCH_SIZE,size_distribution+size_Feature)
Generator = generative(BATCH_SIZE,size_dist,Z_random)
Generator.g_architecture()
Generate_X = lasagne.layers.get_output(Generator.architecture[-1],Z_random,deterministic=False)
output_G_Train = lasagne.layers.get_output(Discriminator.architecture[-1], Generate_X, deterministic=False)
output_D_Train = lasagne.layers.get_output(Discriminator.architecture[-1], X_d, deterministic=False)
#################################################################################
#### The discriminator knows that a simulated sample is essentially simulated
#### (the original note refers to a coefficient of 1 in the paper).
#### Loss computed via binary entropy; the original note refers to the
#### LeCun video-prediction paper.
# Sum of the mean squared errors over twelve (D, G) pairs: X1..X3 x Q1..Q4.
loss_penality0 = T.mean(sqError(X1Q1D,X1Q1G))+ T.mean(sqError(X1Q2D,X1Q2G))+T.mean(sqError(X1Q3D,X1Q3G))+T.mean(sqError(X1Q4D,X1Q4G))+T.mean(sqError(X2Q1D,X2Q1G))+ T.mean(sqError(X2Q2D,X2Q2G))+T.mean(sqError(X2Q3D,X2Q3G))+T.mean(sqError(X2Q4D,X2Q4G))+T.mean(sqError(X3Q1D,X3Q1G))+ T.mean(sqError(X3Q2D,X3Q2G))+T.mean(sqError(X3Q3D,X3Q3G))+T.mean(sqError(X3Q4D,X3Q4G))
#loss_penality0 = T.mean(sqError(X1Q1D,X1Q1G))+ T.mean(sqError(X1Q2D,X1Q2G))+T.mean(sqError(X1Q3D,X1Q3G))+T.mean(sqError(X1Q4D,X1Q4G))
# Mean squared error between the FeatD and FeatG tensors.
loss_penality1 =T.mean(sqError(FeatD,FeatG))
#loss_penality = 0.5*loss_penality1 + 0.5*loss_penality0 # first 25
loss_penality = 0.001*loss_penality1 + 0.999*loss_penality0 # second 25
# PSNR = 10 * log10(max(X_g)^2 / mean((X_d - X_g)^2))
Peak_Signal_Noise_Ratio = 10*T.log10((T.square(T.max(X_g).astype('float32'))/T.mean(T.square(X_d-X_g).astype('float32'))))
################################################## Train
# Mean and peak are the traditional measures; the original note suggests
# adding structural similarity, which takes interaction and windowing into
# account (see Wikipedia).
# Negative mean log terms for the two discriminator outputs.
temp_D = - T.mean(T.log(output_D_Train))
temp_G = - T.mean(T.log(1 - output_G_Train))
temp_G1 = - T.mean(T.log(output_G_Train))
# Discriminator accuracy: fraction of output_D_Train above 0.5 and of
# output_G_Train below 0.5, averaged.
acc_D0 = T.mean(output_D_Train > 0.5)
acc_D1 = T.mean(output_G_Train < 0.5)
acc_D = 0.5*(acc_D0 + acc_D1)
#loss_G = temp_G1 + (loss_penality) # first 25
loss_G = temp_G1 + 1*(loss_penality) # second 25
loss_D = temp_D + temp_G
#loss = [loss_D,acc_D,loss_G,loss_penality0,loss_penality1,temp_G1,temp_D,Peak_Signal_Noise_Ratio]
# Quantities exposed together, in this fixed order.
loss = [loss_D,acc_D,loss_G,loss_penality0,loss_penality1,temp_G1,temp_D,Peak_Signal_Noise_Ratio]
def emisEquation_TeDe_tt(self, xy_space, a, b, c, d, e):
    """Evaluate ``a + b / t4 + c * log10(t4) + log10(1 + e * n)``.

    ``xy_space`` unpacks into ``(temp_range, den_range)``; ``t4`` is
    ``temp_range / 10000`` and ``n`` is ``den_range``. The coefficient
    ``d`` is accepted but not used.
    """
    temp_range, den_range = xy_space

    # The two divisions are kept exactly as written (float and integer
    # literal) so the resulting dtypes are unchanged.
    inverse_term = b / (temp_range / 10000.0)
    log_term = c * tt.log10(temp_range / 10000)
    density_term = tt.log10(1 + e * den_range)

    return a + inverse_term + log_term + density_term
def initialize(self,rng, output_taps, n_in, n_hidden, n_out, samples, mode, profile, dtype=theano.config.floatX,params=None,n_features=0):
    """
    Create the network parameters and build the symbolic cost.

    :type rng: numpy.random.RandomState
    :param rng: accepted but not used; weights are drawn from the global
                ``numpy.random`` state

    :type output_taps: sequence
    :param output_taps: one recurrent weight matrix is created per entry

    :type n_in: int32
    :param n_in: number of input neurons

    :type n_hidden: int32
    :param n_hidden: number of hidden units

    :type n_out: int32
    :param n_out: number of output units

    :param samples: number of rows the scan output is reshaped to

    :param mode: passed through to ``theano.scan``
    :param profile: passed through to ``theano.scan``

    :type dtype: theano.config.floatX
    :param dtype: theano 32/64bit mode

    :param params: optional pre-trained values, indexed as
                   [recurrent W, W_in, W_out, Wf_in]; when None the
                   weights are initialised uniformly in [-0.01, 0.01]

    :type n_features: int
    :param n_features: size of the extra feature input; 0 disables it
    """
    # length of output taps
    self.len_output_taps = len(output_taps)
    # input (where first dimension is time)
    self.u = T.matrix()
    # target (where first dimension is time)
    self.t = T.ivector()
    # initial hidden state of the RNN
    self.H = T.vector()
    # learning rate
    self.lr = T.scalar()

    self.n_hidden = n_hidden
    self.samples = samples
    self.n_features = n_features
    self.n_in = n_in

    def _uniform(shape):
        # Small symmetric uniform initialisation cast to ``dtype``.
        return numpy.random.uniform(size=shape, low=-.01, high=.01).astype(dtype)

    tap_names = ['W_r' + str(output_taps[u]) for u in range(self.len_output_taps)]

    # ``is None`` instead of ``== None``: equality is elementwise (and
    # ambiguous in an ``if``) when ``params`` is a numpy array.
    if params is None:
        # Draw order (W, W_in, W_out, Wf_in) is kept so results are
        # reproducible for a given numpy seed.
        W = [theano.shared(_uniform((n_hidden, n_hidden)), name=name)
             for name in tap_names]
        # recurrent activations
        self.h = theano.shared(numpy.zeros((n_hidden,)).astype(dtype), name='h')
        # input to hidden layer weights
        W_in = theano.shared(_uniform((n_in, n_hidden)), name='W_in')
        # hidden to output layer weights
        W_out = theano.shared(_uniform((n_hidden, n_out)), name='W_out')
        # weights from the input feature layer to the hidden layer
        if n_features > 0:
            Wf_in = theano.shared(_uniform((n_features, n_hidden)), name='Wf_in')
    else:
        # Every tap shares the same stored recurrent matrix params[0].
        W = [theano.shared(params[0], name=name) for name in tap_names]
        # recurrent activations
        self.h = theano.shared(numpy.zeros((n_hidden,)).astype(dtype), name='h')
        W_in = theano.shared(params[1], name='W_in')
        W_out = theano.shared(params[2], name='W_out')
        if n_features > 0:
            Wf_in = theano.shared(params[3], name='Wf_in')

    # Stack the network parameters. Bias terms are not part of the model
    # (they were created but never used in the original code).
    self.params = []
    self.params.extend(W)
    self.params.extend([W_in])
    self.params.extend([W_out])
    if n_features > 0:
        self.params.extend([Wf_in])

    # Regularisation terms; only the first recurrent matrix is included.
    self.L1 = abs(W[0]).sum() + abs(W_in).sum() + abs(W_out).sum()
    if n_features > 0:
        self.L1 = self.L1 + abs(Wf_in).sum()

    self.L2 = (W[0] ** 2).sum() + (W_in ** 2).sum() + (W_out ** 2).sum()
    if n_features > 0:
        self.L2 = self.L2 + (Wf_in ** 2).sum()

    self.lambdaL1 = 0.0
    self.lambdaL2 = 1e-7

    # the hidden state `h` for the entire sequence, and the output for the
    # entry sequence `y` (first dimension is always time)
    [h, y], scan_updates = theano.scan(self.step,
                                       sequences=self.u,
                                       outputs_info=[dict(initial=self.H, taps=[-1]), None],
                                       non_sequences=self.params,
                                       truncate_gradient=5,
                                       mode=mode,
                                       profile=profile)

    # Base-10 log of the output entry selected by each target index.
    y = y.reshape((samples*1, n_out))
    self.lprob_y_given_x = T.log10(y)[T.arange(self.t.shape[0]), self.t]
    self.cost = -T.mean(T.log10(y)[T.arange(self.t.shape[0]), self.t])
    self.last_hidden = h[samples-1]
def train_FSRCNN_VGG(train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y, n_train_batches, n_valid_batches, n_test_batches, n_epochs, batch_size, lr, upsampling_factor=4):
    """Train a small super-resolution network with a VGG feature loss.

    Inputs are reshaped to ``(batch_size, 3, 8, 8)`` and targets to
    ``(batch_size, 3, 33, 33)``. The network output and the target are
    both passed through four VGG convolution layers loaded from
    ``Layers1to7_16vgg.mat``. The training cost is the mean squared
    difference between the network features and the centre crop of the
    target features. ``upsampling_factor`` is accepted but not used.

    :return: ``(validate_model, test_model)`` compiled Theano functions,
             each mapping a batch index to
             ``[cost, MSE_per_pixel, psnr, reconstructed images]``
    """
    x = T.matrix('x')
    y = T.matrix('y')

    theano.config.optimizer = 'fast_compile'
    print("theano optimizer: " + str(theano.config.optimizer))

    rng = np.random.RandomState(11111)
    index = T.lscalar()
    as_floatX = np.cast[theano.config.floatX]

    reshaped_input = x.reshape((batch_size, 3, 8, 8))
    reshaped_gt = y.reshape((batch_size, 3, 33, 33))
    learning_rate = theano.shared(as_floatX(lr))

    # Filter sizes
    f1, f2, f3 = 9, 5, 9
    input_image_size = 8
    output_len = input_image_size + f3 - 1

    # Patch extraction
    conv1 = Conv_Layer_ReLU(rng, reshaped_input,
                            image_shape=(batch_size, 3, input_image_size, input_image_size),
                            filter_shape=(64, 3, f1, f1))
    conv1_len = input_image_size

    # Non-linear mapping
    conv2 = Conv_Layer_ReLU(rng, conv1.output,
                            image_shape=(batch_size, 64, conv1_len, conv1_len),
                            filter_shape=(32, 64, f2, f2))
    conv2_len = conv1_len

    # Reconstruction through a deconvolution layer
    conv3 = De_Conv_Layer_ReLU(rng, conv2.output,
                               image_shape=(batch_size, 32, conv2_len, conv2_len),
                               filter_shape=(3, 32, f3, f3))

    # Clipping to [0, 1] is disabled, so these are plain aliases.
    model_output_clipped = conv3.output
    model_output_copied = model_output_clipped

    mat = scipy.io.loadmat('Layers1to7_16vgg.mat')

    def _vgg_features(source, start_dims, banner):
        # Two conv3-64 layers, 2x2 pooling, two conv3-128 layers.
        # Returns the last layer's output and the final spatial size.
        dims = np.asarray(start_dims, dtype=int)
        print(banner)
        print('im_dims=', dims)
        first = Conv_Layer_VGG(input=source,
                               image_shape=(batch_size, 3, dims[0], dims[1]),
                               filter_shape=(64, 3, 3, 3),
                               weights=load_weights(mat, 1))
        second = Conv_Layer_VGG(input=first.output,
                                image_shape=(batch_size, 64, dims[0], dims[1]),
                                filter_shape=(64, 64, 3, 3),
                                weights=load_weights(mat, 2))
        pooled = pool.pool_2d(input=second.output, ds=(2, 2), ignore_border=True)
        dims = np.asarray(dims / 2, dtype=int)
        third = Conv_Layer_VGG(input=pooled,
                               image_shape=(batch_size, 64, dims[0], dims[1]),
                               filter_shape=(128, 64, 3, 3),
                               weights=load_weights(mat, 3))
        fourth = Conv_Layer_VGG(input=third.output,
                                image_shape=(batch_size, 128, dims[0], dims[1]),
                                filter_shape=(128, 128, 3, 3),
                                weights=load_weights(mat, 4))
        return fourth.output, dims[0]

    layer1_low_input = conv3.output
    model_output, output_len_low = _vgg_features(
        layer1_low_input, [16, 16],
        '... building the vgg model for low resolution input')
    high_res_model_output, output_len_high = _vgg_features(
        reshaped_gt, [33, 33],
        '... building the vgg model for high resolution input')

    # VGG space: compare against the centre crop of the target features
    center_start = int((output_len_high - output_len_low) / 2)
    center_end = output_len_high - center_start
    sub_y = high_res_model_output[:, :, center_start:center_end,
                                  center_start:center_end]

    feature_sq_err = (sub_y - model_output) ** 2
    cost2 = 1.0 / batch_size * T.sum(feature_sq_err)
    cost = T.mean(feature_sq_err)

    # PSNR computed from the per-element feature MSE
    MSE_per_pixel = cost2 / (output_len_low * output_len_low * 128)
    psnr = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel)

    # Original (pixel) space diagnostics; built but not returned.
    center_start = int((33 - output_len) / 2)
    center_end = 33 - center_start
    if output_len % 2 == 0:
        center_end = center_end - 1
    sub_y_orig = reshaped_gt[:, :, center_start:center_end,
                             center_start:center_end]
    pixel_sq_err = (sub_y_orig - layer1_low_input) ** 2
    cost_orig = 1.0 / batch_size * T.sum(pixel_sq_err)
    cost_orig2 = T.mean(pixel_sq_err)
    MSE_per_pixel_orig = cost_orig / (output_len * output_len * 3)
    psnr_orig = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel_orig)

    reconstucted_imgs = model_output_clipped
    params = conv3.params + conv2.params + conv1.params

    # RMSProp with gradients clipped to [-1, 1]. (Adam, Nesterov momentum
    # and plain SGD variants were tried earlier and are disabled.)
    rms_decay = as_floatX(0.999)
    updates = []
    for param in params:
        cache = theano.shared(param.get_value() * as_floatX(0.))
        eps = theano.shared(as_floatX(1e-6))
        clip_grad = T.clip(T.grad(cost, param), -1.0, 1.0)
        new_cache = rms_decay * cache + (as_floatX(1.0) - rms_decay) * clip_grad ** 2
        updates.append((cache, new_cache))
        updates.append(
            (param,
             param - (learning_rate * clip_grad) / (T.sqrt(new_cache) + eps)))

    def _minibatch(set_x, set_y):
        # Substitute the index-th mini-batch for the symbolic inputs.
        start = index * batch_size
        stop = (index + 1) * batch_size
        return {x: set_x[start:stop], y: set_y[start:stop]}

    outputs = [cost, MSE_per_pixel, psnr, reconstucted_imgs]
    test_model = theano.function(
        [index], outputs, givens=_minibatch(test_set_x, test_set_y))
    validate_model = theano.function(
        [index], outputs, givens=_minibatch(valid_set_x, valid_set_y))
    train_model = theano.function(
        [index], outputs, updates=updates,
        givens=_minibatch(train_set_x, train_set_y))

    # Each call returns the current learning rate and multiplies the
    # stored value by 0.99.
    decay_learning_rate_function = theano.function(
        [], learning_rate,
        updates=[(learning_rate, learning_rate * 0.99)])

    train_nn_vgg(train_model, validate_model, test_model,
                 n_train_batches, n_valid_batches, n_test_batches,
                 n_epochs, output_len, decay_learning_rate_function,
                 verbose=False)

    # Final pass over the test batches; the collected values are not
    # returned.
    test_losses = []
    test_reconstructed = []
    test_pnsr = []
    test_MSE = []
    for i in range(n_test_batches):
        batch_cost, batch_mse, batch_psnr, batch_imgs = test_model(i)[:4]
        test_losses.append(batch_cost)
        test_MSE.append(batch_mse)
        test_pnsr.append(batch_psnr)
        test_reconstructed.append(batch_imgs)

    return validate_model, test_model
def recur(self, w_j, y_j, sshot_tj, oh_jm1, oc_jm1, intent_t, degree_t, belief_t ,actEmb_t ):
    """Run one decoder step and return the new state and costs.

    :return: ``(oh_j, oc_j, p_j, snapcost_j)``: new hidden state, new
             memory cell, the softmax output column selected by ``y_j``,
             and the snapshot cost term
    """
    epsln = 10e-6
    doh = self.doh
    sigmoid = T.nnet.sigmoid

    # current input
    in_j = sigmoid(self.Wemb[w_j])

    # policy embedding: re-encoded every step for the attention policy,
    # otherwise the one passed in is reused
    if self.ply == 'attention':
        actEmb_tj = self.policy.encode(belief_t, degree_t, intent_t,
                                       oh_jm1, in_j.dimshuffle('x', 0))
    else:
        actEmb_tj = actEmb_t

    # snapshot cost
    if self.use_snapshot:
        prdtshot_j = actEmb_tj[:, :self.dsp] / 2.0 + 0.5
        on_term = T.mean(sshot_tj * T.log10(prdtshot_j + epsln))
        off_term = T.mean((1.0 - sshot_tj) * T.log10(1.0 - prdtshot_j + epsln))
        snapcost_j = on_term + off_term
    else:
        snapcost_j = T.sum(sshot_tj)

    # pre-activations for all gates are computed together, then sliced
    bundle_j = T.dot(in_j, self.oWgate).dimshuffle('x', 0) + \
        T.dot(oh_jm1, self.oUgate)
    bundle_aj = T.dot(actEmb_tj, self.Wzh)

    input_cols = slice(None, doh)
    forget_cols = slice(doh, doh * 2)
    output_cols = slice(doh * 2, doh * 3)
    cell_cols = slice(doh * 3, doh * 4)
    read_cols = slice(doh * 4, doh * 5)

    def _gate(cols, bias):
        return sigmoid(bundle_j[:, cols] + bundle_aj[:, cols] + bias)

    ig = _gate(input_cols, self.b[:doh])
    fg = _gate(forget_cols, self.b[doh:doh * 2])
    og = _gate(output_cols, self.b[doh * 2:doh * 3])

    struct = self.struct
    if struct in ('lstm_cond', 'lstm_mix'):
        # reading gate controls how the action signal is injected
        rg = _gate(read_cols, self.b[doh * 3:])
        cx_j = T.tanh(bundle_j[:, cell_cols])
        action_signal = rg * T.tanh(bundle_aj[:, cell_cols])
        if struct == 'lstm_cond':
            # signal enters through the memory cell
            oc_j = ig * cx_j + fg * oc_jm1 + action_signal
            oh_j = og * T.tanh(oc_j)
        else:
            # signal is added directly to the hidden state
            oc_j = ig * cx_j + fg * oc_jm1
            oh_j = og * T.tanh(oc_j) + action_signal
    elif struct == 'lstm_lm':
        # language-model style: both inputs feed the proposed cell
        cx_j = T.tanh(bundle_j[:, cell_cols] + bundle_aj[:, cell_cols])
        oc_j = ig * cx_j + fg * oc_jm1
        oh_j = og * T.tanh(oc_j)
    else:
        sys.exit('[ERROR]: Unseen decoder structure '+self.struct)

    # output distribution and the entry for the target word
    o_j = T.nnet.softmax(T.dot(oh_j, self.Who))
    p_j = o_j[:, y_j]

    return oh_j, oc_j, p_j, snapcost_j
def inference_model(self, fit_T_low=True, fit_T_high=True):
    """Declare the PyMC3 model for the emission-line fluxes.

    The model is stored in ``self.inferenModel``.  Priors are declared via
    ``self.set_prior`` for ``n_e``, ``cHbeta`` and every ion in
    ``self.obsIons`` except ``'H1'`` (plus ``Teff`` and ``logU`` when
    ``self.ionizationModels_Check`` is set).  For every line a synthetic
    log flux is computed with ``self.emtt.compute_flux`` and written into a
    tensor that is compared with ``log10(self.emissionFluxes)`` through a
    Normal likelihood.

    Parameters
    ----------
    fit_T_low, fit_T_high : bool
        Forwarded unchanged to ``self.temperature_selection``.

    Returns
    -------
    None
    """

    # Container to store the synthetic line fluxes
    self.paramDict = {}  # FIXME do I need this one for loop inferences

    # Define observable input (fluxes and relative errors in log10 space)
    fluxTensor = tt.zeros(self.lineLabels.size)
    inputFlux = np.log10(self.emissionFluxes)
    inputFluxErr = np.log10(1 + self.emissionErr / self.emissionFluxes)

    # Observables and variable names for the photoionization-grid lines
    if self.ionizationModels_Check:
        inputGridFlux = np.log10(self.grid_emissionFluxes)
        inputGridErr = np.log10(1 + self.grid_emissionFluxErrs / self.grid_emissionFluxes)
        linesTensorLabels = np.array([f'{self.grid_LineLabels[i]}_Op' for i in range(self.grid_LineLabels.size)])

    # Define the counters for loops
    linesRangeArray = np.arange(self.lineLabels.size)

    # Assign variable values ('H1' gets a fixed value instead of a prior)
    self.paramDict['H1'] = 0.0

    with pymc3.Model() as self.inferenModel:

        # Declare model parameters priors
        self.set_prior('n_e')
        self.set_prior('cHbeta')

        # Establish model temperature structure
        self.temperature_selection(fit_T_low, fit_T_high)

        # Define grid interpolation variables: (temperature, density) pairs
        emisCoord_low = tt.stack([[self.paramDict['T_low'][0]], [self.paramDict['n_e'][0]]], axis=-1)
        emisCoord_high = tt.stack([[self.paramDict['T_high'][0]], [self.paramDict['n_e'][0]]], axis=-1)

        # Establish model composition
        for ion in self.obsIons:
            if ion != 'H1':
                self.set_prior(ion, abund=True, name_param=ion)

        if self.ionizationModels_Check:
            self.set_prior('Teff')
            self.set_prior('logU')

            # Total oxygen abundance on the 12 + log10 scale from O2 and O3
            O2_abund = tt.power(10, self.paramDict['O2'] - 12)
            O3_abund = tt.power(10, self.paramDict['O3'] - 12)
            OH = tt.log10(O2_abund + O3_abund) + 12

            # Coordinate handed to the grid interpolators: (logU, Teff, OH)
            grid_coord = tt.stack([[self.paramDict['logU']], [self.paramDict['Teff']], [OH]], axis=-1)

        # Loop through the lines to compute the synthetic fluxes
        for i in linesRangeArray:

            # Declare line properties
            lineLabel = self.lineLabels[i]
            lineIon = self.lineIons[i]
            lineFlambda = self.lineFlambda[i]

            # Compute emisivity for the corresponding ion temperature
            T_calc = emisCoord_high if self.idcs_highTemp_ions[i] else emisCoord_low
            line_emis = self.emisGridInterpFun[lineLabel](T_calc)

            # Declare fluorescence correction
            lineftau = 0.0

            # Compute line flux
            lineFlux_i = self.emtt.compute_flux(lineLabel,
                                                line_emis[0][0],
                                                self.paramDict['cHbeta'],
                                                lineFlambda,
                                                self.paramDict[lineIon],
                                                lineftau,
                                                O3=self.paramDict['O3'],
                                                T_high=self.paramDict['T_high'])

            # NOTE(review): grid_coord, linesTensorLabels, inputGridErr and
            # inputGridFlux are only bound when ionizationModels_Check is
            # truthy, and they are indexed with the lineLabels counter i
            # although built from grid_LineLabels - confirm both are safe.
            if self.idx_analysis_lines[i]:

                # Value interpolated from the grid for this line
                lineInt = self.gridInterp[lineLabel](grid_coord)

                # Apply the extinction term cHbeta * f_lambda
                lineFlux = lineInt - self.paramDict['cHbeta'] * lineFlambda

                # Inference: track the value and attach its own likelihood
                pymc3.Deterministic(linesTensorLabels[i], lineFlux)
                Y_grid = pymc3.Normal(lineLabel, mu=lineFlux, sd=inputGridErr[i], observed=inputGridFlux[i])

            # Assign the new value in the tensor
            fluxTensor = storeValueInTensor(i, lineFlux_i[0], fluxTensor)

        # Store computed fluxes
        pymc3.Deterministic('calcFluxes_Op', fluxTensor)

        # Likelihood gas components
        Y_emision = pymc3.Normal('Y_emision', mu=fluxTensor, sd=inputFluxErr, observed=inputFlux)

        # Display simulation data
        displaySimulationData(self.inferenModel)

    # self.inferenModel.profile(self.inferenModel.logpt).summary()
    # self.inferenModel.profile(pymc3.gradient(self.inferenModel.logpt, self.inferenModel.vars)).summary()

    return
def tot_ppl(self, y, penalty=None):
    """Return the negative mean log10-probability of the target labels.

    Args:
        y: integer label per row; used together with
            ``T.arange(y.shape[0])`` to pick one entry per row of
            ``self.p_y_given_x``.
        penalty: optional multiplicative weight applied to the selected
            probabilities before the logarithm.  ``None`` (the default) or
            an empty list/tuple means "no penalty".

    Returns:
        Symbolic scalar ``-mean(log10(p[target]))`` or, with a penalty,
        ``-mean(log10(p[target] * penalty))``.
    """
    # The old signature used a mutable ``[]`` default and compared with
    # ``penalty == []``, which is ambiguous for array-like penalties.
    # An explicitly passed empty list is still accepted.
    no_penalty = penalty is None or (
        isinstance(penalty, (list, tuple)) and len(penalty) == 0)

    rows = T.arange(y.shape[0])
    if no_penalty:
        return -T.mean(T.log10(self.p_y_given_x)[rows, y])
    return -T.mean(T.log10((self.p_y_given_x)[rows, y] * penalty))
def H1_linesEmis_tt(xy_space, a, b, c):
    """Evaluate ``a + b*log10(T) + c*log10(T)**2`` symbolically.

    ``xy_space`` must unpack into exactly two items (temperature,
    density); only the temperature enters the expression.
    """
    temp_range, _den_range = xy_space
    log_temp = tt.log10(temp_range)
    return a + b * log_temp + c * log_temp * log_temp
def train_FSRCNN(train_set_x, train_set_y, valid_set_x, valid_set_y,
                 test_set_x, test_set_y, n_train_batches, n_valid_batches,
                 n_test_batches, n_epochs, batch_size, lr,
                 upsampling_factor=4):
    """Build, compile and train the residual FSRCNN-style network.

    Each mini-batch of ``x`` is reshaped to ``(batch_size, 3, 8, 8)`` and
    each mini-batch of ``y`` to ``(batch_size, 3, 33, 33)``.  Eight
    convolution layers (with three skip connections) feed a final
    deconvolution layer; the cost is the mean squared error between the
    network output and the centre crop of the ground truth.

    Args:
        train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x,
            test_set_y: data sets, sliced per mini-batch and substituted
            for ``x`` / ``y`` through ``givens`` (presumably Theano shared
            variables - confirm against the caller).
        n_train_batches, n_valid_batches, n_test_batches, n_epochs:
            forwarded unchanged to ``train_nn``.
        batch_size: number of samples per mini-batch.
        lr: initial learning rate; every call of the decay function handed
            to ``train_nn`` multiplies it by 0.995.
        upsampling_factor: unused here; kept for interface compatibility.

    Returns:
        Tuple ``(validate_model, test_model)`` of compiled Theano
        functions, each mapping a mini-batch index to
        ``[cost, MSE_per_pixel, psnr, reconstructed images]``.
    """
    x = T.matrix('x')
    y = T.matrix('y')
    theano.config.optimizer = 'fast_compile'
    # Single-argument call form: identical output on Python 2 and 3.
    print("theano optimizer: " + str(theano.config.optimizer))
    rng = np.random.RandomState(11111)
    index = T.lscalar()
    reshaped_input = x.reshape((batch_size, 3, 8, 8))
    reshaped_gt = y.reshape((batch_size, 3, 33, 33))
    learning_rate = theano.shared(np.cast[theano.config.floatX](lr))

    # Filter sizes
    f1 = 5
    f2 = 3
    f3 = 3
    f4 = 3
    f5 = 9
    input_image_size = 8
    output_len = (input_image_size - 1) * 2 + f5

    # Conv for patch extraction
    print('batch size', batch_size)
    conv1 = Conv_Layer_ReLU(rng, reshaped_input,
                            image_shape=(batch_size, 3, input_image_size,
                                         input_image_size),
                            filter_shape=(56, 3, f1, f1))
    # every convolution below is declared with this same spatial size
    conv1_len = input_image_size

    # Conv for non-linear mapping
    print('conv1 done....')
    conv2 = Conv_Layer_ReLU(rng, conv1.output,
                            image_shape=(batch_size, 56, conv1_len, conv1_len),
                            filter_shape=(12, 56, f2, f2))

    # Mapping stack; conv5-conv7 add the output from two layers earlier
    conv3 = Conv_Layer_ReLU(rng, conv2.output,
                            image_shape=(batch_size, 12, conv1_len, conv1_len),
                            filter_shape=(12, 12, f3, f3))
    conv4 = Conv_Layer_ReLU(rng, conv3.output,
                            image_shape=(batch_size, 12, conv1_len, conv1_len),
                            filter_shape=(12, 12, f3, f3))
    conv5 = Conv_Layer_ReLU(rng, conv4.output + conv2.output,
                            image_shape=(batch_size, 12, conv1_len, conv1_len),
                            filter_shape=(12, 12, f3, f3))
    conv6 = Conv_Layer_ReLU(rng, conv5.output + conv3.output,
                            image_shape=(batch_size, 12, conv1_len, conv1_len),
                            filter_shape=(12, 12, f3, f3))
    conv7 = Conv_Layer_ReLU(rng, conv6.output + conv4.output,
                            image_shape=(batch_size, 12, conv1_len, conv1_len),
                            filter_shape=(56, 12, f4, f4))
    conv8 = Conv_Layer_ReLU(rng, conv7.output,
                            image_shape=(batch_size, 56, conv1_len, conv1_len),
                            filter_shape=(3, 56, f4, f4))

    # Deconvolution for reconstruction
    conv9 = Deconv_Last.De_Conv_Layer_ReLU(
        rng, conv8.output,
        image_shape=(batch_size, 3, output_len, output_len),
        filter_shape=(3, 3, f5, f5))
    model_output = conv9.output
    print(model_output.shape)

    # Grab the centre pixels of the ground truth.  Floor division keeps
    # the slice bounds integral on Python 3 as well as Python 2.
    print('output len...', output_len)
    center_start = (33 - output_len) // 2
    center_end = 33 - center_start
    if output_len % 2 == 0:
        center_end = center_end - 1
    sub_y = reshaped_gt[:, :, center_start:center_end,
                        center_start:center_end]

    # MSE between centre pixels of prediction and ground truth
    cost = T.mean((sub_y - model_output)**2)
    cost2 = 1.0 / batch_size * T.sum((sub_y - model_output)**2)

    # PSNR of a patch is based on colour space
    MSE_per_pixel = cost2 / (output_len * output_len * 3)
    psnr = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel)
    reconstucted_imgs = model_output

    params = (conv3.params + conv2.params + conv1.params + conv4.params +
              conv5.params + conv6.params + conv7.params + conv8.params +
              conv9.params)

    # Adam-style moment updates (no bias-correction term is applied)
    beta1 = theano.shared(np.cast[theano.config.floatX](0.9), name='beta1')
    beta2 = theano.shared(np.cast[theano.config.floatX](0.999), name='beta2')
    eps = theano.shared(np.cast[theano.config.floatX](1e-8), name='eps')
    one = np.cast[theano.config.floatX](1.)
    updates = []
    for param in params:
        # build the gradient expression once and reuse it for both moments
        grad = T.grad(cost, param)
        m = theano.shared(param.get_value() *
                          np.cast[theano.config.floatX](0.))
        v = theano.shared(param.get_value() *
                          np.cast[theano.config.floatX](0.))
        new_m = beta1 * m + (one - beta1) * grad
        new_v = beta2 * v + (one - beta2) * T.sqr(grad)
        updates.append((m, new_m))
        updates.append((v, new_v))
        updates.append(
            (param, param - learning_rate * new_m / (T.sqrt(new_v) + eps)))

    # Theano function compilation
    test_model = theano.function(
        [index], [cost, MSE_per_pixel, psnr, reconstucted_imgs],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    validate_model = theano.function(
        [index], [cost, MSE_per_pixel, psnr, reconstucted_imgs],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })
    train_model = theano.function(
        [index], [cost, MSE_per_pixel, psnr],
        updates=updates,
        givens={
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            x: train_set_x[index * batch_size:(index + 1) * batch_size]
        })
    decay_learning_rate_function = theano.function(
        [], learning_rate,
        updates=[(learning_rate, learning_rate * .995)])

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, output_len,
             decay_learning_rate_function, verbose=False)
    return validate_model, test_model
def emisEquation_HI_tt(self, xy_space, a, b, c):
    """Return the quadratic-in-log10(T) fit ``a + b*L + c*L*L``.

    ``xy_space`` is unpacked into (temperature, density); the density
    component is not used by the expression.
    """
    temp_range, _unused_density = xy_space
    log_t = tt.log10(temp_range)
    linear_term = b * log_t
    quadratic_term = c * log_t * log_t
    return a + linear_term + quadratic_term
def log10(z):
    """Return the base-10 logarithm of ``z`` via ``T.log10``."""
    result = T.log10(z)
    return result
def recur(self, w_j, y_j, sshot_tj, oh_jm1, oc_jm1, intent_t, degree_t,
          belief_t, actEmb_t):
    """Single step of the action-conditioned LSTM decoder.

    Inputs are the current word index ``w_j``, the target index ``y_j``,
    the snapshot target ``sshot_tj``, the previous hidden state / memory
    cell (``oh_jm1``, ``oc_jm1``) and the action embedding ``actEmb_t``.
    ``intent_t``, ``degree_t`` and ``belief_t`` are only consumed when
    ``self.ply == 'attention'``, in which case the action embedding is
    recomputed through ``self.policy.encode``.

    Returns ``(oh_j, oc_j, p_j, snapcost_j)``.  An unknown ``self.struct``
    terminates the process through ``sys.exit``.
    """
    sigm = T.nnet.sigmoid
    tanh = T.tanh
    epsln = 10e-6  # added inside the logs; numerically this is 1e-5

    # Column ranges of the stacked pre-activations, one per gate.
    d = self.doh
    cols_i = slice(None, d)         # input gate
    cols_f = slice(d, d * 2)        # forget gate
    cols_o = slice(d * 2, d * 3)    # output gate
    cols_c = slice(d * 3, d * 4)    # proposed memory cell
    cols_r = slice(d * 4, d * 5)    # reading gate

    # current input
    in_j = sigm(self.Wemb[w_j])

    # policy embedding
    if self.ply != 'attention':
        # normal or latent policy: reuse the embedding that was passed in
        actEmb_tj = actEmb_t
    else:
        actEmb_tj = self.policy.encode(belief_t, degree_t, intent_t, oh_jm1,
                                       in_j.dimshuffle('x', 0))

    # snapshot
    if not self.use_snapshot:
        snapcost_j = T.sum(sshot_tj)
    else:
        prdtshot_j = actEmb_tj[:, :self.dsp] / 2.0 + 0.5
        snapcost_j = (
            T.mean(sshot_tj * T.log10(prdtshot_j + epsln)) +
            T.mean((1.0 - sshot_tj) * T.log10(1.0 - prdtshot_j + epsln)))

    # i, f, o, c (and r) pre-activations are computed together and sliced
    bundle_j = T.dot(in_j, self.oWgate).dimshuffle('x', 0) + \
        T.dot(oh_jm1, self.oUgate)
    bundle_aj = T.dot(actEmb_tj, self.Wzh)

    ig = sigm(bundle_j[:, cols_i] + bundle_aj[:, cols_i] + self.b[cols_i])
    fg = sigm(bundle_j[:, cols_f] + bundle_aj[:, cols_f] + self.b[cols_f])
    og = sigm(bundle_j[:, cols_o] + bundle_aj[:, cols_o] + self.b[cols_o])

    # reading gate, memory cell, hidden layer
    if self.struct == 'lstm_cond':
        # reading gate injects the action signal into the memory cell
        rg = sigm(bundle_j[:, cols_r] + bundle_aj[:, cols_r] +
                  self.b[d * 3:])
        cx_j = tanh(bundle_j[:, cols_c])
        oc_j = ig * cx_j + fg * oc_jm1 + rg * tanh(bundle_aj[:, cols_c])
        oh_j = og * tanh(oc_j)
        o_j = T.nnet.softmax(T.dot(oh_j, self.Who))
    elif self.struct == 'lstm_mix':
        # reading gate injects the action signal into the hidden layer
        rg = sigm(bundle_j[:, cols_r] + bundle_aj[:, cols_r] +
                  self.b[d * 3:])
        cx_j = tanh(bundle_j[:, cols_c])
        oc_j = ig * cx_j + fg * oc_jm1
        oh_j = og * tanh(oc_j) + rg * tanh(bundle_aj[:, cols_c])
        o_j = T.nnet.softmax(T.dot(oh_j, self.Who))
    elif self.struct == 'lstm_lm':
        # lm style: word and action signals share the proposed cell
        cx_j = tanh(bundle_j[:, cols_c] + bundle_aj[:, cols_c])
        oc_j = ig * cx_j + fg * oc_jm1
        oh_j = og * tanh(oc_j)
        o_j = T.nnet.softmax(T.dot(oh_j, self.Who))
    else:
        sys.exit('[ERROR]: Unseen decoder structure ' + self.struct)

    # probability assigned to the target word
    p_j = o_j[:, y_j]
    return oh_j, oc_j, p_j, snapcost_j
def dialog_recur(source_t, target_t, source_len_t, target_len_t,
                 masked_source_t, masked_target_t, masked_source_len_t,
                 masked_target_len_t, utt_group_t, snapshot_t,
                 success_reward_t, sample_t, change_label_t, db_degree_t,
                 inf_label_t, req_label_t, source_feat_t, target_feat_t,
                 belief_tm1, masked_target_tm1, masked_target_len_tm1,
                 target_feat_tm1, posterior_tm1):
    """Build the losses and carried state for one dialogue turn.

    ``self``, ``epsln``, ``_alpha``, ``_lambda`` and ``_avgLen`` are not
    parameters and are not assigned here, so they must come from the
    enclosing scope.  The ``*_tm1`` arguments are the values carried over
    from the previous turn (presumably this is a ``theano.scan`` step
    function - confirm at the call site).

    Returns a 14-tuple: ``inf_belief_t``, ``masked_target_t``,
    ``masked_target_len_t``, ``target_feat_t``, ``posterior_t``, ``z_t``,
    ``loss_t``, ``companion_loss_t``, ``prior_loss_t``,
    ``posterior_loss_t``, ``base_loss_t``, ``reward_t``, ``baseline_t``
    and ``debug_t``.
    """
    ##############################################################
    ##################### Intent encoder #########################
    ##############################################################
    # Intent encoder
    # NOTE(review): masked_intent_t is only bound when self.enc == 'lstm'
    # but is passed to the decoder below - confirm other encoders cannot
    # reach that call.
    if self.enc == 'lstm':
        masked_intent_t = bidirectional_encode(self.fEncoder, self.bEncoder,
                                               masked_source_t,
                                               masked_source_len_t)

    ##############################################################
    ########## Belief tracker, informable + requestable ##########
    ##############################################################
    # cost placeholder for accumulation (zero-initialised scalars)
    print '\tloss function'
    loss_t = theano.shared(np.zeros((1), dtype=theano.config.floatX))[0]
    companion_loss_t = theano.shared(
        np.zeros((1), dtype=theano.config.floatX))[0]
    prior_loss_t = theano.shared(
        np.zeros((1), dtype=theano.config.floatX))[0]
    posterior_loss_t = theano.shared(
        np.zeros((1), dtype=theano.config.floatX))[0]
    base_loss_t = theano.shared(
        np.zeros((1), dtype=theano.config.floatX))[0]

    # other information to store
    dtmp = 1  #if self.vae_train=='sample' else self.dl
    reward_t = theano.shared(
        np.zeros((dtmp), dtype=theano.config.floatX))
    baseline_t = theano.shared(
        np.zeros((1), dtype=theano.config.floatX))[0]
    posterior_t = theano.shared(
        np.zeros((self.dl), dtype=theano.config.floatX))[0]

    # Informable slot belief tracker
    # belief vector
    belief_t = []
    if self.trk == 'rnn' and self.inf == True:
        for i in range(len(self.infotrackers)):
            # slice the current belief tracker output
            cur_belief_tm1 = belief_tm1[self.iseg[i]:self.iseg[i + 1]]
            # NOTE(review): the four position-feature names are only
            # bound when self.trkenc == 'cnn' yet are always passed to
            # recur() below.
            if self.trkenc == 'cnn':  # cnn, position features
                ssrcpos_js = source_feat_t[
                    0, self.iseg[i]:self.iseg[i + 1], :]
                vsrcpos_js = source_feat_t[
                    1, self.iseg[i]:self.iseg[i + 1], :]
                starpos_jm1s = target_feat_tm1[
                    0, self.iseg[i]:self.iseg[i + 1], :]
                vtarpos_jm1s = target_feat_tm1[
                    1, self.iseg[i]:self.iseg[i + 1], :]

            # tracking
            cur_belief_t = self.infotrackers[i].recur(
                cur_belief_tm1, masked_source_t, masked_target_tm1,
                masked_source_len_t, masked_target_len_tm1,
                ssrcpos_js, vsrcpos_js, starpos_jm1s, vtarpos_jm1s)

            # semi label
            cur_label_t = inf_label_t[self.iseg[i]:self.iseg[i + 1]]

            # include cost if training tracker
            if self.learn_mode == 'all' or self.learn_mode == 'trk':
                print '\t\tincluding informable tracker loss ...'
                loss_t += -T.sum(
                    cur_label_t * T.log10(cur_belief_t + epsln))

            # accumulate belief vector; note that the label, not the
            # tracker output, is what gets appended
            if self.bef == 'full':
                belief_t.append(cur_label_t)
            else:
                # summary belief: sum of all but the last two entries,
                # the second-to-last entry, and (for 'summary' only) the
                # last entry
                tmp = [T.sum( cur_label_t[:-2],axis=0).dimshuffle('x'),\
                       cur_label_t[-2].dimshuffle('x')]
                tmp = tmp + [cur_label_t[-1].dimshuffle('x')] if\
                    self.bef=='summary' else tmp
                cur_sum_belief_t = T.concatenate(tmp, axis=0)
                belief_t.append(cur_sum_belief_t)

    # the labels themselves are what is carried to the next turn
    inf_belief_t = inf_label_t

    # Requestable slot belief tracker
    if self.trk == 'rnn' and self.req == True:
        for i in range(len(self.rseg) - 1):
            # current feature index
            bn = self.iseg[-1] + 2 * i
            if self.trkenc == 'cnn':  # cnn, position features
                ssrcpos_js = source_feat_t[0, bn, :]
                vsrcpos_js = source_feat_t[1, bn, :]
                starpos_jm1s = target_feat_tm1[0, bn, :]
                vtarpos_jm1s = target_feat_tm1[1, bn, :]

            # tracking
            cur_belief_t = self.reqtrackers[i].recur(
                masked_source_t, masked_target_tm1,
                masked_source_len_t, masked_target_len_tm1,
                ssrcpos_js, vsrcpos_js, starpos_jm1s, vtarpos_jm1s)

            # semi label
            cur_label_t = req_label_t[2 * i:2 * (i + 1)]

            # include cost if training tracker
            if self.learn_mode == 'all' or self.learn_mode == 'trk':
                print '\t\tincluding requestable tracker loss ...'
                loss_t += -T.sum(
                    cur_label_t * T.log10(cur_belief_t + epsln))

            # accumulate belief vector
            if self.bef == 'full':
                belief_t.append(cur_label_t)
            else:
                tmp = cur_label_t if self.bef == 'summary' else cur_label_t[: 1]
                belief_t.append(tmp)

        # offer-change tracker (fed -1 placeholders as position features)
        minus1 = -T.ones((1), dtype='int32')
        cur_belief_t = self.changeTracker.recur(
            masked_source_t, masked_target_tm1,
            masked_source_len_t, masked_target_len_tm1,
            minus1, minus1, minus1, minus1)

        # cost function
        if self.learn_mode == 'trk' or self.learn_mode == 'all':
            print '\t\tincluding OfferChange tracker loss ...'
            loss_t += -T.sum(
                change_label_t * T.log10(cur_belief_t + epsln))

        # accumulate belief vector
        if self.bef == 'full':
            belief_t.append(change_label_t)
        else:
            tmp = change_label_t[:1] if self.bef=='simplified' \
                else change_label_t
            belief_t.append(tmp)

    ##############################################################
    ######################## LSTM decoder ########################
    ##############################################################
    # NOTE(review): bef_t is not used again in this function; the decoder
    # receives the belief_t list instead.
    bef_t = T.concatenate(belief_t, axis=0)

    # LSTM decoder
    if self.dec == 'lstm' and self.learn_mode != 'trk':
        prob_t, snapCost_t, prior_t, posterior_t, z_t, base_t, debugX = \
            self.decoder.decode(
                masked_source_t, masked_source_len_t,
                masked_target_t, masked_target_len_t,
                masked_intent_t, belief_t, db_degree_t[-6:],
                utt_group_t, snapshot_t, sample_t)
        debug_t = prior_t

        # decoder loss
        if self.ply != 'latent':  # deterministic policy
            print '\t\tincluding decoder loss ...'
            loss_t += -T.sum(T.log10(prob_t + epsln))
        else:  # variational policy
            # disconnet gradient flow
            P = G.disconnected_grad(prior_t)
            Q = G.disconnected_grad(posterior_t)
            Qtm1 = G.disconnected_grad(posterior_tm1)

            # prior network loss
            if self.learn_mode == 'rl':  # rl fine-tuning
                print '\t\tincluding RL success reward for fine-tine policy ...'
                prior_loss_t = -success_reward_t * T.log10(prior_t + epsln)[z_t]
            else:  # neural variational inference
                # encoder loss, minimising KL(Q|P) and self-supervised action
                print '\t\tinclding KL(Q|Pi) to train policy network Pi ...'
                prior_loss_t = -T.switch(
                    T.lt(utt_group_t, self.dl - 1),
                    T.log10(prior_t + epsln)[z_t],
                    _alpha * T.sum(Q * (T.log10(prior_t + epsln) -
                                        T.log10(Q + epsln))))

                # decoder loss for current sample/ground truth
                # NOTE(review): plain assignment, so any tracker loss
                # accumulated in loss_t above is discarded on this
                # branch - confirm this is intended.
                print '\t\tincluding decoder loss ...'
                loss_t = -T.sum(T.log10(prob_t + epsln))

                # define reward function for Q
                print '\t\tincluding reinforce loss to train inference network Q ...'
                r_t = G.disconnected_grad(
                    _avgLen * T.mean(T.log10(prob_t + epsln)) +  # decoder loglikelihood
                    -_lambda * T.sum(Q * (T.log10(Q + epsln) - T.log10(P + epsln))) +  # KL(P|Q)
                    -_lambda * T.sum(Qtm1 * (T.log10(Qtm1 + epsln) - T.log10(Q + epsln)))  # KL(Qt|Qtm1)
                )

                # actual reward after deducting baseline
                reward_t = G.disconnected_grad(r_t - base_t)
                baseline_t = base_t
                #debug_t = r_t-base_t

                # Q network loss: reinforce objective
                posterior_loss_t = -T.switch(
                    T.lt(utt_group_t, self.dl - 1),
                    T.log10(posterior_t + epsln)[z_t],  # self-sup
                    _alpha * reward_t * T.log10(posterior_t + epsln)[z_t]  # reinforce
                )

                # baseline loss (squared error, zero for the first group)
                print '\t\tincluding baseline loss ...'
                base_loss_t = T.switch(T.lt(utt_group_t, self.dl - 1),
                                       0., (r_t - baseline_t)**2)

        # snapshot objective
        if self.use_snap:
            print '\t\tincluding decoder snapshot loss ...'
            companion_loss_t += -T.sum(
                snapCost_t[:masked_target_len_t - 1])

    # dummy, TODO: change it
    # Placeholders so every returned name is bound for non-latent policies.
    # NOTE(review): with ply == 'latent' and the decoder branch skipped,
    # z_t and debug_t would be unbound at the return.
    if self.ply != 'latent':
        posterior_t = posterior_tm1
        z_t = posterior_tm1
        reward_t = posterior_tm1
        prior_t = posterior_tm1
        debug_t = posterior_tm1

    # take the semi label for next input - like LM
    return inf_belief_t, masked_target_t, masked_target_len_t, \
        target_feat_t, posterior_t, z_t,\
        loss_t, companion_loss_t, prior_loss_t, posterior_loss_t, base_loss_t,\
        reward_t, baseline_t, debug_t
def compute_source_mag_and_blend_fraction(data, Delta_F, F_base, u_0,
                                          model=None):
    r"""
    Converts flux parameters :math:`(\Delta F, F_\mathrm{base})` to
    physically more relevant quantities, the source star brightness in
    magnitudes and the blend ratio :math:`g=F_B/F_S`.

    If the model's ``is_standardized`` attribute is ``True``, the flux
    parameters are first mapped back to flux units using the median and
    standard deviation of the masked fluxes of each light curve.

    Parameters
    ----------
    data : :func:`~caustic.data.Data`
        Microlensing event data.  Note that ``data.units`` is set to
        ``"fluxes"`` as a side effect when the model is standardized.
    Delta_F : theano.tensor
        Tensor of shape ``(n_bands)``.
    F_base : theano.tensor
        Tensor of shape ``(n_bands)``.
    u_0 : theano.tensor
        Lens--source separation at time :math:`t_0`.
    model : pymc3.Model
        PyMC3 model object; resolved through ``pm.modelcontext``, so it
        may be omitted inside a model context.

    Returns
    -------
    tuple
        ``(m_source, g)``.
    """
    model = pm.modelcontext(model)

    if model.is_standardized is True:
        # Revert F_base and Delta_F to non-standardized units
        data.units = "fluxes"

        n_curves = len(data.light_curves)
        fluxes_median = np.zeros(n_curves)
        fluxes_std = np.zeros(n_curves)

        for i, table in enumerate(data.light_curves):
            mask = table["mask"]
            fluxes_median[i] = np.median(table["flux"][mask])
            fluxes_std[i] = np.std(table["flux"][mask])

        # Flux parameters to standard flux units
        Delta_F_ = T.as_tensor_variable(fluxes_std) * Delta_F
        F_base_ = T.as_tensor_variable(
            fluxes_std) * F_base + T.as_tensor_variable(fluxes_median)
    else:
        Delta_F_ = Delta_F
        F_base_ = F_base

    # Calculate source flux and blend flux from the magnification at u_0
    A_u0 = (u_0**2 + 2) / (T.abs_(u_0) * T.sqrt(u_0**2 + 4))

    F_S = Delta_F_ / (A_u0 - 1)
    F_B = F_base_ - F_S

    g = F_B / F_S

    # Convert fluxes to magnitudes
    zero_point = 22.0
    m_source = zero_point - 2.5 * T.log10(F_S)

    return m_source, g
def train_FSRCNN(train_set_x, train_set_y, valid_set_x, valid_set_y,
                 test_set_x, test_set_y, n_train_batches, n_valid_batches,
                 n_test_batches, n_epochs, batch_size, lr,
                 upsampling_factor=4):
    """Build, compile and train the three-layer FSRCNN-style network.

    Each mini-batch of ``x`` is reshaped to ``(batch_size, 3, 8, 8)`` and
    each mini-batch of ``y`` to ``(batch_size, 3, 33, 33)``.  Two
    convolution layers feed a deconvolution layer; the cost is the mean
    squared error between the network output and the centre crop of the
    ground truth.

    Args:
        train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x,
            test_set_y: data sets, sliced per mini-batch and substituted
            for ``x`` / ``y`` through ``givens`` (presumably Theano shared
            variables - confirm against the caller).
        n_train_batches, n_valid_batches, n_test_batches, n_epochs:
            forwarded unchanged to ``train_nn``.
        batch_size: number of samples per mini-batch.
        lr: initial learning rate; every call of the decay function handed
            to ``train_nn`` multiplies it by 0.995.
        upsampling_factor: unused here; kept for interface compatibility.

    Returns:
        Tuple ``(validate_model, test_model)`` of compiled Theano
        functions, each mapping a mini-batch index to
        ``[cost, MSE_per_pixel, psnr, reconstructed images]``.
    """
    x = T.matrix('x')
    y = T.matrix('y')
    theano.config.optimizer = 'fast_compile'
    rng = np.random.RandomState(11111)
    index = T.lscalar()
    reshaped_input = x.reshape((batch_size, 3, 8, 8))
    reshaped_gt = y.reshape((batch_size, 3, 33, 33))
    learning_rate = theano.shared(np.cast[theano.config.floatX](lr))

    # Filter sizes
    f1 = 9
    f2 = 5
    f3 = 10
    input_image_size = 8
    output_len = input_image_size + f3 - 1

    # Conv for patch extraction
    conv1 = Conv_Layer_ReLU(rng, reshaped_input,
                            image_shape=(batch_size, 3, input_image_size,
                                         input_image_size),
                            filter_shape=(64, 3, f1, f1))
    conv1_len = input_image_size

    # Conv for non-linear mapping
    conv2 = Conv_Layer_ReLU(rng, conv1.output,
                            image_shape=(batch_size, 64, conv1_len, conv1_len),
                            filter_shape=(32, 64, f2, f2))
    conv2_len = conv1_len

    # Deconvolution for reconstruction
    conv3 = De_Conv_Layer_ReLU(rng, conv2.output,
                               image_shape=(batch_size, 32, conv2_len,
                                            conv2_len),
                               filter_shape=(3, 32, f3, f3))
    model_output = conv3.output

    # Grab the centre pixels of the ground truth.  Floor division keeps
    # the slice bounds integral on Python 3 as well as Python 2.
    center_start = (33 - output_len) // 2
    center_end = 33 - center_start
    sub_y = reshaped_gt[:, :, center_start:center_end,
                        center_start:center_end]

    # MSE between centre pixels of prediction and ground truth
    cost = T.mean((sub_y - model_output) ** 2)

    # PSNR of a patch is based on colour space.  ``cost`` is already a
    # mean over every element, so dividing it by the pixel count again
    # (as this function used to) under-reports the error and inflates the
    # PSNR.  Normalise the summed error exactly once instead, the same
    # way the residual variant of this function does.
    cost2 = 1.0 / batch_size * T.sum((sub_y - model_output) ** 2)
    MSE_per_pixel = cost2 / (output_len * output_len * 3)
    psnr = 20 * T.log10(255) - 10 * T.log10(MSE_per_pixel)
    reconstucted_imgs = model_output

    params = conv3.params + conv2.params + conv1.params

    # Adam-style moment updates (no bias-correction term is applied)
    beta1 = theano.shared(np.cast[theano.config.floatX](0.9), name='beta1')
    beta2 = theano.shared(np.cast[theano.config.floatX](0.999), name='beta2')
    eps = theano.shared(np.cast[theano.config.floatX](1e-8), name='eps')
    one = np.cast[theano.config.floatX](1.)
    updates = []
    for param in params:
        # build the gradient expression once and reuse it for both moments
        grad = T.grad(cost, param)
        m = theano.shared(param.get_value() *
                          np.cast[theano.config.floatX](0.))
        v = theano.shared(param.get_value() *
                          np.cast[theano.config.floatX](0.))
        new_m = beta1 * m + (one - beta1) * grad
        new_v = beta2 * v + (one - beta2) * T.sqr(grad)
        updates.append((m, new_m))
        updates.append((v, new_v))
        updates.append(
            (param, param - learning_rate * new_m / (T.sqrt(new_v) + eps)))

    # Theano function compilation
    test_model = theano.function(
        [index],
        [cost, MSE_per_pixel, psnr, reconstucted_imgs],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        [cost, MSE_per_pixel, psnr, reconstucted_imgs],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    train_model = theano.function(
        [index],
        [cost, MSE_per_pixel, psnr],
        updates=updates,
        givens={
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        })

    decay_learning_rate_function = theano.function(
        [], learning_rate,
        updates=[(learning_rate, learning_rate * .995)])

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs,
             output_len, decay_learning_rate_function,
             verbose=True)
    return validate_model, test_model
def log_operation(x):
    """Return ``10 * log10(x)`` as a symbolic expression."""
    log_x = T.log10(x)
    return 10 * log_x
y = T.scalar('y') z = T.log10(x+y) f = theano.function([x,y],z) return f(var1, var2) ''' def logMath(var1, var2): return math.log10(var1+var2) def logTheano(var1, var2): return f(var1,var2) A = [[10,10],[100,100]] x = T.scalar('x') y = T.scalar('y') z = T.log10(x+y) f = theano.function([x,y],z) print('Theano') start = time() #print start for I in range(1000): ans1 = logTheano(I,1) #print ans elapsed1 = time() - start print elapsed1 print('Math Module') start = time() for I in range(1000): ans2 = logMath(I,1) #print ans elapsed2 = time() - start
# print(f'Measuring {N} calls of jacobi') # integrators = [IntegrateVectorized(f, [df_dphi, df_dalpha, df_dbeta], energy, np.array(b), amplitude_, alpha_, beta_) for b in zip(bins[: -1], bins[1:])] # T.jacobian(integrator(amplitude, alpha, beta), amplitude).eval({amplitude: 4.0, alpha: 2.0, beta: 0.5}) # t0 = time.time() # for i in range(N): # for integrator in integrators: # T.jacobian(integrator(amplitude, alpha, beta), amplitude).eval({amplitude: 4.0, alpha: 2.0, beta: 0.5}) # t1 = time.time() # print(f'Takes approximately {(t1-t0) / N} seconds per iteration, {(t1-t0)} seconds in total (for {len(bins)} bins)') print('--' * 30) print('Integrating Vectorized Generalized') print(f'Measuring {N} calls of eval') energy = T.dvector('energy') func = amplitude_ * energy**(-alpha_ - beta_ * T.log10(energy)) integrator = IntegrateVectorizedGeneralized(func, energy, bins, amplitude_, alpha_, beta_) integration_result_generalized = integrator(amplitude, alpha, beta).eval({ amplitude: 4.0, alpha: 2.0, beta: 0.5 }) t0 = time.time() for i in range(N): integrator(amplitude, alpha, beta).eval({ amplitude: 4.0,