def __init__(self, dnodex, inputdim, dim):
    # Symbolic inputs: POI index sequences X/Y, user index Z, learning rate, softmax temperature
    X = T.ivector()
    Y = T.ivector()
    Z = T.lscalar()
    eta = T.scalar()
    temperature = T.scalar()

    self.dnodex = dnodex
    num_input = inputdim
    # Shared per-user transform matrices and POI embedding matrix
    dnodex.umatrix = theano.shared(floatX(np.random.randn(*(self.dnodex.nuser, inputdim, inputdim))))
    dnodex.pmatrix = theano.shared(floatX(np.random.randn(*(self.dnodex.npoi, inputdim))))
    dnodex.p_l2_norm = (dnodex.pmatrix ** 2).sum()
    dnodex.u_l2_norm = (dnodex.umatrix ** 2).sum()
    num_hidden = dim
    num_output = inputdim

    inputs = InputPLayer(dnodex.pmatrix[X, :], dnodex.umatrix[Z, :, :], name="inputs")
    lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
    lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
    lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
    softmax = SoftmaxPLayer(num_hidden, num_output, dnodex.umatrix[Z, :, :],
                            input_layer=lstm3, name="yhat", temperature=temperature)
    Y_hat = softmax.output()

    self.layers = inputs, lstm1, lstm2, lstm3, softmax
    params = get_params(self.layers)
    #caches = make_caches(params)

    # Cross-entropy against the user-transformed target embeddings, plus L2 penalties
    cost = T.mean(T.nnet.categorical_crossentropy(
        Y_hat, T.dot(dnodex.pmatrix[Y, :], dnodex.umatrix[Z, :, :]))) \
        + eta * dnodex.p_l2_norm + eta * dnodex.u_l2_norm
    updates = PerSGD(cost, params, eta, X, Z, dnodex)  #momentum(cost, params, caches, eta)

    self.train = theano.function([X, Y, Z, eta, temperature], cost,
                                 updates=updates, allow_input_downcast=True)

    predict_updates = one_step_updates(self.layers)
    self.predict_char = theano.function([X, Z, temperature], Y_hat,
                                        updates=predict_updates, allow_input_downcast=True)
def __init__(self, dnodex, dim):
    X = T.matrix()
    Y = T.matrix()
    eta = T.scalar()
    temperature = T.scalar()

    # Inputs/outputs are binary encodings of the POI id, so the layer width is
    # the number of bits needed to represent dnodex.npoi
    num_input = len(format(dnodex.npoi, 'b'))
    num_hidden = dim
    num_output = len(format(dnodex.npoi, 'b'))

    inputs = InputLayer(X, name="inputs")
    lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
    lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
    #lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
    softmax = SoftmaxLayer(num_hidden, num_output, input_layer=lstm2, name="yhat", temperature=temperature)
    Y_hat = softmax.output()

    self.layers = inputs, lstm1, lstm2, softmax
    params = get_params(self.layers)
    caches = make_caches(params)

    cost = T.mean(T.nnet.categorical_crossentropy(Y_hat, Y))
    updates = momentum(cost, params, caches, eta)

    self.train = theano.function([X, Y, eta, temperature], cost,
                                 updates=updates, allow_input_downcast=True)

    predict_updates = one_step_updates(self.layers)
    self.predict_char = theano.function([X, temperature], Y_hat,
                                        updates=predict_updates, allow_input_downcast=True)
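# Aside (a minimal standalone check, not part of the model above): the layer
# widths come from len(format(dnodex.npoi, 'b')), i.e. the number of binary
# digits needed to represent the POI count. Nothing beyond the standard
# library is assumed here.
def bit_width(n):
    """Number of binary digits in n, as used for num_input/num_output above."""
    return len(format(n, 'b'))

assert bit_width(1) == 1       # '1'
assert bit_width(255) == 8     # '11111111'
assert bit_width(256) == 9     # '100000000'
assert bit_width(1000) == 10   # same as (1000).bit_length()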
def __init__(self, num_input, num_cells=50, num_output=1, lr=0.01, rho=0.95):
    X = T.matrix('x')
    Y = T.matrix('y')
    eta = T.scalar('eta')
    alpha = T.scalar('alpha')

    self.num_input = num_input
    self.num_output = num_output
    self.num_cells = num_cells
    self.eta = eta

    inputs = InputLayer(X, name="inputs")
    lstm = LSTMLayer(num_input, num_cells, input_layer=inputs, name="lstm")
    fc = FullyConnectedLayer(num_cells, num_output, input_layer=lstm)
    Y_hat = T.mean(fc.output(), axis=2)

    layer = inputs, lstm, fc
    self.params = get_params(layer)
    self.caches = make_caches(self.params)
    self.layers = layer

    # Blend the squared error over the whole sequence with the error at the last step
    mean_cost = T.mean((Y - Y_hat) ** 2)
    last_cost = T.mean((Y[-1] - Y_hat[-1]) ** 2)
    self.cost = alpha * mean_cost + (1 - alpha) * last_cost

    # self.updates = momentum(self.cost, self.params, self.caches, self.eta, clip_at=3.0)
    self.updates, _, _, _, _ = create_optimization_updates(self.cost, self.params,
                                                           method="adadelta", lr=lr, rho=rho)

    self.train = theano.function([X, Y, alpha], [self.cost, last_cost],
                                 updates=self.updates, allow_input_downcast=True)
    self.costfn = theano.function([X, Y, alpha], [self.cost, last_cost],
                                  allow_input_downcast=True)
    self.predict = theano.function([X], [Y_hat], allow_input_downcast=True)
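# create_optimization_updates is an external helper not shown here. Assuming
# its "adadelta" method follows Zeiler's ADADELTA (running averages of squared
# gradients and squared updates with decay rho), one parameter update looks
# roughly like this standalone NumPy sketch; treating lr as an extra scale on
# the step is an assumption, not something the helper guarantees.
import numpy as np

def adadelta_step(param, grad, state, rho=0.95, lr=1.0, eps=1e-6):
    """One ADADELTA step. `state` carries E[g^2] and E[dx^2] between calls."""
    state['Eg2'] = rho * state['Eg2'] + (1 - rho) * grad ** 2
    delta = -np.sqrt(state['Edx2'] + eps) / np.sqrt(state['Eg2'] + eps) * grad
    state['Edx2'] = rho * state['Edx2'] + (1 - rho) * delta ** 2
    return param + lr * delta

w = np.zeros(3)
state = {'Eg2': np.zeros(3), 'Edx2': np.zeros(3)}
w = adadelta_step(w, np.array([0.5, -0.2, 0.1]), state, rho=0.95, lr=0.01)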
def __init__(self, num_input=256, num_hidden=512, num_output=256):
    X = T.matrix()
    Y = T.matrix()
    eta = T.scalar()
    alpha = T.scalar()

    self.num_input = num_input
    self.num_hidden = num_hidden
    self.num_output = num_output

    inputs = InputLayer(X, name="inputs")
    lstm1f = LSTMLayer(num_input, num_hidden, input_layers=[inputs], name="lstm1f")
    lstm1b = LSTMLayer(num_input, num_hidden, input_layers=[inputs], name="lstm1b", go_backwards=True)
    fc = FullyConnectedLayer(2 * num_hidden, num_output, input_layers=[lstm1f, lstm1b], name="yhat")
    Y_hat = sigmoid(T.mean(fc.output(), axis=0))

    self.layers = inputs, lstm1f, lstm1b, fc
    params = get_params(self.layers)
    caches = make_caches(params)

    mean_cost = - T.mean(Y * T.log(Y_hat) + (1 - Y) * T.log(1 - Y_hat))
    last_step_cost = - T.mean(Y[-1] * T.log(Y_hat[-1]) + (1 - Y[-1]) * T.log(1 - Y_hat[-1]))
    cost = alpha * mean_cost + (1 - alpha) * last_step_cost

    updates = momentum(cost, params, caches, eta, clip_at=3.0)

    self.train = theano.function([X, Y, eta, alpha], [cost, last_step_cost],
                                 updates=updates, allow_input_downcast=True)
    self.predict = theano.function([X], [Y_hat[-1]], allow_input_downcast=True)
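# Why fc above is sized 2*num_hidden: a bidirectional pair yields one forward
# and one backward feature vector per time step, and the fully connected layer
# sees their concatenation. Minimal standalone NumPy sketch with toy
# recurrences standing in for the LSTM layers; that FullyConnectedLayer
# concatenates its input_layers along the feature axis is an assumption about
# the layer library, not something shown here.
import numpy as np

def toy_rnn(x, w, backwards=False):
    """Tiny recurrence h_t = tanh(x_t @ w + h_{t-1}); runs in reverse if backwards."""
    seq = x[::-1] if backwards else x
    h, out = np.zeros(w.shape[1]), []
    for x_t in seq:
        h = np.tanh(x_t @ w + h)
        out.append(h)
    out = np.stack(out)
    return out[::-1] if backwards else out  # re-align the backward pass to original time order

T_steps, num_input, num_hidden = 5, 4, 3
x = np.random.randn(T_steps, num_input)
w = np.random.randn(num_input, num_hidden)
h_fwd = toy_rnn(x, w)                                 # (T, num_hidden)
h_bwd = toy_rnn(x, w, backwards=True)                 # (T, num_hidden)
features = np.concatenate([h_fwd, h_bwd], axis=-1)    # (T, 2*num_hidden), what fc consumes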
def __init__(self, dnodex, inputdim, dim):
    X = T.ivector()
    Y = T.ivector()
    Z = T.lscalar()
    NP = T.ivector()
    lambd = T.scalar()
    eta = T.scalar()
    temperature = T.scalar()

    num_input = inputdim
    self.umatrix = theano.shared(floatX(np.random.rand(dnodex.nuser, inputdim, inputdim)))
    self.pmatrix = theano.shared(floatX(np.random.rand(dnodex.npoi, inputdim)))
    self.p_l2_norm = (self.pmatrix ** 2).sum()
    self.u_l2_norm = (self.umatrix ** 2).sum()
    num_hidden = dim
    num_output = inputdim

    inputs = InputPLayer(self.pmatrix[X, :], self.umatrix[Z, :, :], name="inputs")
    lstm1 = LSTMLayer(num_input, num_hidden, input_layer=inputs, name="lstm1")
    #lstm2 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm1, name="lstm2")
    #lstm3 = LSTMLayer(num_hidden, num_hidden, input_layer=lstm2, name="lstm3")
    softmax = SoftmaxPLayer(num_hidden, num_output, self.umatrix[Z, :, :],
                            input_layer=lstm1, name="yhat", temperature=temperature)
    Y_hat = softmax.output()

    self.layers = inputs, lstm1, softmax
    params = get_params(self.layers)
    #caches = make_caches(params)

    # Pairwise preference terms: score observed POIs X against sampled negatives NP
    # with the user-transformed mean embedding, weighted by the sigmoid derivative
    tmp_u = T.mean(T.dot(self.pmatrix[X, :], self.umatrix[Z, :, :]), axis=0)
    tr = T.dot(tmp_u, (self.pmatrix[X, :] - self.pmatrix[NP, :]).transpose())
    pfp_loss1 = sigmoid(tr)
    pfp_loss = pfp_loss1 * (T.ones_like(pfp_loss1) - pfp_loss1)
    tmp_u1 = T.reshape(T.repeat(tmp_u, X.shape[0]), (inputdim, X.shape[0])).T
    pfp_lossv = T.reshape(T.repeat(pfp_loss, inputdim), (inputdim, X.shape[0])).T

    cost = lambd * 10 * T.mean(T.nnet.categorical_crossentropy(
        Y_hat, T.dot(self.pmatrix[Y, :], self.umatrix[Z, :, :]))) \
        + lambd * self.p_l2_norm + lambd * self.u_l2_norm

    # updates = PerSGD(cost,params,eta,X,Z,dnodex)#momentum(cost, params, caches, eta)
    updates = []
    grads = T.grad(cost=cost, wrt=params)
    # SGD on the embedding slices actually touched by this batch, plain SGD on the rest
    updates.append([self.pmatrix, T.set_subtensor(self.pmatrix[X, :], self.pmatrix[X, :] - eta * grads[0])])
    updates.append([self.umatrix, T.set_subtensor(self.umatrix[Z, :, :], self.umatrix[Z, :, :] - eta * grads[1])])
    for p, g in zip(params[2:], grads[2:]):
        updates.append([p, p - eta * g])

    # Ranked POI list for the current user representation
    rlist = T.argsort(T.dot(tmp_u, self.pmatrix.T))[::-1]

    n_updates = [(self.pmatrix,
                  T.set_subtensor(self.pmatrix[NP, :],
                                  self.pmatrix[NP, :] - eta * pfp_lossv * tmp_u1
                                  - eta * lambd * self.pmatrix[NP, :]))]
    p_updates = [(self.pmatrix,
                  T.set_subtensor(self.pmatrix[X, :],
                                  self.pmatrix[X, :] + eta * pfp_lossv * tmp_u1
                                  - eta * lambd * self.pmatrix[X, :])),
                 (self.umatrix,
                  T.set_subtensor(self.umatrix[Z, :, :],
                                  self.umatrix[Z, :, :] + eta * T.mean(pfp_loss) *
                                  (T.reshape(tmp_u, (tmp_u.shape[0], 1)) *
                                   T.mean(self.pmatrix[X, :] - self.pmatrix[NP, :], axis=0)))
                  - eta * lambd * self.umatrix[Z, :, :])]

    self.train = theano.function([X, Y, Z, eta, lambd, temperature], cost,
                                 updates=updates, allow_input_downcast=True)
    self.trainpos = theano.function([X, NP, Z, eta, lambd], tmp_u,
                                    updates=p_updates, allow_input_downcast=True)
    self.trainneg = theano.function([X, NP, Z, eta, lambd], T.mean(pfp_loss),
                                    updates=n_updates, allow_input_downcast=True)
    self.predict_pfp = theano.function([X, Z], rlist, allow_input_downcast=True)
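# The p_updates/n_updates above weight each positive/negative pair by
# sigmoid(s) * (1 - sigmoid(s)), the derivative of the logistic function of
# the score gap s, as in BPR-style pairwise ranking. A minimal standalone
# NumPy sketch of one such step on plain arrays; the batched Theano form and
# the shared-variable bookkeeping above differ.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def pairwise_step(u, p_pos, p_neg, eta=0.01, lambd=0.001):
    """One pairwise step: pull the positive toward u, push the negative away."""
    s = u @ (p_pos - p_neg)               # score gap between positive and negative
    w = sigmoid(s) * (1.0 - sigmoid(s))   # sigmoid-derivative weighting
    p_pos_new = p_pos + eta * w * u - eta * lambd * p_pos
    p_neg_new = p_neg - eta * w * u - eta * lambd * p_neg
    return p_pos_new, p_neg_new

u = np.random.rand(8)        # user representation (tmp_u above)
p_pos = np.random.rand(8)    # embedding of an observed POI
p_neg = np.random.rand(8)    # embedding of a sampled negative POI
p_pos, p_neg = pairwise_step(u, p_pos, p_neg)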
def __init__(self, num_input=256, num_hidden=[512,512], num_output=256, clip_at=0.0, scale_norm=0.0):
    X = T.matrix()
    Y = T.matrix()
    eta = T.scalar()
    alpha = T.scalar()
    lambda2 = T.scalar()
    drop_prob = T.scalar()

    self.num_input = num_input
    self.num_hidden = num_hidden
    self.num_output = num_output
    self.clip_at = clip_at
    self.scale_norm = scale_norm

    inputs = InputLayer(X, name="inputs")
    num_prev = num_input
    prev_layer = inputs
    layers = [inputs]
    for i, num_curr in enumerate(num_hidden):
        lstm = LSTMLayer(num_prev, num_curr, input_layers=[prev_layer],
                         name="lstm{0}".format(i + 1), drop_prob=drop_prob)
        num_prev = num_curr
        prev_layer = lstm
        layers.append(lstm)
    sigmoid = SigmoidLayer(num_prev, num_output, input_layers=[prev_layer], name="yhat")
    layers.append(sigmoid)
    # lstm1 = LSTMLayer(num_input, num_hidden, input_layers=[inputs], name="lstm1")
    # lstm2 = LSTMLayer(num_hidden, num_hidden, input_layers=[lstm1], name="lstm2")
    # lstm3 = LSTMLayer(num_hidden, num_hidden, input_layers=[lstm2], name="lstm2")
    # sigmoid = SigmoidLayer(num_hidden, num_output, input_layers=[lstm2], name="yhat")
    Y_hat = sigmoid.output()

    #self.layers = inputs, lstm1, lstm2, sigmoid #, lstm3
    self.layers = layers
    params = get_params(self.layers)
    caches = make_caches(params)

    mean_cost = - T.mean(Y * T.log(Y_hat) + (1 - Y) * T.log(1 - Y_hat))
    last_step_cost = - T.mean(Y[-1] * T.log(Y_hat[-1]) + (1 - Y[-1]) * T.log(1 - Y_hat[-1]))
    cost = alpha * mean_cost + (1 - alpha) * last_step_cost

    updates = momentum(cost, params, caches, eta, clip_at=self.clip_at,
                       scale_norm=self.scale_norm, lambda2=lambda2)

    self.train_func = theano.function([X, Y, eta, alpha, lambda2, drop_prob], [cost, last_step_cost],
                                      updates=updates, allow_input_downcast=True)
    self.predict_func = theano.function([X, drop_prob], [Y_hat[-1]], allow_input_downcast=True)
# Especially at the end of audio files where mask is
# all zero for some of the shorter files in mini-batch.
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)

# Use this one instead.
cost = cost.sum()
cost = cost / target_mask.sum()

# By default we report cross-entropy cost in bits.
# Switch to nats by commenting out this line:
# log_2(e) = 1.44269504089
cost = cost * lib.floatX(numpy.log2(numpy.e))

### Getting the params, grads, updates, and Theano functions ###
params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param == True)
lib.print_params_info(params, path=FOLDER_PREFIX)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]

updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)

# Training function
train_fn = theano.function(
    [sequences, mask],
    cost,
    updates=updates,
    on_unused_input='warn'
)
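# The bits-vs-nats conversion above is just a change of logarithm base:
# H_bits = H_nats * log2(e). A minimal self-contained check (standalone
# example, not part of the training graph):
import numpy as np

p = np.array([0.7, 0.2, 0.1])           # predicted distribution
target = np.array([1.0, 0.0, 0.0])      # one-hot target
ce_nats = -np.sum(target * np.log(p))   # cross-entropy with natural log
ce_bits = -np.sum(target * np.log2(p))  # cross-entropy with base-2 log
assert np.isclose(ce_nats * np.log2(np.e), ce_bits)  # log2(e) ~= 1.44269504089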
# Especially at the end of audio files where mask is
# all zero for some of the shorter files in mini-batch.
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)

# Use this one instead.
cost = cost.sum()
cost = cost / target_mask.sum()

# By default we report cross-entropy cost in bits.
# Switch to nats by commenting out this line:
# log_2(e) = 1.44269504089
cost = cost * lib.floatX(numpy.log2(numpy.e))

### Getting the params, grads, updates, and Theano functions ###
params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param == True)
lib.print_params_info(params, path=FOLDER_PREFIX)

grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
grads = [
    T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads
]

updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)

# Training function
train_fn = theano.function([sequences, h0, reset, mask],
                           [cost, new_h0],
                           updates=updates,
                           on_unused_input='warn')

# Validation and Test function, hence no updates
    samples = sample_gmm(mu, sigma, weights, theano_rng)
    cost_raw = cost_gmm(vocoder_audio, mu, sigma, weights)
elif LOSS == "MSE":
    samples = output
    cost_raw = T.sum((samples - vocoder_audio) ** 2, axis=-1)
else:
    raise NotImplementedError("{} is not implemented.".format(LOSS))

cost = T.sum(cost_raw * mask + kl_cost) / (mask.sum() + lib.floatX(EPS))

discriminator_cost = disc_positive_cost
generator_cost = cost - disc_positive_cost

discriminator_params = lib.get_params(
    discriminator_cost,
    lambda x: (hasattr(x, 'param') and x.param == True) and ("Discriminator" in x.name))
generator_params = lib.get_params(
    generator_cost,
    lambda x: (hasattr(x, 'param') and x.param == True) and ("Discriminator" not in x.name))

params = discriminator_params + generator_params
lib.print_params_info(params, path=FOLDER_PREFIX)

discriminator_grads = T.grad(discriminator_cost, wrt=discriminator_params, disconnected_inputs='warn')
generator_grads = T.grad(generator_cost, wrt=generator_params, disconnected_inputs='warn')

# Clip both gradient lists element-wise before building the updates
discriminator_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in discriminator_grads]
generator_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in generator_grads]

discriminator_updates = lasagne.updates.sgd(discriminator_grads, discriminator_params, learning_rate=lr)
generator_updates = lasagne.updates.adam(generator_grads, generator_params, learning_rate=lr)
### Getting the params, grads, updates, and Theano functions ###
#params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param==True)
#ip_params = lib.get_params(ip_cost, lambda x: hasattr(x, 'param') and x.param==True\
#        and 'BigFrameLevel' in x.name)
#other_params = [p for p in params if p not in ip_params]
#params = ip_params + other_params
#lib.print_params_info(params, path=FOLDER_PREFIX)
#
#grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
#grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]
#
#updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)
###########
all_params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param == True)
ip_params = lib.get_params(ip_cost, lambda x: hasattr(x, 'param') and x.param == True
                           and 'BigFrameLevel' in x.name)
other_params = [p for p in all_params if p not in ip_params]
all_params = ip_params + other_params
# lib.print_params_info(ip_params, path=FOLDER_PREFIX)
# lib.print_params_info(other_params, path=FOLDER_PREFIX)
lib.print_params_info(all_params, path=FOLDER_PREFIX)

# ip_grads = T.grad(ip_cost, wrt=ip_params, disconnected_inputs='warn')
# ip_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in ip_grads]

# other_grads = T.grad(cost, wrt=other_params, disconnected_inputs='warn')
# other_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in other_grads]

grads = T.grad(cost, wrt=all_params, disconnected_inputs='warn')
def main(params):
    '''
    params = (cathode_potential, data_set, grid_ratio:None, chamber_pressure:None)
    '''
    cathode_potential = params[0]
    data_set = params[1]
    if params[2] is None:
        grid_ratio = cathode_radius/anode_radius  #Anode radius stays fixed
    else:
        grid_ratio = params[2]
    if params[3] is None:
        chamber_pressure = get_params('Chamber', 'Pressure')
    else:
        chamber_pressure = params[3]

    logger.info("INIT")
    Te = np.abs(cathode_potential)  #electron temperature in eV
    fusor = Domain()
    nx, ny = nodes = fusor.get_nodes()
    cathode = fusor.build_grid(anode_radius*grid_ratio)
    anode = fusor.build_grid(anode_radius)
    dx = fusor.dx
    dy = fusor.dy
    loop = ESPIC(cathode, anode, nodes, 600, cathode_potential)
    particles = Particles(nodes)
    fusion = Fusion()

    PHI_G = np.zeros(nodes)
    loop.build_potential_matrix(PHI_G)
    timeStep = loop.time_steps
    spec_wt = particles.get_specificWeight(cathode_potential)

    for step in range(0, timeStep):
        logger.info(f"""
        Beginning Time Step # {step}
        Particle Count: {particles.count}
        """)
        loop.DEN = np.zeros(nodes)  # Number Density Matrix
        loop.EFX = np.zeros(nodes)  # Electric Field Matrix, x-component
        loop.EFY = np.zeros(nodes)  # Electric Field Matrix, y-component
        CHG = np.zeros(nodes)       # Charge Density Matrix
        loop.counter = 0
        particles.lost = 0
        fusion.counter = 0

        ''' 1. Compute Charge Density '''
        for p in range(particles.count):
            fi = (particles.pos[p, 0] + fusor.chamber_radius - dx)/dx  #real i index of particle's cell
            i = np.floor(fi).astype(int)  #integral part
            hx = fi - i                   #the remainder
            fj = (particles.pos[p, 1] + (fusor.chamber_height/2) - dx)/dx  #real j index of particle's cell
            j = np.floor(fj).astype(int)  #integral part
            hy = fj - j                   #the remainder
            #interpolate charge to nodes
            CHG[i, j] = CHG[i, j] + (1-hx)*(1-hy)
            CHG[i+1, j] = CHG[i+1, j] + hx*(1-hy)
            CHG[i, j+1] = CHG[i, j+1] + (1-hx)*hy
            CHG[i+1, j+1] = CHG[i+1, j+1] + hx*hy
        # Calculate Density
        loop.get_density(CHG, spec_wt, dx)

        ''' 2. Compute Electric Potential '''
        PHI_G = loop.get_potential(PHI_G, Te, dx)
        logger.info("Computed Electric Potential")

        ''' 3. Compute Electric Field '''
        logger.info('Computing Electric Field')
        loop.get_electricField(PHI_G, dx, dy)

        ''' 4. Generate Particles '''
        logger.info('Injecting Particles')
        particles.generate(radius)
        ''' 5. Move Particles '''
        logger.info("Moving Particles")
        m = 0  # Particle index
        while m < particles.count:
            i, j, hx, hy = particles.get_index(m, dx)
            i = int(i)
            j = int(j)
            E_field = loop.gather_electricField(i, j, hx, hy)
            F = fusor.QE*E_field
            a = F/fusor.massIon
            particles.vel[m, :] = particles.vel[m, :] + a*loop.dt
            particles.pos[m, :] = particles.pos[m, :] + particles.vel[m, :]*loop.dt
            rho = loop.gather_density(i, j, hx, hy)
            fusion_prob = particles.get_fusion_prob(m, rho, spec_wt)
            fusion_chance = np.random.rand(1)
            logging.debug(f"Fusion Probability: {fusion_prob}")
            logging.debug(f"Fusion Chance: {fusion_chance}")
            radial_distance = np.sqrt(particles.pos[m, 0]**2 + particles.pos[m, 1]**2)
            # Top Wall
            if particles.pos[m, 1] > fusor.chamber_height/2:
                particles.kill(m)
                m += -1
                fusor.top_counter += 1
            #Bottom Wall
            elif particles.pos[m, 1] < -fusor.chamber_height/2:
                particles.kill(m)
                m += -1
                fusor.bottom_counter += 1
            #Left Wall
            elif particles.pos[m, 0] < -fusor.chamber_radius:
                particles.kill(m)
                m += -1
                fusor.left_counter += 1
            #Right Wall
            elif particles.pos[m, 0] > fusor.chamber_radius:
                particles.kill(m)
                m += -1
                fusor.right_counter += 1
            #Anode
            elif (radial_distance < anode_radius + fusor.wire_radius) and (radial_distance > anode_radius - fusor.wire_radius):
                prob = np.random.rand(1)
                if prob > fusor.anode_gt:
                    particles.kill(m)
                    m += -1
                    fusor.anode_counter += 1
            #Cathode
            elif (radial_distance < cathode_radius + fusor.wire_radius) and (radial_distance > cathode_radius - fusor.wire_radius):
                prob = np.random.rand(1)
                if prob > fusor.cathode_gt:
                    particles.kill(m)
                    m += -1
                    fusor.cathode_counter += 1
            #Check Fusion
            elif fusion_chance[0] <= fusion_prob:
                fuse_position = particles.pos[[m]]
                fusion.occured(fuse_position[0])
                particles.kill(m)
                m += -1
            m += 1  # move onto next particle

        #Get Fusion data
        fusion.rate_data.append([fusion.counter, step*fusor.dt])
        logger.info('Finished moving Particles')
        logger.info(f"""
        Net Charge: {particles.insert - particles.lost}
        Particles lost: {particles.lost}
        Fusion Events / dt: {fusion.counter}
        Total Fusion Events: {fusion.events}
        """)

    with h5py.File(f'data\\potential{data_set}.h5', 'w') as hdf:
        G2 = hdf.create_group("DataSets/potential/")
        dataset1 = G2.create_dataset('ParticlePosition', data=particles.pos)
        dataset2 = G2.create_dataset('ParticleVelocity', data=particles.vel)
        dataset6 = G2.create_dataset('PHI', data=PHI_G)
        G3 = hdf.create_group("DataSets/electricfield/")
        dataset4 = G3.create_dataset('electricFieldx', data=loop.EFX)
        dataset5 = G3.create_dataset('electricFieldy', data=loop.EFY)
        G4 = hdf.create_group("DataSets/density/")
        dataset3 = G4.create_dataset('Density', data=loop.DEN)
        G1 = hdf.create_group(f"DataSets/fusion")
        dataset7 = G1.create_dataset('Position', data=fusion.position)
        dataset9 = G1.create_dataset('RateData', data=fusion.rate_data)
        dataset10 = G1.create_dataset('FusionCount', data=fusion.events)
        dataset11 = G1.create_dataset('ChamberPressure', data=chamber_pressure)
        dataset11.attrs['units'] = 'torr'
        groups = [G1, G2, G3, G4]
        for group in groups:
            group.attrs['gridPotential'] = f'{cathode_potential/1000} kV'
            group.attrs['gridRatio'] = f'{grid_ratio}'

    print(f"Completed Simulation :: {data_set}")
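# The charge-deposition loop in step 1 above is standard cloud-in-cell
# (bilinear) weighting: each particle's charge is shared among the four
# surrounding grid nodes in proportion to the overlap areas. A minimal
# standalone NumPy sketch on a toy grid, independent of the Domain/ESPIC
# classes above:
import numpy as np

def deposit_cic(grid, x, y, dx, weight=1.0):
    """Cloud-in-cell deposition: split `weight` over the 4 nodes around (x, y)."""
    fi, fj = x / dx, y / dx                    # fractional cell indices
    i, j = int(np.floor(fi)), int(np.floor(fj))
    hx, hy = fi - i, fj - j                    # offsets within the cell, in [0, 1)
    grid[i, j]         += (1 - hx) * (1 - hy) * weight
    grid[i + 1, j]     += hx * (1 - hy) * weight
    grid[i, j + 1]     += (1 - hx) * hy * weight
    grid[i + 1, j + 1] += hx * hy * weight

grid = np.zeros((4, 4))
deposit_cic(grid, x=0.75, y=0.25, dx=1.0)   # particle sits 3/4 across cell (0, 0) in x
assert np.isclose(grid.sum(), 1.0)          # total deposited charge is conserved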
import logging
import numpy as np
import h5py
import concurrent.futures
import matplotlib.pyplot as plt
from time import perf_counter

''' Setup Logger '''
logger = logging.getLogger(__name__)
# logger.setLevel(logging.DEBUG)
# formatter = logging.Formatter('%(name)s :: %(process)d :: %(message)s')
logging.basicConfig(level=logging.DEBUG, format='%(name)s :: %(message)s')

#Variable Inputs (for later)
cathode_radius = get_params('Grid', 'Cathode_Radius')  # Cathode [m]
anode_radius = get_params('Grid', 'Anode_Radius')      # Anode [m]
radius = get_params('Source', 'sourceRadius')

def main(params):
    '''
    params = (cathode_potential, data_set, grid_ratio:None, chamber_pressure:None)
    '''
    cathode_potential = params[0]
    data_set = params[1]
    if params[2] is None:
        grid_ratio = cathode_radius/anode_radius  #Anode radius stays fixed
def __init__(self, num_input=256, num_hidden=[512, 512], num_output=256, clip_at=0.0, scale_norm=0.0):
    X = T.matrix()
    Y = T.matrix()
    eta = T.scalar()
    alpha = T.scalar()
    lambda2 = T.scalar()
    drop_prob = T.scalar()

    self.num_input = num_input
    self.num_hidden = num_hidden
    self.num_output = num_output
    self.clip_at = clip_at
    self.scale_norm = scale_norm

    inputs = InputLayer(X, name="inputs")
    num_prev = num_input
    prev_layer = inputs
    layers = [inputs]
    for i, num_curr in enumerate(num_hidden):
        lstm = LSTMLayer(num_prev, num_curr, input_layers=[prev_layer],
                         name="lstm{0}".format(i + 1), drop_prob=drop_prob)
        num_prev = num_curr
        prev_layer = lstm
        layers.append(lstm)
    sigmoid = SigmoidLayer(num_prev, num_output, input_layers=[prev_layer], name="yhat")
    layers.append(sigmoid)
    # lstm1 = LSTMLayer(num_input, num_hidden, input_layers=[inputs], name="lstm1")
    # lstm2 = LSTMLayer(num_hidden, num_hidden, input_layers=[lstm1], name="lstm2")
    # lstm3 = LSTMLayer(num_hidden, num_hidden, input_layers=[lstm2], name="lstm2")
    # sigmoid = SigmoidLayer(num_hidden, num_output, input_layers=[lstm2], name="yhat")
    Y_hat = sigmoid.output()

    #self.layers = inputs, lstm1, lstm2, sigmoid #, lstm3
    self.layers = layers
    params = get_params(self.layers)
    caches = make_caches(params)

    mean_cost = -T.mean(Y * T.log(Y_hat) + (1 - Y) * T.log(1 - Y_hat))
    last_step_cost = -T.mean(Y[-1] * T.log(Y_hat[-1]) + (1 - Y[-1]) * T.log(1 - Y_hat[-1]))
    cost = alpha * mean_cost + (1 - alpha) * last_step_cost

    updates = momentum(cost, params, caches, eta, clip_at=self.clip_at,
                       scale_norm=self.scale_norm, lambda2=lambda2)

    self.train_func = theano.function(
        [X, Y, eta, alpha, lambda2, drop_prob],
        [cost, last_step_cost],
        updates=updates, allow_input_downcast=True)
    self.predict_func = theano.function([X, drop_prob], [Y_hat[-1]], allow_input_downcast=True)
### Getting the params, grads, updates, and Theano functions ###
#params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param==True)
#ip_params = lib.get_params(ip_cost, lambda x: hasattr(x, 'param') and x.param==True\
#        and 'BigFrameLevel' in x.name)
#other_params = [p for p in params if p not in ip_params]
#params = ip_params + other_params
#lib.print_params_info(params, path=FOLDER_PREFIX)
#
#grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
#grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads]
#
#updates = lasagne.updates.adam(grads, params, learning_rate=LEARNING_RATE)
###########
all_params = lib.get_params(cost, lambda x: hasattr(x, 'param') and x.param == True)
ip_params = lib.get_params(ip_cost, lambda x: hasattr(x, 'param') and x.param == True
                           and 'BigFrameLevel' in x.name)
other_params = [p for p in all_params if p not in ip_params]
all_params = ip_params + other_params

lib.print_params_info(ip_params, path=FOLDER_PREFIX)
lib.print_params_info(other_params, path=FOLDER_PREFIX)
lib.print_params_info(all_params, path=FOLDER_PREFIX)

ip_grads = T.grad(ip_cost, wrt=ip_params, disconnected_inputs='warn')
ip_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in ip_grads]

other_grads = T.grad(cost, wrt=other_params, disconnected_inputs='warn')
other_grads = [T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in other_grads]

grads = T.grad(cost, wrt=all_params, disconnected_inputs='warn')
# all zero for some of the shorter files in mini-batch.
#cost = cost.sum(axis=1) / target_mask.sum(axis=1)
#cost = cost.mean(axis=0)

cost_sum = T.sum(cost, axis=1)

# Use this one instead.
cost = cost.sum()
cost = cost / target_mask.sum()  # average cost per (unmasked) sample

# By default we report cross-entropy cost in bits.
# Switch to nats by commenting out this line:
# log_2(e) = 1.44269504089
#cost = cost * lib.floatX(numpy.log2(numpy.e))

###########
all_params = lib.get_params(
    cost, lambda x: hasattr(x, 'param') and x.param == True
)  # if LEARN_H0=True, then learn_h0 is included in the parameters to train
lib.print_params_info(all_params, path=FOLDER_PREFIX)

grads = T.grad(cost, wrt=all_params, disconnected_inputs='warn')
grads = [
    T.clip(g, lib.floatX(-GRAD_CLIP), lib.floatX(GRAD_CLIP)) for g in grads
]

updates = lasagne.updates.adam(grads, all_params, learning_rate=lr)

# Training function(s)
train_fn = theano.function([
    sequences_8k, sequences_up, condition, con_h0, big_h0, h0, reset, mask,
    batch_size, lr