Python adadelta Examples

Programming Language: Python

Namespace/Package Name: tools

Method/Function: adadelta

Examples at hotexamples.com: 2

Python adadelta - 2 examples found. These are the top-rated real-world Python examples of tools.adadelta, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: ntm_translate.py Project: dandxy89/NTMtranslation

	def __init__(self, vector_size, voc_size, head_type, head_num, controller_type, controller_sizes, memory_size,shift_width = 3, activation = T.tanh):
		"""Build the symbolic graph of the token-level NTM and compile its Theano functions.

		vector_size -- second argument of the embedding initialiser and first
			argument of the input-weight initialiser.
		voc_size -- indexable pair; voc_size[0]+1 sizes the embedding table and
			voc_size[1] sizes the softmax output layer.
		head_type -- head class; Head_neural is constructed without shift_width,
			every other class receives it.
		head_num -- number of heads to create.
		controller_type -- called as controller_type(controller_sizes); the result
			must expose .params and .getY().
		controller_sizes -- indexable; the first and last entries size the input
			and output weights.
		memory_size -- indexable; memory_size[0] x memory_size[1] is the shape of
			the initial memory matrix.
		shift_width -- forwarded to heads that are not Head_neural.
		activation -- applied to the summed controller input.

		Compiled functions stored on the instance:
			predict(input) -- last scan output: argmax of the softmax where
				the input token equals 0, otherwise 0.
			train(input, output) -- final accumulated cost; applies the
				adadelta updates.
			test(input, output) -- the same cost, without updates.
			getweight(input) -- first scan output (the weighting after each step).

		NOTE(review): assumes tools.initial_weights(*dims) returns an array of
		that shape -- confirm against tools.
		"""
		self.controller = controller_type(controller_sizes)

		# Shared parameters. The initialiser calls stay in their original order
		# because they presumably draw from a common random stream.
		self.embedding = theano.shared(
			value=tools.initial_weights(voc_size[0] + 1, vector_size),
			name='embedding', borrow=True)
		self.input_w = theano.shared(
			value=tools.initial_weights(vector_size, controller_sizes[0]),
			name='input_w', borrow=True)
		# Biases start at zero but are still derived from the initialiser output.
		self.input_b = theano.shared(
			value=0. * tools.initial_weights(controller_sizes[0]),
			name='input_b', borrow=True)
		self.read_w = theano.shared(
			value=tools.initial_weights(memory_size[1], controller_sizes[0]),
			name='read_w', borrow=True)
		self.output_w = theano.shared(
			value=tools.initial_weights(controller_sizes[-1], voc_size[1]),
			name='Controller_outw', borrow=True)
		self.output_b = theano.shared(
			value=0. * tools.initial_weights(voc_size[1]),
			name='Controller_outb', borrow=True)
		# Initial memory is uniform in [-1, 1); initial weighting logits are standard normal.
		self.memory_init = theano.shared(
			value=2 * (numpy.random.rand(memory_size[0], memory_size[1]) - 0.5),
			name='memory_init', borrow=True)
		self.weight_init = theano.shared(
			value=numpy.random.randn((memory_size[0])),
			name='weight_init', borrow=True)

		self.params = self.controller.params + [
			self.embedding, self.input_w, self.read_w, self.input_b,
			self.weight_init, self.memory_init, self.output_w, self.output_b]

		start_memory = self.memory_init
		start_weight = tools.vector_softmax(self.weight_init)

		# One head per index; each head contributes its own parameters.
		self.heads = []
		for head_index in xrange(head_num):
			if head_type == Head_neural:
				new_head = head_type(controller_sizes[-1], memory_size, head_index)
			else:
				new_head = head_type(controller_sizes[-1], memory_size, shift_width, head_index)
			self.heads.append(new_head)
			self.params += new_head.params
		print(self.params)

		def step(token_t, prev_weight, prev_memory):
			# Controller input = embedded token + projection of the memory read.
			embedded = self.embedding[token_t]
			projected = T.dot(embedded, self.input_w)
			read_vec = T.dot(prev_weight, prev_memory)
			read_proj = T.dot(read_vec, self.read_w)
			hidden = self.controller.getY(activation(projected + read_proj + self.input_b))
			probs = T.nnet.softmax(T.dot(hidden, self.output_w) + self.output_b)
			# A token is only emitted on steps whose input id is 0.
			decoded = T.switch(T.eq(token_t, 0), T.argmax(probs, axis=1), theano.shared(0))

			# Heads act one after another: each emits a new weighting and then
			# erases from / adds to the memory through it.
			memory = prev_memory
			weight = prev_weight
			for head in self.heads:
				weight, erase, add = head.emit_new_weight(hidden, weight, memory)
				weight_col = weight.dimshuffle((0, 'x'))
				erase_row = erase.dimshuffle(('x', 0))
				add_row = add.dimshuffle(('x', 0))
				memory = memory * (1 - (weight_col * erase_row)) + (weight_col * add_row)

			return weight, memory, probs, decoded

		source_ids = T.lvector()
		target_ids = T.lvector()

		scan_outputs, _ = theano.scan(
			fn=step,
			sequences=[source_ids],
			outputs_info=[start_weight, start_memory, None, None])
		weight_seq, _memory_seq, probs_seq, decoded_seq = scan_outputs

		# Negative log-probabilities from step len(target) - 1 onwards.
		neg_log_probs = -T.log(probs_seq)[target_ids.shape[0] - 1:]

		def accumulate(step_nll, target_id, running_total):
			# The softmax output is a one-row matrix, hence the [0] before indexing.
			return running_total + step_nll[0][target_id]

		running_costs, _ = theano.scan(
			fn=accumulate,
			sequences=[neg_log_probs, target_ids],
			outputs_info=[theano.shared(0.)])

		# L2 penalty over every trainable parameter.
		l2 = T.sum(0)
		for param in self.params:
			l2 = l2 + (param ** 2).sum()
		running_costs = running_costs + 1e-4 * l2
		final_cost = running_costs[-1]

		gradients = T.grad(final_cost, self.params)
		clipped = [T.clip(gradient, -100, 100) for gradient in gradients]
		updates = tools.adadelta(self.params, clipped, 0.95, 1e-6)

		self.predict = theano.function(inputs=[source_ids], outputs=[decoded_seq])
		self.train = theano.function(inputs=[source_ids, target_ids], outputs=final_cost, updates=updates)
		self.test = theano.function(inputs=[source_ids, target_ids], outputs=final_cost)
		self.getweight = theano.function(inputs=[source_ids], outputs=[weight_seq])

Example #2

Show file

File: ntm.py Project: dandxy89/NTMtranslation

	def __init__(self, vector_size, head_type,head_num, controller_type, controller_sizes, memory_size, shift_width = 3, activation = T.tanh):
		"""Build the symbolic graph of the vector-sequence NTM and compile its Theano functions.

		vector_size -- width of one input row and of the sigmoid output layer.
		head_type -- head class; Head_neural is constructed without shift_width,
			every other class receives it.
		head_num -- number of heads to create.
		controller_type -- called as controller_type(controller_sizes); the result
			must expose .params and .getY().
		controller_sizes -- indexable; the first and last entries size the input
			and output weights.
		memory_size -- indexable; memory_size[0] x memory_size[1] is the shape of
			the initial memory matrix.
		shift_width -- forwarded to heads that are not Head_neural.
		activation -- applied to the summed controller input.

		Compiled functions stored on the instance:
			test() -- the L2 penalty term alone.
			predict(input) -- all three scan outputs (weightings, memories, outputs).
			grads(input, output) -- unclipped gradient of the cost for every parameter.
			train(input, output) -- the cost; applies the adadelta updates.

		NOTE(review): assumes tools.initial_weights(*dims) returns an array of
		that shape -- confirm against tools.
		"""
		# Kept as a public attribute; the adadelta update below does not read it.
		self.lr = 0.01
		self.controller = controller_type(controller_sizes)

		input_w = tools.initial_weights(vector_size, controller_sizes[0])
		self.input_w = theano.shared(value=input_w, name='input_w', borrow=True)
		input_b = 0. * tools.initial_weights(controller_sizes[0])
		self.input_b = theano.shared(value=input_b, name='input_b', borrow=True)
		read_w = tools.initial_weights(memory_size[1], controller_sizes[0])
		self.read_w = theano.shared(value=read_w, name='read_w', borrow=True)
		output_w = tools.initial_weights(controller_sizes[-1], vector_size)
		self.output_w = theano.shared(value=output_w, name='Controller_outw', borrow=True)
		# The bias is added to T.dot(hid, output_w), so it must have the width of
		# the output layer (vector_size), not of the last controller layer.
		output_b = 0. * tools.initial_weights(vector_size)
		self.output_b = theano.shared(value=output_b, name='Controller_outb', borrow=True)
		# Initial memory is uniform in [-1, 1); initial weighting logits are standard normal.
		memory_init_p = 2 * (numpy.random.rand(memory_size[0], memory_size[1]) - 0.5)
		weight_init_p = numpy.random.randn((memory_size[0]))
		self.memory_init = theano.shared(value=memory_init_p, name='memory_init', borrow=True)
		self.weight_init = theano.shared(value=weight_init_p, name='weight_init', borrow=True)
		self.params = self.controller.params + [
			self.input_w, self.read_w, self.input_b, self.weight_init,
			self.memory_init, self.output_w, self.output_b]

		# One head per index; each head contributes its own parameters.
		self.heads = []
		for i in xrange(head_num):
			if head_type == Head_neural:
				head = head_type(controller_sizes[-1], memory_size, i)
			else:
				head = head_type(controller_sizes[-1], memory_size, shift_width, i)
			self.heads.append(head)
			self.params += head.params

		memory_init = self.memory_init
		weight_init = tools.vector_softmax(self.weight_init)
		# Single-argument parenthesised form prints the same under Python 2 and 3.
		print(self.params)

		def pred_t(rawinput_t, weight_tm1, memory_tm1):
			# Controller input = projected input row + projection of the memory read.
			input_t = T.dot(rawinput_t, self.input_w)
			read_m = T.dot(weight_tm1, memory_tm1)
			read_t = T.dot(read_m, self.read_w)
			controller_input = activation(input_t + read_t + self.input_b)
			hid = self.controller.getY(controller_input)
			output = T.nnet.sigmoid(T.dot(hid, self.output_w) + self.output_b)

			# Heads act one after another: each emits a new weighting and then
			# erases from / adds to the memory through it.
			memory_inter = memory_tm1
			weight_inter = weight_tm1
			for head in self.heads:
				weight_inter, erase, add = head.emit_new_weight(hid, weight_inter, memory_inter)
				weight_tdim = weight_inter.dimshuffle((0, 'x'))
				erase_dim = erase.dimshuffle(('x', 0))
				add_dim = add.dimshuffle(('x', 0))
				M_erased = memory_inter * (1 - (weight_tdim * erase_dim))
				memory_inter = M_erased + (weight_tdim * add_dim)

			return weight_inter, memory_inter, output

		inputs = T.matrix()
		targets = T.matrix()
		# Floor division keeps the slice bound an integer regardless of the
		# interpreter's "/" semantics.
		seqlength = inputs.shape[0] // 2

		pred, _ = theano.scan(fn=pred_t,
							sequences=[inputs],
							outputs_info=[weight_init, memory_init, None])

		# Only the steps after index seqlength are scored. Predictions are
		# squeezed into [5e-6, 1 - 5e-6] so the log never sees exactly 0 or 1.
		entropy = T.sum(
			T.nnet.binary_crossentropy(
				5e-6 + (1 - 1e-5) * pred[-1][seqlength + 1:],
				targets[seqlength + 1:]),
			axis=1)

		# L2 penalty over every trainable parameter.
		l2 = T.sum(0)
		for param_i in self.params:
			l2 = l2 + (param_i ** 2).sum()
		cost = T.sum(entropy) + 1e-3 * l2

		# One call for the whole parameter list instead of one call per parameter.
		grads = T.grad(cost, self.params)
		grads_clip = [T.clip(grad, -10, 10) for grad in grads]
		updates = tools.adadelta(self.params, grads_clip, 0.95, 1e-6)

		self.test = theano.function(inputs=[], outputs=l2)
		self.predict = theano.function(inputs=[inputs], outputs=pred)
		self.grads = theano.function(inputs=[inputs, targets], outputs=grads)
		self.train = theano.function(inputs=[inputs, targets], outputs=cost, updates=updates)