Example #1
File: SSDAE.py Project: adhaka/kthasrdnn
# Imports needed by the snippets from SSDAE.py shown below; HiddenLayer, LogisticRegression
# and Learning_Rate_Linear_Decay are project-local classes defined elsewhere in the repository.
import numpy as np
import theano
import theano.tensor as T
from collections import OrderedDict
from itertools import count


class SSLayer(object):
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, corruption=0.30, batch_size=400, activation='sigmoid'):
		self.rng = numpy_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.encoder = HiddenLayer(self.rng, self.n_inputs, self.n_outputs, activation=activation)
		self.decoder = HiddenLayer(self.rng, self.n_outputs, self.n_inputs, activation=activation)

		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.x_lab = None
		self.x_unlab = None
		self.y_lab = None
		self.softmaxLayer = LogisticRegression(self.rng, n_outputs, n_targets, init_zero=False)
		self.params = self.encoder.params + self.decoder.params + self.softmaxLayer.params 
		self.delta_params = self.encoder.delta_params + self.decoder.delta_params + self.softmaxLayer.delta_params		


	@staticmethod
	def _shared_dataset(x, borrow=True):
		return theano.shared(np.asarray(x, dtype=theano.config.floatX), borrow=borrow)


	def get_cost_updates(self, x_lab, x_unlab, y_lab):
		self.x_lab = x_lab 
		self.x_unlab = x_unlab 
		self.y_lab = y_lab
		out_lab = self.encoder.output(x_lab)
		out_unlab = self.encoder.output(x_unlab)
		z_lab = self.decoder.output(out_lab)
		z_unlab = self.decoder.output(out_unlab)
		# the softmax classifier operates on the encoder output, not on the raw input
		preds_lab = self.softmaxLayer.predict(out_lab)

		accuracy = self.softmaxLayer.calcAccuracy(out_lab, y_lab)
		cost_reconstruction_unlab = T.mean((z_unlab - x_unlab) * (z_unlab - x_unlab))
		cost_reconstruction_lab = T.mean((z_lab - x_lab) * (z_lab - x_lab))
		cost_classification = self.softmaxLayer.cost(out_lab, y_lab)
		cost = cost_reconstruction_lab + cost_reconstruction_unlab + 100 * cost_classification

		# this early version stores no learning rate, so a fixed step size is used here
		learning_rate = 0.02
		updates = OrderedDict()
		gparams = T.grad(cost, wrt=self.params)
		for p, gp in zip(self.params, gparams):
			updates[p] = p - gp * learning_rate

		return (cost, accuracy, updates)


	# For better control, this function will take numpy arrays.
	# Make batches so that each one also has some representation from the labelled data,
	# ideally with the same number of points per class.
	def train(self, x_lab_numpy, y_lab_numpy, xunlab_numpy):
		pass
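
The class above only builds the symbolic cost and updates; nothing in the snippet compiles or runs a training step. The sketch below shows one way the (cost, accuracy, updates) triple returned by get_cost_updates could be wired into a theano.function. It is illustrative only: the symbolic variable names, the layer sizes, and the assumption that y_lab is a one-of-K encoded target matrix (as the later snippets suggest) are mine, not the repository's.

# Minimal usage sketch (not part of SSDAE.py); SSLayer comes from the project itself,
# everything else is standard numpy/Theano.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

numpy_rng = np.random.RandomState(1234)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

x_l = T.matrix('x_l')    # labelled inputs
x_u = T.matrix('x_u')    # unlabelled inputs
y_l = T.matrix('y_l')    # assumed one-of-K encoded targets for the labelled inputs

layer = SSLayer(numpy_rng, theano_rng, n_inputs=784, n_outputs=500, n_targets=10)
cost, accuracy, updates = layer.get_cost_updates(x_l, x_u, y_l)

# one call performs a single gradient step on a (labelled, unlabelled) minibatch
train_step = theano.function(inputs=[x_l, x_u, y_l],
                             outputs=[cost, accuracy],
                             updates=updates)
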
Example #2
File: SSDAE.py Project: adhaka/kthasrdnn
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, corruption=0.30, batch_size=400, activation='sigmoid'):
		self.rng = numpy_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.encoder = HiddenLayer(self.rng, self.n_inputs, self.n_outputs, activation=activation)
		self.decoder = HiddenLayer(self.rng, self.n_outputs, self.n_inputs, activation=activation)

		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.x_lab = None
		self.x_unlab = None
		self.y_lab = None
		self.softmaxLayer = LogisticRegression(self.rng, n_outputs, n_targets, init_zero=False)
		self.params = self.encoder.params + self.decoder.params + self.softmaxLayer.params 
		self.delta_params = self.encoder.delta_params + self.decoder.delta_params + self.softmaxLayer.delta_params		
Example #3
File: SSDAE.py Project: adhaka/kthasrdnn
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, x_lab=None, x_unlab=None, y_lab=None, learning_rate = 0.020, corruption=0.20, batch_size=400, alpha=700, beta=3, tied=False, activation='tanh'):
		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.alpha = alpha
		self.beta = beta
		self.encoder = HiddenLayer(self.numpy_rng, self.n_inputs, self.n_outputs, activation=activation)

		if tied:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, init_w=self.encoder.w.T, activation=activation)
		else:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, activation=activation)
		
		self.learning_rate = learning_rate
		self.activation = activation
		self.layer_num = self.__layer_nums.next()

		if x_lab is None:
			self.x_lab = T.matrix('inp_lab')
		else:
			self.x_lab = x_lab

		if x_unlab is None:
			self.x_unlab = T.matrix('inp_unlab')
		else:
			self.x_unlab = x_unlab

		if y_lab is None:
			self.y_lab = T.matrix('y_lab')
		else:
			self.y_lab = y_lab

		# compute the encoder outputs only after the symbolic inputs are guaranteed to exist
		self.out_lab = self.encoder.output(self.x_lab)
		self.out_unlab = self.encoder.output(self.x_unlab)

		self.softmaxLayer = LogisticRegression(self.numpy_rng, n_outputs, n_targets, init_zero=False)
		self.encoderParams = self.encoder.params
		self.params = self.encoder.params + self.decoder.params
		self.paramsAll = self.encoder.params + self.decoder.params + self.softmaxLayer.params
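
With tied=True the decoder reuses the transpose of the encoder weight matrix (init_w=self.encoder.w.T), so the autoencoder part learns a single weight matrix plus the two bias vectors. Below is a minimal construction sketch for the SSDAELayer class this __init__ belongs to (shown in full in the next example); the random seeds and layer sizes are illustrative, not taken from the repository.

# Illustrative instantiation of a tied-weight SSDAELayer; the numeric values are assumptions
import numpy as np
from theano.tensor.shared_randomstreams import RandomStreams

numpy_rng = np.random.RandomState(89677)
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

layer = SSDAELayer(numpy_rng, theano_rng,
                   n_inputs=784, n_outputs=500, n_targets=10,
                   learning_rate=0.02, alpha=700, beta=3,
                   tied=True, activation='tanh')
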
Example #4
File: SSDAE.py Project: adhaka/kthasrdnn
class SSDAELayer(object):

	# class variable to keep track of layers created 
	__layer_nums = count(0)
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, x_lab=None, x_unlab=None, y_lab=None, learning_rate = 0.020, corruption=0.20, batch_size=400, alpha=700, beta=3, tied=False, activation='tanh'):
		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.alpha = alpha
		self.beta = beta
		self.encoder = HiddenLayer(self.numpy_rng, self.n_inputs, self.n_outputs, activation=activation)

		if tied:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, init_w=self.encoder.w.T, activation=activation)
		else:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, activation=activation)
		
		self.learning_rate = learning_rate
		self.activation = activation
		self.layer_num = self.__layer_nums.next()

		if x_lab is None:
			self.x_lab = T.matrix('inp_lab')
		else:
			self.x_lab = x_lab

		if x_unlab is None:
			self.x_unlab = T.matrix('inp_unlab')
		else:
			self.x_unlab = x_unlab

		if y_lab is None:
			self.y_lab = T.matrix('y_lab')
		else:
			self.y_lab = y_lab

		# compute the encoder outputs only after the symbolic inputs are guaranteed to exist
		self.out_lab = self.encoder.output(self.x_lab)
		self.out_unlab = self.encoder.output(self.x_unlab)

		self.softmaxLayer = LogisticRegression(self.numpy_rng, n_outputs, n_targets, init_zero=False)
		self.encoderParams = self.encoder.params
		self.params = self.encoder.params + self.decoder.params
		self.paramsAll = self.encoder.params + self.decoder.params + self.softmaxLayer.params
		# self.params = self.encoder.params + self.decoder.params
		# self.delta_params = self.encoder.delta_params + self.decoder.delta_params + self.softmaxLayer.delta_params		


	@staticmethod
	def _shared_dataset(x, borrow=True):
		return theano.shared(np.asarray(x, dtype=theano.config.floatX), borrow=borrow)


	@classmethod
	def count_instances(cls):
		# increments a class-level counter of created instances; layer numbering in
		# __init__ already uses the __layer_nums iterator
		cls.layer_num = getattr(cls, 'layer_num', 0) + 1


	def output(self, x):
		out = self.encoder.output(x)
		# out_unlab = self.encoder.output(x_unlab)
		return out

	# outputs the predictions from the softmax layer
	def predict(self, x):
		return self.softmaxLayer.predict(x)


	def predict_np(self, wc, bc, z_np):
		softmaxout = np.dot(z_np, wc) + bc
		preds = np.argmax(softmaxout, axis=1)
		return preds

	# builds the symbolic costs, accuracy and parameter updates for one training step
	def get_cost_updates(self):
		out_unlab = self.encoder.output(self.x_unlab)
		out_lab = self.encoder.output(self.x_lab)
		z_unlab = self.decoder.output(out_unlab)
		z_lab = self.decoder.output(out_lab)
		preds_lab = self.softmaxLayer.predict(out_lab)
		self.preds_lab = preds_lab
		# alpha=0
		beta_range=[1, 10, 100, 200, 500, 800, 1000, 2000, 5000]
		# beta=700
		# alpha = 3
		print "value of alpha is:", self.alpha
		print "value of beta is:", self.beta
		lr = Learning_Rate_Linear_Decay(start_rate=0.02)

		# accuracy = self.softmaxLayer.calcAccuracy(out_lab, y_lab)
		# cost_reconstruction_unlab = T.mean((z_unlab-x_unlab)*(z_unlab-x_unlab))
		# cost_reconstruction_lab = T.mean((z_lab - x_lab)*(z_lab - x_lab))  
		if self.activation == 'sigmoid':
			crl = -T.sum(self.x_lab * T.log(z_lab) + (1 - self.x_lab) * T.log(1 - z_lab), axis=1)
			cost_reconstruction_lab = T.mean(crl)
			cost_reconstruction_unlab = T.mean(-T.mean(self.x_unlab * T.log(z_unlab) + (1 - self.x_unlab) * T.log(1-z_unlab), axis=1))
		elif self.activation == 'tanh':
			cost_reconstruction_lab = T.mean(T.mean((self.x_lab - z_lab)*(self.x_lab - z_lab), axis=1), axis=0)
			cost_reconstruction_unlab = T.mean(T.mean((self.x_unlab - z_unlab)*(self.x_unlab - z_unlab), axis=1), axis=0)

		preds = self.softmaxLayer.predict(out_lab)
		accuracy = self.softmaxLayer.calcAccuracy(out_lab, self.y_lab)
		cost_classification = self.softmaxLayer.cost(out_lab, self.y_lab) 
		cost1 = self.beta * (cost_reconstruction_lab + cost_reconstruction_unlab)
		cost2 = self.alpha * cost_classification
		cost = cost1 + cost2
		# debugprint(cost)
		# if self.debug_mode == True:
		# theano.printing.pydotprint(cost, outfile='symbolic_graph_costx' + str(self.layer_num) + '.png', var_with_name_simple=True)
		
		updates = OrderedDict()
		gparams = T.grad(cost, wrt=self.paramsAll)
		gparams2 = T.grad(cost1, wrt=self.params)
		for p, gp in zip(self.paramsAll, gparams):
			updates[p] = p - gp*self.learning_rate


		# for p, gp in zip(self.params, gparams2):
		# 	updates[p] = p - gp*self.learning_rate

		# debugprint(cost)
		return [cost, cost1, cost_classification, accuracy, updates]


	# For better control, this function will take numpy arrays.
	# Make batches so that each one also has some representation from the labelled data,
	# ideally with the same number of points per class; a numpy sketch of such a batching
	# scheme follows this example.
	def train(self, x_lab_numpy, y_lab_numpy, xunlab_numpy):
		pass
		# batch_sgd_train = theano.function(inputs=[index_unlab, index_lab], outputs=[cost, accuracy], givens={xlab:xlab[]})


	def getWc(self):
		return self.softmaxLayer.get_weight()


	def setWc(self, wc):
		self.softmaxLayer.set_weight(wc)


	def get_weight(self):
		return self.encoder.get_weight()


	def get_bias(self):
		return self.encoder.get_bias()


	# Function to update the classifier weights manually, without using the computational graph.
	# Uses one-of-K encoding for the targets; an alternative way to update Wc using its numpy value.


	def update_Wc(self, target, output, z):
		eta = 0.01
		wc = self.getWc()
		n_rows, n_cols = wc.shape[0], wc.shape[1]
		for i in xrange(n_rows):
			for j in xrange(n_cols):
				if target[i, j] == 1:
					# wc[i,j] = wc[i,j] - eta * (target[i,j] - output[i,j])* z[i]  
					wc[i,j] = wc[i,j] - eta * (target[i,j] - output[i,j])

		self.setWc(wc)


	def update_Wc_We(self, x, We, oldWc, newWc):
		eta = 0.01
		delta_Wc = newWc - oldWc
		# enc_out is the (sigmoid) encoder output for x; enc_out * (1 - enc_out) is the
		# corresponding sigmoid derivative used to push the update back into We
		enc_out = self.encoder.output(x)
		error = enc_out * (1 - enc_out)
		We = We - eta * T.dot(x.T, error)
		return We
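
The train() stubs above describe, but do not implement, a batching scheme in which every minibatch also carries some labelled points, ideally balanced across classes. Below is a minimal numpy sketch of such a scheme, independent of the Theano graph; make_balanced_batches and its parameters are illustrative names, not part of the repository, and y_lab is assumed to be a 1-D integer label vector (take an argmax first if the labels are one-of-K encoded).

import numpy as np

def make_balanced_batches(x_lab, y_lab, x_unlab, batch_size=400, n_lab_per_batch=40, rng=None):
	# Yield (x_batch_lab, y_batch_lab, x_batch_unlab) minibatches in which the labelled
	# part is spread as evenly as possible over the classes and the remainder of the
	# batch is filled with unlabelled points.
	rng = rng or np.random.RandomState(0)
	classes = np.unique(y_lab)
	per_class = max(1, n_lab_per_batch // len(classes))
	n_unlab_per_batch = batch_size - per_class * len(classes)

	n_batches = x_unlab.shape[0] // n_unlab_per_batch
	unlab_order = rng.permutation(x_unlab.shape[0])

	for b in range(n_batches):
		lab_idx = []
		for c in classes:
			idx_c = np.where(y_lab == c)[0]
			# sample with replacement so that small classes can still fill their quota
			lab_idx.extend(rng.choice(idx_c, size=per_class, replace=True))
		lab_idx = np.asarray(lab_idx)
		unlab_idx = unlab_order[b * n_unlab_per_batch:(b + 1) * n_unlab_per_batch]
		yield x_lab[lab_idx], y_lab[lab_idx], x_unlab[unlab_idx]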