Example #1
class SSLayer(object):
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, corruption=0.30, batch_size=400, activation='sigmoid'):
		self.rng = numpy_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.encoder = HiddenLayer(self.rng, self.n_inputs, self.n_outputs, activation=activation)
		self.decoder = HiddenLayer(self.rng, self.n_outputs, self.n_inputs, activation=activation)

		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.x_lab = None
		self.x_unlab = None
		self.y_lab = None
		self.softmaxLayer = LogisticRegression(self.rng, n_outputs, n_targets, init_zero=False)
		self.params = self.encoder.params + self.decoder.params + self.softmaxLayer.params 
		self.delta_params = self.encoder.delta_params + self.decoder.delta_params + self.softmaxLayer.delta_params		


	@staticmethod
	def _shared_dataset(x, borrow=True):
		return theano.shared(np.asarray(x, dtype=theano.config.floatX), borrow=borrow)


	def get_cost_updates(self, x_lab, x_unlab, y_lab, learning_rate=0.02):
		self.x_lab = x_lab
		self.x_unlab = x_unlab
		self.y_lab = y_lab
		out_lab = self.encoder.output(self.x_lab)
		out_unlab = self.encoder.output(self.x_unlab)
		z_lab = self.decoder.output(out_lab)
		z_unlab = self.decoder.output(out_unlab)
		preds_lab = self.softmaxLayer.predict(out_lab)

		accuracy = self.softmaxLayer.calcAccuracy(out_lab, y_lab)
		# mean squared reconstruction error on the unlabelled and labelled inputs
		cost_reconstruction_unlab = T.mean((z_unlab - x_unlab) * (z_unlab - x_unlab))
		cost_reconstruction_lab = T.mean((z_lab - x_lab) * (z_lab - x_lab))
		cost_classification = self.softmaxLayer.cost(out_lab, y_lab)
		cost = cost_reconstruction_unlab + cost_reconstruction_lab + 100 * cost_classification

		# plain SGD updates for encoder, decoder and softmax parameters
		updates = OrderedDict()
		gparams = T.grad(cost, wrt=self.params)
		for p, gp in zip(self.params, gparams):
			updates[p] = p - gp * learning_rate

		return (cost, accuracy, updates)


	# For better control, this fn takes numpy arrays.
	# Make batches so that each one also has some representation from the labelled data, ideally with the same number of points per class (see the sketch after this class).
	def train(self, x_lab_numpy, y_lab_numpy, xunlab_numpy):
		pass
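A minimal sketch of the batching scheme described in the comment above, assuming numpy inputs and integer class labels; the helper name make_balanced_batches, the labelled fraction and the sampling-with-replacement choice are illustrative, not part of the original class:

import numpy as np

def make_balanced_batches(y_lab, n_unlab, batch_size=400, lab_fraction=0.25, rng=None):
	# Build index batches that mix labelled and unlabelled points, drawing
	# roughly the same number of labelled points from every class.
	rng = rng or np.random.RandomState(0)
	n_lab_per_batch = int(batch_size * lab_fraction)
	n_unlab_per_batch = batch_size - n_lab_per_batch
	classes = np.unique(y_lab)
	per_class = max(1, n_lab_per_batch // len(classes))

	unlab_order = rng.permutation(n_unlab)
	batches = []
	for start in range(0, n_unlab, n_unlab_per_batch):
		lab_idx = np.concatenate([
			rng.choice(np.where(y_lab == c)[0], size=per_class, replace=True)
			for c in classes])
		unlab_idx = unlab_order[start:start + n_unlab_per_batch]
		batches.append((lab_idx, unlab_idx))
	return batches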
Example #2
	def __init__(self, rng, hidden_layer, n_in=None, n_out=None, w_layers=None, b_layers=None, config=1):
		self.hidden_layer = hidden_layer
		self.rng = rng
		self.n_in = n_in
		self.n_out = n_out
		self.layers = []
		self.x = None

		self.opLayer = LogisticRegression(rng, hidden_layer[-1], n_out)
		prev_out = self.n_in
		self.params = []
		self.delta_params = []

		# construct the network from the provided per-layer weights, then add a logistic-regression output layer
		if w_layers and len(hidden_layer) == len(w_layers):
			for i in range(len(w_layers)):
				w_np = w_layers[i]
				b_np = b_layers[i]

				# wrap the supplied numpy weights and biases in shared variables
				w = theano.shared(value=np.asarray(w_np, dtype=theano.config.floatX), name='We', borrow=True)
				b = theano.shared(value=np.asarray(b_np, dtype=theano.config.floatX), name='b', borrow=True)

				HL = HiddenLayer(self.rng, prev_out, hidden_layer[i], init_w=w, init_b=b)
				self.layers.append(HL)
				prev_out = hidden_layer[i]
				self.params += HL.params
				self.delta_params = self.delta_params + HL.delta_params
		else:
			for ind, h in enumerate(hidden_layer):
				HL = HiddenLayer(self.rng, prev_out, h)
				self.layers.append(HL)
				prev_out = h
				self.params += HL.params
				self.delta_params = self.delta_params + HL.delta_params 

		self.params += self.opLayer.params
		self.delta_params = self.delta_params + self.opLayer.delta_params
Example #3
	def __init__(self, numpy_rng, hidden_layers, x_lab_np, y_lab_np, x_unlab_np, alpha=100, beta=3, batch_size=500, theano_rng=None, activation='tanh'):
		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.batch_size = batch_size
		self.hidden_layers = hidden_layers
		self.alpha = alpha
		self.beta = beta

		# initializing the alpha values for all the layers ..... 
		# if not isinstance(alpha, list):
		# 	self.alpha = [alpha] * len(hidden_layers) 

		if self.theano_rng is None:
			self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
		self.num_layers = len(hidden_layers)
		self.params_layers = []
		self.x_lab_np = x_lab_np
		self.x_unlab_np = x_unlab_np
		self.y_lab_np = y_lab_np

		# y with one of K encoding ....
		# self.y_lab_np_1_K = utils.one_of_K_encoding(y_lab_np, num_classes=10)

		self.x_lab = T.matrix('x_lab')
		self.x_unlab = T.matrix('x_unlab')
		self.y_lab = T.ivector('y_lab')
		# self.x_total = 
		self.num_samples = self.x_lab_np.shape[0] + self.x_unlab_np.shape[0]
		input_size = self.x_lab_np.shape[1]
		self.input_size = input_size

		target_size = len(list(set(utils.reduce_encoding(y_lab_np))))
		self.target_size = target_size
		output_size = hidden_layers[0]

		self.layers = []
		self.params = []
		for i, hl in enumerate(hidden_layers):
			if i == 0:
				input_lab = self.x_lab
				input_unlab = self.x_unlab
				out_lab = self.y_lab
			if i > 0:
				out_lab = self.y_lab
				input_lab = self.layers[-1].output(input_lab)
				input_unlab = self.layers[-1].output(input_unlab)
				input_size = hidden_layers[i-1]
				output_size = hl
			ssda = SSDAELayer(numpy_rng, self.theano_rng, input_size, output_size, target_size, x_lab=input_lab, y_lab=out_lab, x_unlab=input_unlab, activation=activation, alpha=self.alpha, beta=self.beta)
			# ssda = SSCAELayer(numpy_rng, self.theano_rng, input_size, output_size, target_size, x_lab=input_lab, y_lab=out_lab, x_unlab=input_unlab, activation=activation)
			self.layers.append(ssda)
			self.params = self.params + ssda.params 

		self.logLayer = LogisticRegression(self.numpy_rng, hidden_layers[-1], self.target_size, init_zero=True)
Example #4
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, corruption=0.30, batch_size=400, activation='sigmoid'):
		self.rng = numpy_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.encoder = HiddenLayer(self.rng, self.n_inputs, self.n_outputs, activation=activation)
		self.decoder = HiddenLayer(self.rng, self.n_outputs, self.n_inputs, activation=activation)

		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.x_lab = None
		self.x_unlab = None
		self.y_lab = None
		self.softmaxLayer = LogisticRegression(self.rng, n_outputs, n_targets, init_zero=False)
		self.params = self.encoder.params + self.decoder.params + self.softmaxLayer.params 
		self.delta_params = self.encoder.delta_params + self.decoder.delta_params + self.softmaxLayer.delta_params		
Example #5
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, x_lab=None, x_unlab=None, y_lab=None, learning_rate = 0.020, corruption=0.20, batch_size=400, alpha=700, beta=3, tied=False, activation='tanh'):
		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.alpha = alpha
		self.beta = beta
		self.encoder = HiddenLayer(self.numpy_rng, self.n_inputs, self.n_outputs, activation=activation)

		if tied:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, init_w=self.encoder.w.T, activation=activation)
		else:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, activation=activation)
		
		self.learning_rate = learning_rate
		self.activation = activation
		self.layer_num = self.__layer_nums.next()

		# fall back to fresh symbolic variables when no inputs are wired in
		if x_lab is None:
			self.x_lab = T.matrix('inp_lab')
		else:
			self.x_lab = x_lab

		if x_unlab is None:
			self.x_unlab = T.matrix('inp_unlab')
		else:
			self.x_unlab = x_unlab

		if y_lab is None:
			self.y_lab = T.matrix('y_lab')
		else:
			self.y_lab = y_lab

		self.out_lab = self.encoder.output(self.x_lab)
		self.out_unlab = self.encoder.output(self.x_unlab)

		self.softmaxLayer = LogisticRegression(self.numpy_rng, n_outputs, n_targets, init_zero=False)
		self.encoderParams = self.encoder.params
		self.params = self.encoder.params + self.decoder.params
		self.paramsAll = self.encoder.params + self.decoder.params + self.softmaxLayer.params
Example #6
class DNN(object):
	def __init__(self, rng, hidden_layer, n_in=None, n_out=None, w_layers=None, b_layers=None, config=1):
		self.hidden_layer = hidden_layer
		self.rng = rng
		self.n_in = n_in
		self.n_out = n_out
		self.layers = []
		self.x = None

		self.opLayer = LogisticRegression(rng, hidden_layer[-1], n_out)
		prev_out = self.n_in
		self.params = []
		self.delta_params = []

		# construct the network from the provided per-layer weights, then add a logistic-regression output layer
		if w_layers and len(hidden_layer) == len(w_layers):
			for i in range(len(w_layers)):
				w_np = w_layers[i]
				b_np = b_layers[i]

				# wrap the supplied numpy weights and biases in shared variables
				w = theano.shared(value=np.asarray(w_np, dtype=theano.config.floatX), name='We', borrow=True)
				b = theano.shared(value=np.asarray(b_np, dtype=theano.config.floatX), name='b', borrow=True)

				HL = HiddenLayer(self.rng, prev_out, hidden_layer[i], init_w=w, init_b=b)
				self.layers.append(HL)
				prev_out = hidden_layer[i]
				self.params += HL.params
				self.delta_params = self.delta_params + HL.delta_params
		else:
			for ind, h in enumerate(hidden_layer):
				HL = HiddenLayer(self.rng, prev_out, h)
				self.layers.append(HL)
				prev_out = h
				self.params += HL.params
				self.delta_params = self.delta_params + HL.delta_params 

		self.params += self.opLayer.params
		self.delta_params = self.delta_params + self.opLayer.delta_params



	def forward(self, X):
		# self.activations = []
		inp = X
		# self.x = X
		activations = [X]
		for i, l in enumerate(self.layers):
			act = l.output(inp)
			activations.append(act)
			inp = act
		return activations


	def cost(self, X, y):
		act = self.forward(X)
		estimate = act[-1]
		return self.opLayer.cost(estimate, y)



	def calcAccuracy(self, X, y):
		act = self.forward(X)
		estimate = act[-1]
		return self.opLayer.calcAccuracy(estimate, y)


	def calcAccuracyTimit(self, X, y):
		act = self.forward(X)
		estimate = act[-1]
		return self.opLayer.calcAccuracyTimitMono(estimate, y)
		

	# def calcAccuracyTimitMono39(self, X, y):
	# 	act = self.forward(X)
	# 	estimate = act[-1]
	# 	return self.opLayer.calcAccuracyTimitMono39(estimate, y)


	def prettyprint(self):
		pass
		# print self.w.get_value()
		# print self.b.get_value()


	def get_weight(self):
		w_list = []
		for l in self.layers:
			w_list.append(l.get_weight())

		w_list.append(self.opLayer.get_weight())
		return w_list
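A minimal sketch of how this DNN might be driven, assuming the repo's HiddenLayer and LogisticRegression classes, MNIST-sized inputs and plain SGD; the layer sizes, learning rate and variable names below are illustrative:

import numpy as np
import theano
import theano.tensor as T
from collections import OrderedDict

rng = np.random.RandomState(1234)
dnn = DNN(rng, hidden_layer=[500, 500], n_in=784, n_out=10)

x = T.matrix('x')
y = T.ivector('y')
cost = dnn.cost(x, y)
accuracy = dnn.calcAccuracy(x, y)

# plain SGD on all parameters
gparams = T.grad(cost, wrt=dnn.params)
updates = OrderedDict((p, p - 0.01 * gp) for p, gp in zip(dnn.params, gparams))

train_fn = theano.function([x, y], [cost, accuracy], updates=updates)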
Example #7
class SSDAELayer(object):

	# class variable to keep track of layers created 
	__layer_nums = count(0)
	def __init__(self, numpy_rng, theano_rng, n_inputs, n_outputs, n_targets, x_lab=None, x_unlab=None, y_lab=None, learning_rate = 0.020, corruption=0.20, batch_size=400, alpha=700, beta=3, tied=False, activation='tanh'):
		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.n_inputs = n_inputs
		self.n_outputs = n_outputs 
		self.alpha = alpha
		self.beta = beta
		self.encoder = HiddenLayer(self.numpy_rng, self.n_inputs, self.n_outputs, activation=activation)

		if tied:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, init_w=self.encoder.w.T, activation=activation)
		else:
			self.decoder = HiddenLayer(self.numpy_rng, self.n_outputs, self.n_inputs, activation=activation)
		
		self.learning_rate = learning_rate
		self.activation = activation
		self.layer_num = self.__layer_nums.next()

		# fall back to fresh symbolic variables when no inputs are wired in
		if x_lab is None:
			self.x_lab = T.matrix('inp_lab')
		else:
			self.x_lab = x_lab

		if x_unlab is None:
			self.x_unlab = T.matrix('inp_unlab')
		else:
			self.x_unlab = x_unlab

		if y_lab is None:
			self.y_lab = T.matrix('y_lab')
		else:
			self.y_lab = y_lab

		self.out_lab = self.encoder.output(self.x_lab)
		self.out_unlab = self.encoder.output(self.x_unlab)

		self.softmaxLayer = LogisticRegression(self.numpy_rng, n_outputs, n_targets, init_zero=False)
		self.encoderParams = self.encoder.params
		self.params = self.encoder.params + self.decoder.params
		self.paramsAll = self.encoder.params + self.decoder.params + self.softmaxLayer.params
		# self.params = self.encoder.params + self.decoder.params
		# self.delta_params = self.encoder.delta_params + self.decoder.delta_params + self.softmaxLayer.delta_params		


	@staticmethod
	def _shared_dataset(x, borrow=True):
		return theano.shared(np.asarray(x, dtype=theano.config.floatX), borrow=borrow)


	@classmethod
	def count_instances(cls):
		cls.layer_num += 1


	def output(self, x):
		out = self.encoder.output(x)
		# out_unlab = self.encoder.output(x_unlab)
		return out

#  outputs the predictions from softmax layer ...
	def predict(self, x):	
		return self.softmaxLayer.predict(x)


	def predict_np(self, wc, bc, z_np):
		softmaxout = np.dot(z_np, wc) + bc
		preds = np.argmax(softmaxout, axis=1)
		return preds

	def get_cost_updates(self):
		out_unlab = self.encoder.output(self.x_unlab)
		out_lab = self.encoder.output(self.x_lab)
		z_unlab = self.decoder.output(out_unlab)
		z_lab = self.decoder.output(out_lab)
		preds_lab = self.softmaxLayer.predict(out_lab)
		self.preds_lab = preds_lab

		print "value of alpha is:", self.alpha
		print "value of beta is:", self.beta

		# reconstruction cost: cross-entropy for sigmoid units, mean squared error for tanh units
		if self.activation == 'sigmoid':
			crl = -T.sum(self.x_lab * T.log(z_lab) + (1 - self.x_lab) * T.log(1 - z_lab), axis=1)
			cost_reconstruction_lab = T.mean(crl)
			cru = -T.sum(self.x_unlab * T.log(z_unlab) + (1 - self.x_unlab) * T.log(1 - z_unlab), axis=1)
			cost_reconstruction_unlab = T.mean(cru)
		elif self.activation == 'tanh':
			cost_reconstruction_lab = T.mean(T.mean((self.x_lab - z_lab) * (self.x_lab - z_lab), axis=1), axis=0)
			cost_reconstruction_unlab = T.mean(T.mean((self.x_unlab - z_unlab) * (self.x_unlab - z_unlab), axis=1), axis=0)

		accuracy = self.softmaxLayer.calcAccuracy(out_lab, self.y_lab)
		cost_classification = self.softmaxLayer.cost(out_lab, self.y_lab)
		# beta weights the reconstruction terms, alpha weights the supervised term
		cost1 = self.beta * (cost_reconstruction_lab + cost_reconstruction_unlab)
		cost2 = self.alpha * cost_classification
		cost = cost1 + cost2

		# plain SGD updates on encoder, decoder and softmax parameters
		updates = OrderedDict()
		gparams = T.grad(cost, wrt=self.paramsAll)
		for p, gp in zip(self.paramsAll, gparams):
			updates[p] = p - gp * self.learning_rate

		return [cost, cost1, cost_classification, accuracy, updates]


	# For better control, this fn takes numpy arrays.
	# Make batches so that each one also has some representation from the labelled data, ideally with the same number of points per class.
	def train(self, x_lab_numpy, y_lab_numpy, xunlab_numpy):
		pass
		# batch_sgd_train = theano.function(inputs=[index_unlab, index_lab], outputs=[cost, accuracy], givens={xlab:xlab[]})


	def getWc(self):
		return self.softmaxLayer.get_weight()


	def setWc(self, wc):
		self.softmaxLayer.set_weight(wc)


	def get_weight(self):
		return self.encoder.get_weight()


	def get_bias(self):
		return self.encoder.get_bias()


	# Helpers to update the classifier weight Wc manually, without going through the computational graph.
	# The targets are assumed to use one-of-K encoding; this is an alternative way to update Wc via its numpy value.


	def update_Wc(self, target, output, z):
		eta = 0.01
		wc = self.getWc()
		n_rows, n_cols = wc.shape[0], wc.shape[1]
		for i in xrange(n_rows):
			for j in xrange(n_cols):
				if target[i, j] == 1:
					# wc[i,j] = wc[i,j] - eta * (target[i,j] - output[i,j])* z[i]  
					wc[i,j] = wc[i,j] - eta * (target[i,j] - output[i,j])

		self.setWc(wc)


	def update_Wc_We(self, x, We, oldWc, newWc):
		eta = 0.01
		delta_Wc = newWc - oldWc
		# encoder activation for the current input (sigmoid-style derivative below)
		enc_out = self.encoder.output(x)
		error = enc_out * (1 - enc_out)
		We = We - eta * T.dot(x.T, error)
		return We
Example #8
class SSDAE(object):
	def __init__(self, numpy_rng, hidden_layers, x_lab_np, y_lab_np, x_unlab_np, alpha=100, beta=3, batch_size=500, theano_rng=None, activation='tanh'):
		self.numpy_rng = numpy_rng
		self.theano_rng = theano_rng
		self.batch_size = batch_size
		self.hidden_layers = hidden_layers
		self.alpha = alpha
		self.beta = beta

		# initializing the alpha values for all the layers ..... 
		# if not isinstance(alpha, list):
		# 	self.alpha = [alpha] * len(hidden_layers) 

		if self.theano_rng is None:
			self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
		self.num_layers = len(hidden_layers)
		self.params_layers = []
		self.x_lab_np = x_lab_np
		self.x_unlab_np = x_unlab_np
		self.y_lab_np = y_lab_np

		# y with one of K encoding ....
		# self.y_lab_np_1_K = utils.one_of_K_encoding(y_lab_np, num_classes=10)

		self.x_lab = T.matrix('x_lab')
		self.x_unlab = T.matrix('x_unlab')
		self.y_lab = T.ivector('y_lab')
		# self.x_total = 
		self.num_samples = self.x_lab_np.shape[0] + self.x_unlab_np.shape[0]
		input_size = self.x_lab_np.shape[1]
		self.input_size = input_size

		target_size = len(list(set(utils.reduce_encoding(y_lab_np))))
		self.target_size = target_size
		output_size = hidden_layers[0]

		self.layers = []
		self.params = []
		for i, hl in enumerate(hidden_layers):
			if i == 0:
				input_lab = self.x_lab
				input_unlab = self.x_unlab
				out_lab = self.y_lab
			if i > 0:
				out_lab = self.y_lab
				input_lab = self.layers[-1].output(input_lab)
				input_unlab = self.layers[-1].output(input_unlab)
				input_size = hidden_layers[i-1]
				output_size = hl
			ssda = SSDAELayer(numpy_rng, self.theano_rng, input_size, output_size, target_size, x_lab=input_lab, y_lab=out_lab, x_unlab=input_unlab, activation=activation, alpha=self.alpha, beta=self.beta)
			# ssda = SSCAELayer(numpy_rng, self.theano_rng, input_size, output_size, target_size, x_lab=input_lab, y_lab=out_lab, x_unlab=input_unlab, activation=activation)
			self.layers.append(ssda)
			self.params = self.params + ssda.params 

		self.logLayer = LogisticRegression(self.numpy_rng, hidden_layers[-1], self.target_size, init_zero=True)


	@staticmethod
	def _shared_dataset(x, borrow=True):
		return theano.shared(np.asarray(x, dtype=theano.config.floatX), borrow=borrow)


	@staticmethod
	def _shared_dataset_y(y, borrow=True):
		rval = theano.shared(np.asarray(y, dtype=theano.config.floatX), borrow=borrow)
		return T.cast(rval, 'int32')


	def get_training_functions(self, x_lab=None, y_lab=None, x_unlab=None):
		# assert xlab.shape[0] == len(y_lab) 
		assert self.x_lab_np.shape[0] == len(self.y_lab_np)
		# self.x_lab = self._shared_dataset(self.x_lab_np)
		# self.y_lab = self._shared_dataset(self.y_lab_np)
		# self.x_unlab = self._shared_dataset(self.x_unlab_np)

		# fraction of labelled samples, used to split every mini-batch between labelled and unlabelled data
		frac_lab = self.x_lab_np.shape[0] / float(self.x_lab_np.shape[0] + self.x_unlab_np.shape[0])
		index_unlab = T.ivector('index_unlab')
		index_lab = T.ivector('index_lab')

		self.batch_size_lab = int(self.batch_size * frac_lab)
		self.batch_size_unlab = int(self.batch_size * (1 - frac_lab))

		self.num_labels = self.x_lab_np.shape[0]
		self.num_unlabels = self.x_unlab_np.shape[0]
		self.num_samples = self.num_labels + self.num_unlabels

		num_batches = self.num_samples / float(self.batch_size)
		pretraining_fns = []
		for i in xrange(len(self.hidden_layers)):
			l = self.layers[i]
			result = l.get_cost_updates()
			cost1, cost2, cost3, preds, updates = result[0], result[1], result[2], result[3], result[4]

			train_fn = theano.function(inputs=[index_lab, index_unlab], updates=updates, outputs=[cost1, cost2, cost3, preds], givens={self.x_lab:x_lab[index_lab], self.x_unlab:x_unlab[index_unlab], self.y_lab:y_lab[index_lab]}, on_unused_input='warn')
			pretraining_fns.append(train_fn)

		return  pretraining_fns




# this function does the complete training for the network. single point function.
	def trainSGD(self, epochs=3):

		# if epochs is just a single value, use it for every layer
		if not isinstance(epochs, (list, tuple)):
			epochs = [epochs] * len(self.hidden_layers)
		self.num_batches = int(self.num_samples / self.batch_size)
		NUM_EPOCHS = epochs
		x_lab_shared = self._shared_dataset(self.x_lab_np)
		x_unlab_shared = self._shared_dataset(self.x_unlab_np)
		y_lab_shared = self._shared_dataset_y(self.y_lab_np)

		# get pretrining fns for all the layers ....
		pretrain_fns = self.get_training_functions(x_lab=x_lab_shared, x_unlab=x_unlab_shared, y_lab=y_lab_shared)
		print "number of labelled samples is:", self.num_labels
		print "number of unlabelled samples is:", self.num_unlabels

		indices_lab = np.arange(self.num_labels, dtype=np.dtype('int32'))
		indices_unlab = np.arange(self.num_unlabels, dtype=np.dtype('int32'))
		c = []
		c1 = []
		c2 = []
		c3 = []

		print "............ Pretraining ..............."
		# pretraining loop over all the hidden layers
		for i in xrange(len(self.hidden_layers)):
			total_epochs = NUM_EPOCHS[i]

			for epoch in xrange(total_epochs):
				for j in xrange(self.num_batches - 1):
					index_lab = indices_lab[j * self.batch_size_lab:(j + 1) * self.batch_size_lab]
					index_unlab = indices_unlab[j * self.batch_size_unlab:(j + 1) * self.batch_size_unlab]
					res = pretrain_fns[i](index_lab=index_lab, index_unlab=index_unlab)
					c.append(res[0])
					c1.append(res[1])
					c2.append(res[2])
					c3.append(res[3])

				print "epoch is:", epoch
				print "costs (total, weighted reconstruction, classification) and accuracy:", np.nanmean(c), np.nanmean(c1), np.nanmean(c2), np.mean(c3)



	def trainSGDSupervised(self, train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y):
		# dnn = DNN(self.numpy_rng, [self.hidden_layers[-1]], self.hidden_layers[-1], 10, w_layers=[self.layers[0].encoder.get_weight()], b_layers=[self.layers[0].encoder.get_bias()])
		dnn = DNN(self.numpy_rng, [self.hidden_layers[0]], self.hidden_layers[0], 10, w_layers=[self.layers[0].encoder.get_weight()], b_layers=[self.layers[0].encoder.get_bias()])
		# mnist_data = mnist.load_mnist_theano('mnist.pkl.gz')
		# Hl = HiddenLayer(self.numpy_rng, self.input_size, self.hidden_layers[0], init_w=self.layers[0].get_weight(), init_b=self.layers[0].get_bias(), activation='tanh')
		# mnist_data = mnist.load_mnist_numpy('mnist.pkl.gz')
		print "............... Final training starts now ........."
		# bsgd(dnn, mnist_data, epochs=40)

		# train_set_x, train_set_y = mnist_data[0]
		# valid_set_x, valid_set_y = mnist_data[1]
		# test_set_x, test_set_y = mnist_data[2]

		# train_set_x, train_set_y = train_set_x[:600,:], train_set_y[:600]
		batch_size = 300
		epochs = 140

		x_final = T.matrix('x_final')
		y_final = T.ivector('y_final')

		# the (train, valid, test) pairs passed to this method stand in for the
		# mnist_data tuple that the commented-out loader above would have returned
		mnist_data = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
		bsgd(dnn, mnist_data, epochs=25, lr=0.008)

		print train_set_x.shape, self.layers[0].get_weight().shape
		# propagate the data through all the pretrained encoders (numpy version of the tanh layers),
		# so the features match the input size expected by the final logistic layer
		z_train_np = train_set_x
		z_valid_np = valid_set_x
		z_test_np = test_set_x
		for l in self.layers:
			z_train_np = np.tanh(np.dot(z_train_np, l.get_weight()) + l.get_bias())
			z_valid_np = np.tanh(np.dot(z_valid_np, l.get_weight()) + l.get_bias())
			z_test_np = np.tanh(np.dot(z_test_np, l.get_weight()) + l.get_bias())

		def get_shared(x, borrow=True):
			x_shared = theano.shared(np.asarray(x, dtype=theano.config.floatX), borrow=borrow)
			return x_shared

		def get_shared_int(y, borrow=True):
			y_shared = theano.shared(np.asarray(y), borrow=borrow)
			return T.cast(y_shared, 'int32')

		train_set_z_shared = get_shared(z_train_np)
		valid_set_z_shared = get_shared(z_valid_np)
		test_set_z_shared = get_shared(z_test_np)
		train_set_y_shared = get_shared_int(train_set_y)
		valid_set_y_shared = get_shared_int(valid_set_y)
		test_set_y_shared = get_shared_int(test_set_y)


		num_samples = train_set_x.shape[0] 
		indices = np.arange(num_samples,  dtype=np.dtype('int32'))
		num_batches = num_samples / batch_size

		supervised_cost = self.logLayer.cost(x_final, y_final) 
		supervised_accuracy = self.logLayer.calcAccuracy(x_final, y_final)
		# params_supervised = self.layers[-1].encoderParams + self.logLayer.params
		# params_supervised = Hl.params + self.logLayer.params

		# exit() 
		params_supervised =  self.logLayer.params

		updates = OrderedDict()
		p_final_grads = [T.grad(cost=supervised_cost, wrt = p) for p in params_supervised] 
		lr = 0.06

		for p, gp in zip(params_supervised, p_final_grads):
			updates[p] = p - lr*gp


		index = T.ivector('index')

		batch_sgd_train_final = theano.function(inputs=[index], outputs=[supervised_cost, supervised_accuracy], updates=updates, givens={x_final: train_set_z_shared[index], y_final:train_set_y_shared[index]})

		batch_sgd_valid_final = theano.function(inputs=[], outputs=[self.logLayer.calcAccuracy(x_final, y_final)], givens={x_final: valid_set_z_shared, y_final:valid_set_y_shared})
		batch_sgd_test_final = theano.function(inputs=[], outputs=[self.logLayer.calcAccuracy(x_final, y_final)], givens={x_final: test_set_z_shared, y_final:test_set_y_shared})
		train_accuracy = []

		for n in xrange(epochs):
			for i in xrange(num_batches):
				batch = indices[i*batch_size: (i+1)*batch_size]
				c,a = batch_sgd_train_final(index=batch)
				train_accuracy.append(a)

			print "epoch:", n, "  train accuracy", np.mean(a)
			print "epoch:", n,  " validation accuracy:",  batch_sgd_valid_final()
			# self.finalLogLayer = LogisticRegression(n_inputs=self.hidden_layers[-1], n_outputs=self.n_outputs, activation='tanh', init_zero=True)

		print "test accuracy:", batch_sgd_test_final()