def __init__(self, config, is_training=True):
		"""Build the TensorFlow graph of the plain encoder-decoder model.

		The graph handles one sample at a time (every placeholder has a
		leading dimension of 1) and contains:
		  * the training encoder and decoder, unrolled with tf.nn.rnn,
		  * a one-step test decoder driven through placeholders,
		  * an Adam optimizer with global-norm gradient clipping,
		  * the summary ops (only when is_training is True),
		  * a Saver over the trainable variables.

		self._maps['grid'] is read but not assigned here, so it has to exist
		before this constructor runs.

		Args:
			config: object providing encoder_unrollings, decoder_unrollings,
				num_actions, vocab_size, num_nodes, embedding_world_state_size,
				learning_rate, learning_rate_decay_factor, max_gradient_norm
				and dropout_rate.
			is_training: when False, the 'Weights', 'Encoder' and 'Decoder'
				variable scopes are opened with reuse=True and no summary ops
				are created.
		"""
		# Maps' feature dictionaries (all features are the same for each map)
		self._map_feature_dict = get_landmark_set(self._maps['grid'])
		self._map_objects_dict = get_objects_set(self._maps['grid'])

		self._max_encoder_unrollings = config.encoder_unrollings
		self._max_decoder_unrollings = config.decoder_unrollings
		self._num_actions = config.num_actions
		self._vocab_size = config.vocab_size
		# width of the world-state vector fed to the decoder at every step
		self._y_size = 4*len(self._map_feature_dict) + len(self._map_objects_dict)

		# Model parameters
		self._n_hidden = config.num_nodes 	# same for encoder and decoder
		self._embedding_world_state_size = config.embedding_world_state_size

		self._init_learning_rate = tf.constant(config.learning_rate)
		self._learning_rate = self._init_learning_rate
		self._learning_rate_decay_factor = config.learning_rate_decay_factor

		# NOTE(review): the configured clipping norm is divided by 3 here; the
		# reason is not visible in this file -- confirm it is intentional.
		self._max_gradient_norm = config.max_gradient_norm/3.0

		# debug parameters
		self._train_dir = "tmp/"			# dir where checkpoint files will be saved
		# merging and writing vars
		self._writer = None
		# config.dropout_rate is used as the *keep* probability of the dropout wrappers
		keep_prob = config.dropout_rate

		## TRAINING Placeholders
		# Actual (unpadded) lengths of the current sample; fed at run time and
		# used as sequence_length of the RNNs.
		self._encoder_unrollings = tf.placeholder('int64')
		self._decoder_unrollings = tf.placeholder('int64')

		self._encoder_inputs = []
		self._decoder_outputs = []
		self._world_state_vectors = []
		for i in xrange(self._max_encoder_unrollings):
			self._encoder_inputs.append( tf.placeholder(tf.float32,shape=[1,self._vocab_size], name='x') )
		for i in xrange(self._max_decoder_unrollings):
			self._decoder_outputs.append( tf.placeholder(tf.int32,shape=[1], name='actions') )
			self._world_state_vectors.append( tf.placeholder(tf.float32,shape=[1,self._y_size], name='world_vect') )

		## TESTING / VALIDATION Placeholders
		# previous decoder state (s_t, c_t), current world-state vector and target action
		self._test_st = tf.placeholder(tf.float32, [1, self._n_hidden], name='test_st')
		self._test_ct = tf.placeholder(tf.float32, [1, self._n_hidden], name='test_ct')
		self._test_yt = tf.placeholder(tf.float32, [1, self._y_size], name='test_yt')
		self._test_decoder_output = tf.placeholder(tf.float32,shape=[1,self._num_actions], name='test_action')

		with tf.variable_scope('Weights',reuse=(not is_training)) as scope:
			# LSTM to softmax layer.
			wo = tf.get_variable('wo',shape=(self._n_hidden , self._num_actions),initializer=weight_initializer)
			bo = tf.get_variable('bo',shape=(1 , self._num_actions),initializer=weight_initializer)

		#######################################################################################################################
		## Encoder
		with tf.variable_scope('Encoder',reuse=(not is_training)) as scope:
			enc_cell = CustomLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._vocab_size)
			enc_cell_dp = tf.nn.rnn_cell.DropoutWrapper(enc_cell,output_keep_prob=keep_prob)

			# The initial state is whatever weight_initializer (defined elsewhere)
			# returns for shape (1, 2*n_hidden).
			hs,last_state = tf.nn.rnn(
				enc_cell_dp,
				self._encoder_inputs,
				initial_state=weight_initializer((1,2*self._n_hidden)),
				dtype=tf.float32,
				sequence_length = self._encoder_unrollings*tf.ones([1],tf.int64),
				scope=scope
			)
		#END-ENCODER-SCOPE

		#######################################################################################################################
		## Decoder loop
		with tf.variable_scope('Decoder',reuse=(not is_training)) as scope:
			dec_cell = CustomLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._y_size)
			dec_cell_dp = tf.nn.rnn_cell.DropoutWrapper(dec_cell,output_keep_prob=keep_prob)

			# The decoder starts from the encoder's last state (no transition layer).
			dec_outs,_ = tf.nn.rnn(
				dec_cell_dp,
				inputs = self._world_state_vectors,
				initial_state=last_state,
				dtype=tf.float32,
				sequence_length = self._decoder_unrollings*tf.ones([1],tf.int64),
				scope=scope
			)

			# Project every decoder output to action logits / probabilities.
			logits = []
			self._train_predictions = []
			for out in dec_outs:
				logit = tf.matmul(out,wo)+bo
				logits.append(logit)
				self._train_predictions.append( tf.nn.softmax(logit,name='prediction') )

			# Loss definition
			nopad_dec_outputs = self._decoder_outputs[:len(dec_outs)]
			self._loss = tf.nn.seq2seq.sequence_loss(
				logits,
				targets=nopad_dec_outputs,
				weights=[tf.ones([1],dtype=tf.float32)]*self._max_decoder_unrollings,
				name='train_loss')
		#END-DECODER-SCOPE

		###################################################################################################################
		# TESTING
		# Decode one step at a time, the world state vector is defined externally.
		# The raw cells are used here, i.e. without the dropout wrappers.
		with tf.variable_scope('Encoder',reuse=True) as scope:
			test_hs,test_last_st = tf.nn.rnn(
				enc_cell,
				self._encoder_inputs,
				initial_state=weight_initializer((1,2*self._n_hidden)),
				dtype=tf.float32,
				sequence_length = self._encoder_unrollings*tf.ones([1],tf.int64),
				scope=scope
			)

		# The packed state is split in two halves along axis 1: (c, s).
		self._test_c0, self._test_s0 = tf.split(1, 2, test_last_st)

		with tf.variable_scope('Decoder',reuse=True) as scope:
			# joins scopes -> Decoder/CustomLSTMCell
			_,temp = dec_cell(self._test_yt,tf.concat(1, [self._test_ct, self._test_st]),scope="CustomLSTMCell")
			self._next_ct,self._next_st = tf.split(1, 2, temp)

		# softmax layer
		logit = tf.matmul(self._next_st,wo) + bo
		self._test_prediction = tf.nn.softmax(logit,name='inf_prediction')
		# Loss definition
		self._test_loss = tf.nn.softmax_cross_entropy_with_logits(logit,self._test_decoder_output, name="test_loss")

		with tf.variable_scope('Optimization') as scope:
			# Optimizer setup
			self._global_step = tf.Variable(0,trainable=False)

			# NOTE(review): the decayed rate below is only exported as a summary;
			# the Adam optimizer is built with the constant initial rate.
			self._learning_rate = tf.train.exponential_decay(
				self._init_learning_rate,
				self._global_step,
				80000,
				self._learning_rate_decay_factor,
				staircase=True)

			optimizer = tf.train.AdamOptimizer(learning_rate=self._init_learning_rate,
				epsilon=1e-1)

			# Gradient clipping
			gradients,params = zip(*optimizer.compute_gradients(self._loss))
			self._clipped_gradients, self._global_norm = tf.clip_by_global_norm(gradients, self._max_gradient_norm)
			# Apply clipped gradients
			self._optimizer = optimizer.apply_gradients( zip(self._clipped_gradients, params), global_step=self._global_step )

		#########################################################################################################
		if is_training:
			with tf.name_scope('Summaries') as scope:
				### Summaries
				# Variables that do not influence the loss have a None gradient.
				# Test against None explicitly: the truth value of a Tensor must
				# not be used as a Python bool.
				clipped_resh = [tf.reshape(tensor,[-1]) for tensor in self._clipped_gradients if tensor is not None]
				clipped_resh = tf.concat(0,clipped_resh)

				# weight summaries
				# NOTE(review): the slices rely on the creation order of the
				# trainable variables (wo, bo first; presumably the encoder and
				# decoder cell variables next) -- verify if other variables are
				# created before this model.
				temp = tf.trainable_variables()
				ow = [tf.reshape(tensor,[-1]) for tensor in temp[:2]]
				ow = tf.concat(0,ow)
				encw = [tf.reshape(tensor,[-1]) for tensor in temp[2:4]]
				encw = tf.concat(0,encw)
				decw = [tf.reshape(tensor,[-1]) for tensor in temp[4:6]]
				decw = tf.concat(0,decw)

				# sum strings
				_ = tf.scalar_summary("loss",self._loss)
				_ = tf.scalar_summary('global_norm',self._global_norm)
				_ = tf.scalar_summary('learning rate',self._learning_rate)
				_ = tf.histogram_summary('clipped_gradients', clipped_resh)
				_ = tf.histogram_summary('output weights', ow)
				_ = tf.histogram_summary('encoder w', encw)
				_ = tf.histogram_summary('decoder w', decw)
				self._merged = tf.merge_all_summaries()

				# Accuracies are fed from outside; their summaries are created
				# after merge_all_summaries, so they are not part of self._merged.
				self._train_acc = tf.placeholder(tf.float32,name='train_accuracy')
				self._val_acc   = tf.placeholder(tf.float32,name='val_accuracy')
				self._train_acc_sum = tf.scalar_summary("Training accuracy",self._train_acc)
				self._val_acc_sum = tf.scalar_summary("Validation accuracy",self._val_acc)

		# checkpoint saver: only trainable variables are saved; every other
		# variable is collected in vars_to_init.
		self.vars_to_init = set(tf.all_variables()) - set(tf.trainable_variables())
		self.saver = tf.train.Saver(tf.trainable_variables())

		self.kk=0
	def __init__(self, config):
		"""Build the TensorFlow graph of the batched attention encoder-decoder.

		The graph contains:
		  * a bidirectional LSTM encoder over the instruction,
		  * an alignment (attention) model producing a context vector z_t,
		  * a hand-written LSTM decoder cell that takes the embedded world
		    state, the previous output and z_t,
		  * the training pipeline (sequence loss, Adam, global-norm clipping),
		  * a one-step test pipeline with batch size 1 fed through placeholders,
		  * the summary ops.

		self._maps['grid'] is read but not assigned here, so it has to exist
		before this constructor runs.

		Args:
			config: object providing batch_size, encoder_unrollings,
				decoder_unrollings, num_actions, vocab_size, num_nodes,
				embedding_world_state_size, learning_rate,
				learning_rate_decay_factor, max_gradient_norm and dropout_rate.
		"""
		# Maps' feature dictionaries (all features are the same for each map)
		self._map_feature_dict = get_landmark_set(self._maps['grid'])
		self._map_objects_dict = get_objects_set(self._maps['grid'])

		self._batch_size = config.batch_size
		self._encoder_unrollings = config.encoder_unrollings
		self._decoder_unrollings = config.decoder_unrollings
		self._num_actions = config.num_actions
		self._vocab_size = config.vocab_size
		# width of the world-state vector fed to the decoder at every step
		self._y_size = 4*len(self._map_feature_dict) + len(self._map_objects_dict)

		# Model parameters
		self._n_hidden = config.num_nodes 	# same for encoder and decoder
		self._embedding_world_state_size = config.embedding_world_state_size

		# Learning-rate decay is not applied in this model: _learning_rate stays
		# equal to the constant initial rate and is only exported as a summary.
		self._init_learning_rate = tf.constant(config.learning_rate)
		self._learning_rate = self._init_learning_rate
		self._learning_rate_decay_factor = config.learning_rate_decay_factor

		self._max_gradient_norm = config.max_gradient_norm

		# debug parameters
		self._train_dir = "tmp/"			# dir where checkpoint files will be saved

		# merging and writing vars
		self._writer = None

		# config.dropout_rate is used as the *keep* probability
		keep_prob = config.dropout_rate

		## TRAINING Placeholders
		self._encoder_inputs = []
		self._decoder_outputs = []
		self._world_state_vectors = []
		for i in xrange(self._encoder_unrollings):
			self._encoder_inputs.append( tf.placeholder(tf.float32,shape=[self._batch_size,self._vocab_size], name='x') )
		for i in xrange(self._decoder_unrollings):
			self._decoder_outputs.append( tf.placeholder(tf.int32,shape=[self._batch_size], name='actions') )
			self._world_state_vectors.append( tf.placeholder(tf.float32,shape=[self._batch_size,self._y_size], name='world_vect') )

		## TESTING / VALIDATION Placeholders (batch size 1)
		self._test_encoder_inputs = []
		for i in xrange(self._encoder_unrollings):
			self._test_encoder_inputs.append( tf.placeholder(tf.float32,shape=[1,self._vocab_size], name='test_x') )
		self._test_decoder_output = tf.placeholder(tf.float32,shape=[1,self._num_actions], name='test_action_t')
		self._test_st = tf.placeholder(tf.float32, [1, self._n_hidden], name='test_st')
		self._test_ct = tf.placeholder(tf.float32, [1, self._n_hidden], name='test_ct')
		self._test_yt = tf.placeholder(tf.float32, [1, self._y_size], name='test_yt')

		n_hidden = self._n_hidden
		emb_size = self._embedding_world_state_size
		# width of the context vector: encoder input x_j joined with h_j
		z_size = 2*self._n_hidden + self._vocab_size

		def trunc_normal(shape):
			# NOTE(review): the positional arguments of tf.truncated_normal are
			# (shape, mean, stddev), so this draws with mean=-0.1 and stddev=0.1,
			# not from the range [-0.1, 0.1] -- confirm this is intended.
			return tf.truncated_normal(shape, -0.1, 0.1)

		with tf.name_scope('Weights') as scope:
			# Alignment model weights
			W_a = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='W_a')
			U_a = tf.Variable(trunc_normal([self._vocab_size, n_hidden]), name='U_a')
			V_a = tf.Variable(trunc_normal([2*n_hidden, n_hidden]), name='V_a')
			v_a = tf.Variable(trunc_normal([n_hidden, 1]), name='v_a')
			tanh_bias_a = tf.Variable(trunc_normal([1,1]), name='tanh_bias_a')
			bias_a = tf.Variable(tf.zeros([1, n_hidden]), name='linear_bias_a')

			## Decoder variables
			# Input gate: input, previous output, context vector, and bias.
			ix = tf.Variable(trunc_normal([emb_size, n_hidden]), name='ix')
			im = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='ih')
			iz = tf.Variable(trunc_normal([z_size, n_hidden]), name='iz')
			ib = tf.Variable(tf.zeros([1, n_hidden]), name='ib')
			# Forget gate: input, previous output, context vector, and bias.
			fx = tf.Variable(trunc_normal([emb_size, n_hidden]), name='fx')
			fm = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='fh')
			fz = tf.Variable(trunc_normal([z_size, n_hidden]), name='fz')
			fb = tf.Variable(tf.zeros([1, n_hidden]), name='fb')
			# Memory cell: input, state, context vector, and bias.
			gx = tf.Variable(trunc_normal([emb_size, n_hidden]), name='cx')
			gm = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='cc')
			gz = tf.Variable(trunc_normal([z_size, n_hidden]), name='cz')
			gb = tf.Variable(tf.zeros([1, n_hidden]), name='cb')
			# Output gate: input, previous output, context vector, and bias.
			ox = tf.Variable(trunc_normal([emb_size, n_hidden]), name='ox')
			om = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='oh')
			oz = tf.Variable(trunc_normal([z_size, n_hidden]), name='oz')
			ob = tf.Variable(tf.zeros([1, n_hidden]), name='ob')
			# Embedding weight
			w_emby = tf.Variable(trunc_normal([self._y_size, emb_size]), name='Ey_w')
			b_emby = tf.Variable(tf.zeros([1, emb_size]), name='Ey_b')
			# Encoder - decoder transition
			w_trans_s = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='w_trans_s')
			b_trans_s = tf.Variable(tf.zeros([1, n_hidden]), name='b_trans_s')
			w_trans_c = tf.Variable(trunc_normal([n_hidden, n_hidden]), name='w_trans_c')
			b_trans_c = tf.Variable(tf.zeros([1, n_hidden]), name='b_trans_c')
			# Action Classifier weights and biases.
			ws = tf.Variable(trunc_normal([n_hidden, emb_size]), name='ws')
			wz = tf.Variable(trunc_normal([z_size, emb_size]), name='wz')
			wo = tf.Variable(trunc_normal([emb_size, self._num_actions]), name='wo')
			b_q = tf.Variable(tf.zeros([1, emb_size]), name='bq')
			b_o = tf.Variable(tf.zeros([1, self._num_actions]), name='bo')

		#######################################################################################################################
		## Encoder
		with tf.name_scope('Encoder') as scope:
			lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._vocab_size)
			lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._vocab_size)

			def encoder(encoder_inputs, batch_size=self._batch_size, is_training=True):
				"""Run the bidirectional encoder; dropout is only wrapped in when training."""
				fw_cell = lstm_fw_cell
				bw_cell = lstm_bw_cell
				if is_training and keep_prob < 1.0:
					fw_cell = tf.nn.rnn_cell.DropoutWrapper(
						fw_cell, output_keep_prob=keep_prob)
					bw_cell = tf.nn.rnn_cell.DropoutWrapper(
						bw_cell, output_keep_prob=keep_prob)

				# bidirectional_rnn comes from the enclosing module and is
				# unpacked into (per-step outputs, c1, h1).
				h,c1,h1 = bidirectional_rnn(
					fw_cell,bw_cell,
					encoder_inputs,
					dtype=tf.float32,
					sequence_length = self._encoder_unrollings * tf.ones([batch_size],tf.int64)
				)
				return h,c1,h1

		def decoder_cell(i, o, z, c_prev):
			"""One LSTM step with input i, previous output o, context z and previous cell c_prev."""
			input_gate  = tf.sigmoid(tf.matmul(i, ix) + tf.matmul(o, im) + tf.matmul(z,iz) + ib)
			forget_gate = tf.sigmoid(tf.matmul(i, fx) + tf.matmul(o, fm) + tf.matmul(z,fz) + fb)
			output_gate = tf.sigmoid(tf.matmul(i, ox) + tf.matmul(o, om) + tf.matmul(z,oz) + ob)
			# gt
			update = tf.tanh(tf.matmul(i, gx) + tf.matmul(o, gm) + tf.matmul(z,gz) + gb)
			# ct
			c_t = forget_gate * c_prev + input_gate * update
			s_t = output_gate * tf.tanh(c_t)
			return s_t, c_t

		# Alignment model
		with tf.name_scope('Aligner') as scope:
			def context_vector(s_prev,h_encoder,ux_vh,encoder_inputs,batch_size):
				"""Return the attention-weighted sum of (x_j, h_j) given the previous decoder output."""
				# alignment scores, one [batch_size x 1] column per encoder step
				beta = [tf.matmul(tf.tanh(tf.matmul(s_prev,W_a) + u_v + bias_a),v_a) + tanh_bias_a for u_v in ux_vh]
				beta = tf.concat(1,beta, name='beta')
				# weights of each (xj,hj)
				alpha = tf.nn.softmax(beta)	# shape: batch_size x encoder_unroll
				alpha = tf.split(1,self._encoder_unrollings,alpha, name='alpha')	# list of unrolling, each elmt of shape [batch_size x 1]
				# Plain zero tensor as accumulator: a tf.Variable here would add a
				# new trainable variable on every call.
				z_t = tf.zeros([batch_size , z_size], name='z_t')
				for j in xrange(self._encoder_unrollings):
					xh = tf.concat(1,[encoder_inputs[j],h_encoder[j]], name='xhj')  # (x_j, h_j)
					z_t += alpha[j] * xh
				return z_t

			def precalc_Ux_Vh(encoder_inputs,h_enc):
				"""Precompute U_a*x_j + V_a*h_j, the decoder-independent part of the alignment score."""
				ux_vh = []
				for i in xrange(self._encoder_unrollings):
					ux_vh.append( tf.matmul(encoder_inputs[i],U_a) + tf.matmul(h_enc[i],V_a) )
				return ux_vh

		#######################################################################################################################

		def model_encoder_decoder(encoder_inputs, world_state_vectors, batch_size):
			"""Unroll encoder and attention decoder; return per-step logits and softmax predictions."""
			h_encoder,c1,h1 = encoder(encoder_inputs, batch_size)
			U_V_precalc = precalc_Ux_Vh(encoder_inputs,h_encoder)

			## Decoder loop
			with tf.name_scope('Decoder') as scope:
				# Initial states: encoder final states passed through a tanh layer
				s_t = tf.tanh( tf.matmul(h1,w_trans_s)+b_trans_s , name='s_0')
				c_t = tf.tanh( tf.matmul(c1,w_trans_c)+b_trans_c , name='c_0')

				logits = [] # logits per unrolling
				predictions = []
				for i in xrange(self._decoder_unrollings):
					# world state vector at step i
					y_t = world_state_vectors[i]	# batch_size x num_local_feats (feat_id format)
					# embed world vector | relu nodes
					ey = tf.nn.relu(tf.matmul(y_t,w_emby) + b_emby, name='Ey')
					# context vector
					z_t = context_vector(s_t,h_encoder,U_V_precalc,encoder_inputs,batch_size)
					# Dropout
					ey = tf.nn.dropout(ey, keep_prob)
					s_t,c_t = decoder_cell(ey,s_t,z_t,c_t)
					s_t = tf.nn.dropout(s_t, keep_prob)
					# Hidden linear layer before output, projects z_t, y_t and s_t to an embedding-size layer
					hq = ey + tf.matmul(s_t,ws) + tf.matmul(z_t,wz) + b_q
					# Output layer
					logit = tf.matmul(hq,wo) + b_o
					prediction = tf.nn.softmax(logit,name='prediction')
					logits.append(logit)
					predictions.append(prediction)
				#END-FOR-DECODER-UNROLLING
			#END-DECODER-SCOPE
			return logits,predictions
		#END-MODEL

		with tf.variable_scope('Train_test_pipeline') as scope:
			logits,self._train_predictions = model_encoder_decoder(
				self._encoder_inputs,
				self._world_state_vectors,
				batch_size=self._batch_size)
			# Everything built from here on (the test encoder) reuses the
			# variables created by the training pipeline above.
			scope.reuse_variables()

			self._loss = tf.nn.seq2seq.sequence_loss(
				logits,
				targets=self._decoder_outputs,
				weights=[tf.ones(shape=[self._batch_size],dtype=tf.float32)
					for _ in range(self._decoder_unrollings)],
				name='train_loss')
			# Optimizer setup
			self._global_step = tf.Variable(0,trainable=False)

			# Only the variables that exist at this point are optimized.
			params = tf.trainable_variables()

			optimizer = tf.train.AdamOptimizer(learning_rate=self._init_learning_rate,
				epsilon=1e-4)
			# Gradient clipping
			gradients, v = zip(*optimizer.compute_gradients(self._loss,params))
			self._clipped_gradients, self._global_norm = tf.clip_by_global_norm(gradients, self._max_gradient_norm)
			# Apply clipped gradients
			self._optimizer = optimizer.apply_gradients( zip(self._clipped_gradients, v), global_step=self._global_step )

			##############################################################################################################
			## Testing: batch size 1, no dropout, one decoder step per run
			test_h,c1,h1 = encoder(self._test_encoder_inputs,1,False)
			# Same transition layer as in training, bias included.
			self._test_s0 = tf.tanh( tf.matmul(h1,w_trans_s)+b_trans_s , name='test_s0')
			self._test_c0 = tf.tanh( tf.matmul(c1,w_trans_c)+b_trans_c , name='test_c0')

			test_ux_vh = precalc_Ux_Vh(self._test_encoder_inputs,test_h)
			# embed world vector | relu nodes
			ey = tf.nn.relu(tf.matmul(self._test_yt,w_emby) + b_emby, name='Ey_test')
			# context vector
			z_t = context_vector(self._test_st,test_h,test_ux_vh,self._test_encoder_inputs,1)
			self._next_st,self._next_ct = decoder_cell(ey, self._test_st, z_t, self._test_ct)
			# Hidden linear layer before output, projects z_t, y_t and s_t to an embedding-size layer
			hq = ey + tf.matmul(self._next_st,ws) + tf.matmul(z_t,wz) + b_q
			logit = tf.matmul(hq,wo) + b_o
			self._test_prediction = tf.nn.softmax(logit,name='inf_prediction')
			self._test_loss = tf.nn.softmax_cross_entropy_with_logits(logit,self._test_decoder_output, name="test_loss")

		# Summaries
		# None entries (variables without gradient) cannot be reshaped; skip them.
		clipped_resh = [tf.reshape(tensor,[-1]) for tensor in self._clipped_gradients if tensor is not None]
		clipped_resh = tf.concat(0,clipped_resh)
		_ = tf.scalar_summary("loss",self._loss)
		_ = tf.scalar_summary('global_norm',self._global_norm)
		_ = tf.scalar_summary('learning rate',self._learning_rate)
		_ = tf.histogram_summary('clipped_gradients', clipped_resh)

		# No checkpoint saver is created in this constructor.
		self._merged = tf.merge_all_summaries()
Example #3
0
    def __init__(self, config, is_training=True):
        # Maps' feature dictionary
        self._map_feature_dict = get_landmark_set(
            self._maps['grid'])  # all featureas are the same for each map
        self._map_objects_dict = get_objects_set(self._maps['grid'])

        self._max_encoder_unrollings = config.encoder_unrollings
        self._max_decoder_unrollings = config.decoder_unrollings
        self._num_actions = config.num_actions
        self._vocab_size = config.vocab_size
        self._y_size = 4 * len(self._map_feature_dict) + len(
            self._map_objects_dict)

        # Model parameters
        self._n_hidden = config.num_nodes  # same for encoder and decoder
        self._embedding_world_state_size = config.embedding_world_state_size

        self._init_learning_rate = tf.constant(config.learning_rate)
        self._learning_rate = self._init_learning_rate
        self._learning_rate_decay_factor = config.learning_rate_decay_factor

        self._max_gradient_norm = config.max_gradient_norm

        # debug parameters
        self._train_dir = "tmp/"  # dir where checkpoint files will be saves

        # merging and writing vars
        self._writer = None

        # Dropout rate
        keep_prob = config.dropout_rate

        ## TRAINING Placeholders
        self._encoder_inputs = []
        self._encoder_unrollings = tf.placeholder('int64')

        self._decoder_outputs = []
        self._decoder_unrollings = tf.placeholder('int64')
        self._world_state_vectors = [
        ]  # original sample structure, containing complete path, start_pos, end_pos, and map name
        for i in xrange(self._max_encoder_unrollings):
            self._encoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=[1, self._vocab_size],
                               name='x'))
        for i in xrange(self._max_decoder_unrollings):
            self._decoder_outputs.append(
                tf.placeholder(tf.int32, shape=[1], name='actions'))
            self._world_state_vectors.append(
                tf.placeholder(tf.float32,
                               shape=[1, self._y_size],
                               name='world_vect'))

        ## TESTING / VALIDATION Placeholders
        self._test_st = tf.placeholder(tf.float32, [1, self._n_hidden],
                                       name='test_st')
        self._test_ct = tf.placeholder(tf.float32, [1, self._n_hidden],
                                       name='test_ct')
        self._test_yt = tf.placeholder(tf.float32, [1, self._y_size],
                                       name='test_yt')
        self._test_decoder_output = tf.placeholder(
            tf.float32, shape=[1, self._num_actions], name='test_action')

        with tf.name_scope('Weights') as scope:
            # Alignment model weights
            W_a = tf.Variable(weight_initializer(
                [self._n_hidden, self._n_hidden]),
                              name='W_a')
            U_a = tf.Variable(weight_initializer(
                [self._vocab_size, self._n_hidden]),
                              name='U_a')
            V_a = tf.Variable(weight_initializer(
                [2 * self._n_hidden, self._n_hidden]),
                              name='V_a')
            v_a = tf.Variable(weight_initializer([self._n_hidden, 1]),
                              name='v_a')
            tanh_bias_a = tf.Variable(weight_initializer([1, 1]),
                                      name='tanh_bias_a')
            bias_a = tf.Variable(tf.zeros([1, self._n_hidden]),
                                 name='linear_bias_a')

            # Embedding weight
            w_emby = tf.Variable(weight_initializer(
                [self._y_size, self._embedding_world_state_size]),
                                 name='Ey_w')
            b_emby = tf.Variable(tf.zeros(
                [1, self._embedding_world_state_size]),
                                 name='Ey_b')
            """
			# Encoder - decoder transition
			w_trans_s = tf.Variable(tf.truncated_normal([self._n_hidden, self._n_hidden], -0.1, 0.1), name='w_trans_s')
			b_trans_s = tf.Variable(tf.zeros([1,self._n_hidden	]), name='b_trans_s')
			w_trans_c = tf.Variable(tf.truncated_normal([self._n_hidden, self._n_hidden], -0.1, 0.1), name='w_trans_c')
			b_trans_c = tf.Variable(tf.zeros([1,self._n_hidden	]), name='b_trans_c')
			"""
            # Action Classifier weights and biases.
            ws = tf.Variable(weight_initializer(
                [self._n_hidden, self._embedding_world_state_size]),
                             name='ws')
            wz = tf.Variable(weight_initializer([
                2 * self._n_hidden + self._vocab_size,
                self._embedding_world_state_size
            ]),
                             name='wz')
            wo = tf.Variable(weight_initializer(
                [self._embedding_world_state_size, self._num_actions]),
                             name='wo')
            b_q = tf.Variable(tf.zeros([1, self._embedding_world_state_size]),
                              name='bq')
            b_o = tf.Variable(tf.zeros([1, self._num_actions]), name='bo')

        #######################################################################################################################
        ## Encoder
        with tf.variable_scope('Encoder') as scope:
            fw_cell = CustomLSTMCell(self._n_hidden,
                                     forget_bias=1.0,
                                     input_size=self._vocab_size)
            bw_cell = CustomLSTMCell(self._n_hidden,
                                     forget_bias=1.0,
                                     input_size=self._vocab_size)

            fw_cell_dp = tf.nn.rnn_cell.DropoutWrapper(
                fw_cell, output_keep_prob=keep_prob)
            bw_cell_dp = tf.nn.rnn_cell.DropoutWrapper(
                bw_cell, output_keep_prob=keep_prob)

            h_encoder, c1h1 = bidirectional_rnn(
                fw_cell_dp,
                bw_cell_dp,
                self._encoder_inputs,
                dtype=tf.float32,
                sequence_length=self._encoder_unrollings *
                tf.ones([1], tf.int64),
                scope='Encoder')
        #END-ENCODER-SCOPE

        #######################################################################################################################
        # Alignment model
        with tf.name_scope('Aligner') as scope:

            def context_vector(s_prev, h_encoder, ux_vh, encoder_inputs):
                # alignment model
                beta = []
                for i in xrange(self._max_encoder_unrollings):
                    beta.append(
                        tf.cond(
                            tf.less(tf.constant(i, dtype=tf.int64),
                                    self._encoder_unrollings),
                            lambda: tf.matmul(
                                tf.tanh(
                                    tf.matmul(s_prev, W_a) + ux_vh[i] + bias_a
                                ), v_a) + tanh_bias_a,
                            lambda: tf.zeros([1, 1])))
                beta = tf.concat(1, beta, name='beta')
                # weights of each (xj,hj)
                alpha = tf.nn.softmax(
                    beta)  # shape: batch_size x encoder_unroll
                alpha = tf.split(
                    1, self._max_encoder_unrollings, alpha, name='alpha'
                )  # list of unrolling, each elmt of shape [batch_size x 1]
                z_t = tf.zeros([1, 2 * self._n_hidden + self._vocab_size])

                for j in xrange(self._max_encoder_unrollings):
                    xh = tf.cond(
                        tf.less(tf.constant(j, dtype=tf.int64),
                                self._encoder_unrollings),
                        lambda: tf.concat(1, [encoder_inputs[j], h_encoder[j]],
                                          name='xhj'),  # (x_j, h_j)
                        lambda: tf.zeros(
                            [1, 2 * self._n_hidden + self._vocab_size]))
                    z_t += alpha[j] * xh
                return z_t

            def precalc_Ux_Vh(encoder_inputs, h_enc):
                ux_vh = []
                for i in xrange(self._max_encoder_unrollings):
                    ux_vh.append(
                        tf.cond(
                            tf.less(tf.constant(i, dtype=tf.int64),
                                    self._encoder_unrollings),
                            lambda: tf.matmul(encoder_inputs[i], U_a
                                              ) + tf.matmul(h_enc[i], V_a),
                            lambda: tf.zeros([1, self._n_hidden])))
                return ux_vh

            U_V_precalc = precalc_Ux_Vh(self._encoder_inputs, h_encoder)

        #######################################################################################################################
        ## Decoder loop
        with tf.variable_scope('Decoder') as scope:
            dec_cell = CustomLSTMCell(self._n_hidden,
                                      forget_bias=1.0,
                                      input_size=self._vocab_size)
            dec_cell_dp = tf.nn.rnn_cell.DropoutWrapper(
                dec_cell, output_keep_prob=keep_prob)
            # Initial states
            #s_t = tf.tanh( tf.matmul(h1,w_trans_s)+b_trans_s , name='s_0')
            #c_t = tf.tanh( tf.matmul(c1,w_trans_c)+b_trans_c , name='c_0')
            c_t, s_t = tf.split(1, 2, c1h1)
            state = c1h1

            logits = []  # logits per rolling
            self._train_predictions = []
            for i in xrange(self._max_decoder_unrollings):
                if i > 0: tf.get_variable_scope().reuse_variables()
                # world state vector at step i
                y_t = tf.cond(
                    tf.less(tf.constant(i, dtype=tf.int64),
                            self._decoder_unrollings),
                    lambda: self._world_state_vectors[
                        i],  # batch_size x num_local_feats (feat_id format)
                    lambda: tf.zeros([1, self._y_size]))
                # embeed world vector | relu nodes
                ey = tf.nn.relu(tf.matmul(y_t, w_emby) + b_emby, name='Ey')
                # run attention mechanism
                z_t = context_vector(s_t, h_encoder, U_V_precalc,
                                     self._encoder_inputs)

                # merge inputs and attention prev state
                dec_input = tf.concat(1, [ey, z_t])
                s_t, state = dec_cell_dp(dec_input,
                                         state,
                                         scope="CustomLSTMCell")

                # Hidden linear layer before output, proyects z_t,y_t, and s_t to an embeeding-size layer
                hq = ey + tf.matmul(s_t, ws) + tf.matmul(z_t, wz) + b_q
                # Output layer
                logit = tf.matmul(hq, wo) + b_o
                fill_pred = tf.constant([[0., 0., 0., 0.,
                                          1.]])  # one-hot vector for PAD
                prediction = tf.cond(
                    tf.less(tf.constant(i, dtype=tf.int64),
                            self._decoder_unrollings),
                    lambda: tf.nn.softmax(logit, name='prediction'),
                    lambda: fill_pred)
                logits.append(logit)
                self._train_predictions.append(prediction)
            #END-FOR-DECODER-UNROLLING
            # Loss definition
            reshaped_dec_outputs = []
            for i in xrange(self._max_decoder_unrollings):
                out = tf.cond(
                    tf.less(tf.constant(i, dtype=tf.int64),
                            self._decoder_unrollings),
                    lambda: self._decoder_outputs[i],
                    lambda: 4 * tf.ones([1], dtype=tf.int32))
                reshaped_dec_outputs.append(out)
            self._loss = tf.nn.seq2seq.sequence_loss(
                logits,
                targets=reshaped_dec_outputs,
                weights=[tf.ones([1], dtype=tf.float32)] *
                self._max_decoder_unrollings,
                #np.ones(shape=(self._max_decoder_unrollings,1),dtype=np.float32),
                name='train_loss')

        ###################################################################################################################
        # TESTING
        with tf.variable_scope('Encoder', reuse=True) as scope:
            test_h, c1h1 = bidirectional_rnn(
                fw_cell,
                bw_cell,
                self._encoder_inputs,
                dtype=tf.float32,
                sequence_length=self._encoder_unrollings *
                tf.ones([1], tf.int64),
                scope='Encoder')

        #self._test_s0 = tf.tanh( tf.matmul(h1,w_trans_s)+b_trans_s, name='test_s0')
        #self._test_c0 = tf.tanh( tf.matmul(c1,w_trans_c)+b_trans_c, name='test_c0')
        self._test_c0, self._test_s0 = tf.split(1, 2, c1h1)

        test_ux_vh = precalc_Ux_Vh(self._encoder_inputs, test_h)

        with tf.variable_scope('Decoder', reuse=True) as scope:
            # embeed world vector | relu nodes
            ey = tf.nn.relu(tf.matmul(self._test_yt, w_emby) + b_emby,
                            name='Ey_test')
            # context vector
            z_t = context_vector(self._test_st, test_h, test_ux_vh,
                                 self._encoder_inputs)

            state = tf.concat(1, [self._test_ct, self._test_st])
            dec_input = tf.concat(1, [ey, z_t])

            _, temp = dec_cell(dec_input, state, scope="CustomLSTMCell")
            self._next_ct, self._next_st = tf.split(1, 2, temp)

            # Hidden linear layer before output, proyects z_t,y_t, and s_t to an embeeding-size layer
            hq = ey + tf.matmul(self._next_st, ws) + tf.matmul(z_t, wz) + b_q
            logit = tf.matmul(hq, wo) + b_o
            self._test_prediction = tf.nn.softmax(logit, name='inf_prediction')
            # Loss definition
            self._test_loss = tf.nn.softmax_cross_entropy_with_logits(
                logit, self._test_decoder_output, name="test_loss")
        #END-DECODER-SCOPE

        with tf.variable_scope('Optimization') as scope:
            # Optimizer setup
            self._global_step = tf.Variable(0, trainable=False)

            self._learning_rate = tf.train.exponential_decay(
                self._init_learning_rate,
                self._global_step,
                80000,
                self._learning_rate_decay_factor,
                staircase=True)

            #optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self._learning_rate, epsilon=1e-1)
            # Gradient clipping
            #gradients = tf.gradients(self._loss,params)
            gradients, params = zip(*optimizer.compute_gradients(self._loss))
            self._clipped_gradients, self._global_norm = tf.clip_by_global_norm(
                gradients, self._max_gradient_norm)
            # Apply clipped gradients
            self._optimizer = optimizer.apply_gradients(
                zip(self._clipped_gradients, params),
                global_step=self._global_step)

        with tf.name_scope('Summaries') as scope:
            # Summaries
            clipped_resh = [
                tf.reshape(tensor, [-1]) for tensor in self._clipped_gradients
                if tensor
            ]
            clipped_resh = tf.concat(0, clipped_resh)
            # weight summaries
            temp = tf.trainable_variables()
            alignw = [tf.reshape(tensor, [-1]) for tensor in temp[:6]]
            alignw = tf.concat(0, alignw)
            eyw = [tf.reshape(tensor, [-1]) for tensor in temp[6:8]]
            eyw = tf.concat(0, eyw)
            how = [tf.reshape(tensor, [-1]) for tensor in temp[8:10]]
            how = tf.concat(0, how)
            ow = [tf.reshape(tensor, [-1]) for tensor in temp[10:13]]
            ow = tf.concat(0, ow)

            encw = [tf.reshape(tensor, [-1]) for tensor in temp[13:17]]
            encw = tf.concat(0, encw)
            decw = [tf.reshape(tensor, [-1]) for tensor in temp[17:19]]
            decw = tf.concat(0, decw)

            # sum strings
            _ = tf.scalar_summary("loss", self._loss)
            _ = tf.scalar_summary('global_norm', self._global_norm)
            _ = tf.scalar_summary('learning rate', self._learning_rate)
            _ = tf.histogram_summary('clipped_gradients', clipped_resh)
            _ = tf.histogram_summary('aligner', alignw)
            _ = tf.histogram_summary('Y embedding', eyw)
            _ = tf.histogram_summary('hidden output layer', how)
            _ = tf.histogram_summary('output layer', ow)
            _ = tf.histogram_summary('encoder w', encw)
            _ = tf.histogram_summary('decoder w', decw)

            self._merged = tf.merge_all_summaries()

            # include accuracies as summaries
            self._train_acc = tf.placeholder(tf.float32, name='train_accuracy')
            self._val_acc = tf.placeholder(tf.float32, name='val_accuracy')
            self._train_acc_sum = tf.scalar_summary("Training accuracy",
                                                    self._train_acc)
            self._val_acc_sum = tf.scalar_summary("Validation accuracy",
                                                  self._val_acc)

        # checkpoint saver
        self.saver = tf.train.Saver(tf.all_variables())
        self.vars_to_init = set(tf.all_variables()) - set(
            tf.trainable_variables())
        self.saver = tf.train.Saver(tf.trainable_variables())
    def __init__(self, config):
        """Build the TensorFlow graph of the batched attention encoder-decoder.

        The graph consists of a bidirectional LSTM encoder over the
        ``vocab_size``-wide input vectors, an additive alignment (attention)
        model, and a hand-written LSTM decoder unrolled
        ``config.decoder_unrollings`` times for training.  A second,
        single-step decoder with batch size 1 is built for testing /
        validation and shares every weight with the training graph.

        Args:
            config: object exposing ``batch_size``, ``encoder_unrollings``,
                ``decoder_unrollings``, ``num_actions``, ``vocab_size``,
                ``num_nodes``, ``embedding_world_state_size``,
                ``learning_rate``, ``learning_rate_decay_factor``,
                ``max_gradient_norm`` and ``dropout_rate``.

        Note:
            ``self._maps['grid']`` is read on the first line, so it must
            already be reachable through ``self`` before this constructor
            runs (presumably a class attribute -- confirm against the class
            definition).
        """
        # Maps' feature dictionary
        self._map_feature_dict = get_landmark_set(
            self._maps['grid'])  # all features are the same for each map
        self._map_objects_dict = get_objects_set(self._maps['grid'])

        self._batch_size = config.batch_size
        self._encoder_unrollings = config.encoder_unrollings
        self._decoder_unrollings = config.decoder_unrollings
        self._num_actions = config.num_actions
        self._vocab_size = config.vocab_size
        # Width of a world-state vector: 4 slots per landmark feature plus
        # one slot per object.
        self._y_size = 4 * len(self._map_feature_dict) + len(
            self._map_objects_dict)

        # Model parameters
        self._n_hidden = config.num_nodes  # same for encoder and decoder
        self._embedding_world_state_size = config.embedding_world_state_size

        self._init_learning_rate = tf.constant(config.learning_rate)
        self._learning_rate = self._init_learning_rate
        # Stored for completeness; no decay schedule is built in this graph.
        self._learning_rate_decay_factor = config.learning_rate_decay_factor

        self._max_gradient_norm = config.max_gradient_norm

        # debug parameters
        self._train_dir = "tmp/"  # dir where checkpoint files will be saved

        # merging and writing vars
        self._writer = None

        # Despite the config name, this value is used as the *keep*
        # probability of every dropout op below.
        keep_prob = config.dropout_rate

        # Local shorthands for the layer widths used repeatedly below.
        n_hidden = self._n_hidden
        emb_size = self._embedding_world_state_size
        # Width of one annotation (x_j, h_j): the input vector concatenated
        # with the 2*n_hidden-wide bidirectional encoder output.
        ctx_size = 2 * n_hidden + self._vocab_size

        ## TRAINING Placeholders (one placeholder per unrolled step)
        self._encoder_inputs = []
        self._decoder_outputs = []
        self._world_state_vectors = []
        for i in xrange(self._encoder_unrollings):
            self._encoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=[self._batch_size, self._vocab_size],
                               name='x'))
        for i in xrange(self._decoder_unrollings):
            # Target action ids, one per batch element.
            self._decoder_outputs.append(
                tf.placeholder(tf.int32,
                               shape=[self._batch_size],
                               name='actions'))
            # World-state feature vector fed to the decoder at this step.
            self._world_state_vectors.append(
                tf.placeholder(tf.float32,
                               shape=[self._batch_size, self._y_size],
                               name='world_vect'))

        ## TESTING / VALIDATION Placeholders (batch size fixed to 1)
        self._test_encoder_inputs = []
        for i in xrange(self._encoder_unrollings):
            self._test_encoder_inputs.append(
                tf.placeholder(tf.float32,
                               shape=[1, self._vocab_size],
                               name='test_x'))
        self._test_decoder_output = tf.placeholder(
            tf.float32, shape=[1, self._num_actions], name='test_action_t')
        # Decoder output / cell state / world state for one test step.
        self._test_st = tf.placeholder(tf.float32, [1, n_hidden],
                                       name='test_st')
        self._test_ct = tf.placeholder(tf.float32, [1, n_hidden],
                                       name='test_ct')
        self._test_yt = tf.placeholder(tf.float32, [1, self._y_size],
                                       name='test_yt')

        def weight(shape, name):
            """Create a randomly initialised weight variable of `shape`."""
            # NOTE(review): the positional arguments of tf.truncated_normal
            # are (mean, stddev), so this samples around -0.1 with stddev
            # 0.1.  If a symmetric [-0.1, 0.1] range was intended, confirm
            # and switch initialiser; kept as-is to preserve behaviour.
            return tf.Variable(tf.truncated_normal(shape, -0.1, 0.1),
                               name=name)

        def zero_bias(width, name):
            """Create a zero-initialised [1, width] bias variable."""
            return tf.Variable(tf.zeros([1, width]), name=name)

        # Variable names and creation order below are kept stable on purpose.
        with tf.name_scope('Weights') as scope:
            # Alignment model weights
            W_a = weight([n_hidden, n_hidden], 'W_a')
            U_a = weight([self._vocab_size, n_hidden], 'U_a')
            V_a = weight([2 * n_hidden, n_hidden], 'V_a')
            v_a = weight([n_hidden, 1], 'v_a')
            tanh_bias_a = weight([1, 1], 'tanh_bias_a')
            bias_a = zero_bias(n_hidden, 'linear_bias_a')

            ## Decoder variables
            # Input gate: input, previous output, context vector, and bias.
            ix = weight([emb_size, n_hidden], 'ix')
            im = weight([n_hidden, n_hidden], 'ih')
            iz = weight([ctx_size, n_hidden], 'iz')
            ib = zero_bias(n_hidden, 'ib')
            # Forget gate: input, previous output, context vector, and bias.
            fx = weight([emb_size, n_hidden], 'fx')
            fm = weight([n_hidden, n_hidden], 'fh')
            fz = weight([ctx_size, n_hidden], 'fz')
            fb = zero_bias(n_hidden, 'fb')
            # Memory cell: input, state, context vector, and bias.
            gx = weight([emb_size, n_hidden], 'cx')
            gm = weight([n_hidden, n_hidden], 'cc')
            gz = weight([ctx_size, n_hidden], 'cz')
            gb = zero_bias(n_hidden, 'cb')
            # Output gate: input, previous output, context vector, and bias.
            ox = weight([emb_size, n_hidden], 'ox')
            om = weight([n_hidden, n_hidden], 'oh')
            oz = weight([ctx_size, n_hidden], 'oz')
            ob = zero_bias(n_hidden, 'ob')
            # World-state embedding
            w_emby = weight([self._y_size, emb_size], 'Ey_w')
            b_emby = zero_bias(emb_size, 'Ey_b')
            # Encoder - decoder transition
            w_trans_s = weight([n_hidden, n_hidden], 'w_trans_s')
            b_trans_s = zero_bias(n_hidden, 'b_trans_s')
            w_trans_c = weight([n_hidden, n_hidden], 'w_trans_c')
            b_trans_c = zero_bias(n_hidden, 'b_trans_c')
            # Action classifier weights and biases.
            ws = weight([n_hidden, emb_size], 'ws')
            wz = weight([ctx_size, emb_size], 'wz')
            wo = weight([emb_size, self._num_actions], 'wo')
            b_q = zero_bias(emb_size, 'bq')
            b_o = zero_bias(self._num_actions, 'bo')

        #######################################################################
        ## Encoder
        with tf.name_scope('Encoder') as scope:
            lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(
                n_hidden, forget_bias=1.0, input_size=self._vocab_size)
            lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(
                n_hidden, forget_bias=1.0, input_size=self._vocab_size)

            def encoder(encoder_inputs,
                        batch_size=self._batch_size,
                        is_training=True):
                """Run the bidirectional encoder over `encoder_inputs`.

                Output dropout is applied only when `is_training` is true
                and `keep_prob` is below 1.  Returns whatever
                `bidirectional_rnn` returns, unpacked as (h, c1, h1).
                """
                fw_cell = lstm_fw_cell
                bw_cell = lstm_bw_cell
                if is_training and keep_prob < 1.0:
                    fw_cell = tf.nn.rnn_cell.DropoutWrapper(
                        fw_cell, output_keep_prob=keep_prob)
                    bw_cell = tf.nn.rnn_cell.DropoutWrapper(
                        bw_cell, output_keep_prob=keep_prob)

                # Every sequence in the batch is treated as full length.
                seq_lengths = self._encoder_unrollings * tf.ones(
                    [batch_size], tf.int64)
                h, c1, h1 = bidirectional_rnn(fw_cell,
                                              bw_cell,
                                              encoder_inputs,
                                              dtype=tf.float32,
                                              sequence_length=seq_lengths)
                return h, c1, h1

        def decoder_cell(i, o, z, c_prev):
            """One step of the context-aware LSTM decoder.

            `i` is the embedded world state, `o` the previous decoder
            output, `z` the context vector and `c_prev` the previous cell
            state.  Returns the new (output, cell state) pair.
            """
            input_gate = tf.sigmoid(
                tf.matmul(i, ix) + tf.matmul(o, im) + tf.matmul(z, iz) + ib)
            forget_gate = tf.sigmoid(
                tf.matmul(i, fx) + tf.matmul(o, fm) + tf.matmul(z, fz) + fb)
            output_gate = tf.sigmoid(
                tf.matmul(i, ox) + tf.matmul(o, om) + tf.matmul(z, oz) + ob)
            # Candidate cell update (g_t).
            update = tf.tanh(
                tf.matmul(i, gx) + tf.matmul(o, gm) + tf.matmul(z, gz) + gb)
            c_t = forget_gate * c_prev + input_gate * update
            s_t = output_gate * tf.tanh(c_t)
            return s_t, c_t

        # Alignment model
        with tf.name_scope('Aligner') as scope:

            def context_vector(s_prev, h_encoder, ux_vh, encoder_inputs,
                               batch_size):
                """Attention-weighted sum of the annotations (x_j, h_j).

                `ux_vh` holds the precomputed U_a*x_j + V_a*h_j terms (see
                `precalc_Ux_Vh`); `s_prev` is the previous decoder output.
                """
                # Unnormalised alignment score for every encoder position.
                beta = [
                    tf.matmul(tf.tanh(tf.matmul(s_prev, W_a) + u_v + bias_a),
                              v_a) + tanh_bias_a for u_v in ux_vh
                ]
                beta = tf.concat(1, beta, name='beta')
                # Normalised weights, shape: batch_size x encoder_unrollings
                alpha = tf.nn.softmax(beta)
                # One [batch_size x 1] weight column per encoder position.
                alpha = tf.split(1,
                                 self._encoder_unrollings,
                                 alpha,
                                 name='alpha')
                # Plain zero tensor, not a tf.Variable: a Variable here is
                # trainable by default, which would turn the accumulator's
                # start value into a learned, batch-position-dependent
                # parameter created anew on every call.
                z_t = tf.zeros([batch_size, ctx_size], name='z_t')
                for j in xrange(self._encoder_unrollings):
                    xh = tf.concat(1, [encoder_inputs[j], h_encoder[j]],
                                   name='xhj')  # (x_j, h_j)
                    z_t += alpha[j] * xh
                return z_t

            def precalc_Ux_Vh(encoder_inputs, h_enc):
                """Precompute U_a*x_j + V_a*h_j for every encoder position.

                These terms do not depend on the decoder state, so they are
                computed once and reused at every decoder step.
                """
                ux_vh = []
                for i in xrange(self._encoder_unrollings):
                    ux_vh.append(
                        tf.matmul(encoder_inputs[i], U_a) +
                        tf.matmul(h_enc[i], V_a))
                return ux_vh

        #######################################################################

        def model_encoder_decoder(encoder_inputs, world_state_vectors,
                                  batch_size):
            """Build the unrolled training model.

            Returns a pair of lists (logits, predictions) with one entry
            per decoder unrolling.
            """
            # Forward the caller's batch size instead of silently relying on
            # the encoder's default.
            h_encoder, c1, h1 = encoder(encoder_inputs, batch_size)
            U_V_precalc = precalc_Ux_Vh(encoder_inputs, h_encoder)

            ## Decoder loop
            with tf.name_scope('Decoder') as scope:
                # Initial decoder states derived from the encoder's final
                # states through a learned tanh transition.
                s_t = tf.tanh(tf.matmul(h1, w_trans_s) + b_trans_s, name='s_0')
                c_t = tf.tanh(tf.matmul(c1, w_trans_c) + b_trans_c, name='c_0')

                logits = []  # logits per unrolling
                predictions = []
                for i in xrange(self._decoder_unrollings):
                    # world state vector at step i
                    y_t = world_state_vectors[i]
                    # embed world vector | relu nodes
                    ey = tf.nn.relu(tf.matmul(y_t, w_emby) + b_emby, name='Ey')
                    # context vector, computed from the previous output
                    z_t = context_vector(s_t, h_encoder, U_V_precalc,
                                         encoder_inputs, batch_size)
                    # Dropout on the embedded input and on the cell output
                    ey = tf.nn.dropout(ey, keep_prob)
                    s_t, c_t = decoder_cell(ey, s_t, z_t, c_t)
                    s_t = tf.nn.dropout(s_t, keep_prob)
                    # Hidden linear layer before output: projects z_t and
                    # s_t to the embedding size and adds the embedded y_t.
                    hq = ey + tf.matmul(s_t, ws) + tf.matmul(z_t, wz) + b_q
                    # Output layer
                    logit = tf.matmul(hq, wo) + b_o
                    prediction = tf.nn.softmax(logit, name='prediction')
                    logits.append(logit)
                    predictions.append(prediction)
                #END-FOR-DECODER-UNROLLING
            #END-DECODER-SCOPE
            return logits, predictions

        #END-MODEL

        with tf.variable_scope('Train_test_pipeline') as scope:
            logits, self._train_predictions = model_encoder_decoder(
                self._encoder_inputs,
                self._world_state_vectors,
                batch_size=self._batch_size)
            # Later get_variable calls in this scope (the test encoder)
            # reuse the variables created for the training graph.
            scope.reuse_variables()

            # Every step and every batch element is weighted equally.
            self._loss = tf.nn.seq2seq.sequence_loss(
                logits,
                targets=self._decoder_outputs,
                weights=[
                    tf.ones(shape=[self._batch_size], dtype=tf.float32)
                    for _ in range(self._decoder_unrollings)
                ],
                name='train_loss')
            # Optimizer setup
            self._global_step = tf.Variable(0, trainable=False)
            # No learning-rate decay here: the optimizer below is driven by
            # the constant initial learning rate.

            # Snapshot of the trainable variables of the training graph.
            params = tf.trainable_variables()

            #optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self._init_learning_rate, epsilon=1e-4)
            # Gradient clipping
            gradients, v = zip(
                *optimizer.compute_gradients(self._loss, params))
            self._clipped_gradients, self._global_norm = tf.clip_by_global_norm(
                gradients, self._max_gradient_norm)
            # Apply clipped gradients
            self._optimizer = optimizer.apply_gradients(
                zip(self._clipped_gradients, v), global_step=self._global_step)

            ###################################################################
            ## Testing: encoder run once, then one decoder step per session
            ## call, with the state fed back through the test placeholders.
            test_h, c1, h1 = encoder(self._test_encoder_inputs, 1, False)
            # Same transition as the training-time s_0 / c_0, bias included,
            # so that inference starts from the state the model was trained
            # to start from.
            self._test_s0 = tf.tanh(tf.matmul(h1, w_trans_s) + b_trans_s,
                                    name='test_s0')
            self._test_c0 = tf.tanh(tf.matmul(c1, w_trans_c) + b_trans_c,
                                    name='test_c0')

            test_ux_vh = precalc_Ux_Vh(self._test_encoder_inputs, test_h)
            # embed world vector | relu nodes (no dropout at test time)
            ey = tf.nn.relu(tf.matmul(self._test_yt, w_emby) + b_emby,
                            name='Ey_test')
            # context vector
            z_t = context_vector(self._test_st, test_h, test_ux_vh,
                                 self._test_encoder_inputs, 1)
            self._next_st, self._next_ct = decoder_cell(
                ey, self._test_st, z_t, self._test_ct)
            # Hidden linear layer before output, as in the training decoder.
            hq = ey + tf.matmul(self._next_st, ws) + tf.matmul(z_t, wz) + b_q
            logit = tf.matmul(hq, wo) + b_o
            self._test_prediction = tf.nn.softmax(logit, name='inf_prediction')
            self._test_loss = tf.nn.softmax_cross_entropy_with_logits(
                logit, self._test_decoder_output, name="test_loss")

        # Summaries
        # Flatten all clipped gradients into one vector for the histogram;
        # entries that are None (variables without a gradient) are skipped
        # because they cannot be reshaped.
        clipped_resh = [
            tf.reshape(grad, [-1]) for grad in self._clipped_gradients
            if grad is not None
        ]
        clipped_resh = tf.concat(0, clipped_resh)
        _ = tf.scalar_summary("loss", self._loss)
        _ = tf.scalar_summary('global_norm', self._global_norm)
        _ = tf.scalar_summary('learning rate', self._learning_rate)
        _ = tf.histogram_summary('clipped_gradients', clipped_resh)

        # checkpoint saver
        #self.saver = tf.train.Saver(tf.all_variables())
        self._merged = tf.merge_all_summaries()
	def __init__(self, config, is_training=True):
		# Maps' feature dictionary
		self._map_feature_dict = get_landmark_set(self._maps['grid']) # all featureas are the same for each map
		self._map_objects_dict = get_objects_set(self._maps['grid'])

		self._max_encoder_unrollings 	= config.encoder_unrollings
		self._max_decoder_unrollings 	= config.decoder_unrollings
		self._num_actions				= config.num_actions
		self._vocab_size 				= config.vocab_size
		self._y_size  					= 4*len(self._map_feature_dict) + len(self._map_objects_dict)

		# Model parameters
		self._n_hidden 					 	= config.num_nodes 	# same for encoder and decoder
		self._embedding_world_state_size = config.embedding_world_state_size

		self._init_learning_rate 			= tf.constant(config.learning_rate)		
		self._learning_rate 		 			= self._init_learning_rate
		self._learning_rate_decay_factor = config.learning_rate_decay_factor

		self._max_gradient_norm	= config.max_gradient_norm
		
		# debug parameters
		self._train_dir = "tmp/"			# dir where checkpoint files will be saves
		
		# merging and writing vars
		self._writer = None

		# Dropout rate
		keep_prob = config.dropout_rate

		## TRAINING Placeholders
		self._encoder_inputs = []
		self._encoder_unrollings = tf.placeholder('int64')

		self._decoder_outputs = []
		self._decoder_unrollings = tf.placeholder('int64')
		self._world_state_vectors = [] 	# original sample structure, containing complete path, start_pos, end_pos, and map name
		for i in xrange(self._max_encoder_unrollings):
			self._encoder_inputs.append( tf.placeholder(tf.float32,shape=[1,self._vocab_size], name='x') )
		for i in xrange(self._max_decoder_unrollings):
			self._decoder_outputs.append( tf.placeholder(tf.int32,shape=[1], name='actions') )
			self._world_state_vectors.append( tf.placeholder(tf.float32,shape=[1,self._y_size], name='world_vect') )

		## TESTING / VALIDATION Placeholders
		self._test_st = tf.placeholder(tf.float32, [1, self._n_hidden], name='test_st')
		self._test_ct = tf.placeholder(tf.float32, [1, self._n_hidden], name='test_ct')
		self._test_yt = tf.placeholder(tf.float32, [1, self._y_size], name='test_yt')
		self._test_decoder_output = tf.placeholder(tf.float32,shape=[1,self._num_actions], name='test_action')

		with tf.name_scope('Weights') as scope:
			# Alignment model weights
			W_a = tf.Variable(weight_initializer([self._n_hidden	 , self._n_hidden]), name='W_a')
			U_a = tf.Variable(weight_initializer([self._vocab_size, self._n_hidden]), name='U_a')
			V_a = tf.Variable(weight_initializer([2*self._n_hidden, self._n_hidden]), name='V_a')
			v_a = tf.Variable(weight_initializer([self._n_hidden	 ,1]), name='v_a')
			tanh_bias_a = tf.Variable(weight_initializer([1,1]), name='tanh_bias_a')
			bias_a = tf.Variable(tf.zeros([1, self._n_hidden]), name='linear_bias_a')

			# Embedding weight
			w_emby = tf.Variable(weight_initializer([self._y_size,self._embedding_world_state_size]), name='Ey_w')
			b_emby = tf.Variable(tf.zeros([1, self._embedding_world_state_size]), name='Ey_b')
			"""
			# Encoder - decoder transition
			w_trans_s = tf.Variable(tf.truncated_normal([self._n_hidden, self._n_hidden], -0.1, 0.1), name='w_trans_s')
			b_trans_s = tf.Variable(tf.zeros([1,self._n_hidden	]), name='b_trans_s')
			w_trans_c = tf.Variable(tf.truncated_normal([self._n_hidden, self._n_hidden], -0.1, 0.1), name='w_trans_c')
			b_trans_c = tf.Variable(tf.zeros([1,self._n_hidden	]), name='b_trans_c')
			"""
			# Action Classifier weights and biases.
			ws = tf.Variable(weight_initializer([self._n_hidden							  , self._embedding_world_state_size]), name='ws')
			wz = tf.Variable(weight_initializer([2*self._n_hidden + self._vocab_size, self._embedding_world_state_size]), name='wz')
			wo = tf.Variable(weight_initializer([self._embedding_world_state_size	  , self._num_actions					]), name='wo')
			b_q = tf.Variable(tf.zeros([1,self._embedding_world_state_size	]), name='bq')
			b_o = tf.Variable(tf.zeros([1,self._num_actions						]), name='bo')

		#######################################################################################################################
		## Encoder
		with tf.variable_scope('Encoder') as scope:
			fw_cell = CustomLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._vocab_size)
			bw_cell = CustomLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._vocab_size)

			fw_cell_dp = tf.nn.rnn_cell.DropoutWrapper(
									fw_cell, output_keep_prob=keep_prob)
			bw_cell_dp = tf.nn.rnn_cell.DropoutWrapper(
									bw_cell, output_keep_prob=keep_prob)

			h_encoder,c1h1 = bidirectional_rnn(fw_cell_dp,bw_cell_dp,
										 self._encoder_inputs,
										 dtype=tf.float32,
										 sequence_length = self._encoder_unrollings*tf.ones([1],tf.int64),
										 scope='Encoder'
										 )
		#END-ENCODER-SCOPE

		#######################################################################################################################
		# Alignment model
		with tf.name_scope('Aligner') as scope:
			def context_vector(s_prev,h_encoder,ux_vh,encoder_inputs):
				"""Build the attention context vector z_t for one decoder step.

				Args:
					s_prev: decoder state used as the attention query; it is
						multiplied by W_a inside the alignment score.
					h_encoder: indexable sequence of per-step encoder outputs.
						Presumably each is [1, 2*n_hidden] (fw+bw outputs) -- TODO confirm.
					ux_vh: indexable sequence of per-step terms added to s_prev*W_a,
						as produced by precalc_Ux_Vh.
					encoder_inputs: indexable sequence of per-step encoder inputs.

				Returns:
					Tensor of shape [1, 2*n_hidden + vocab_size]: the weighted sum
					over encoder steps of concat(x_j, h_j).

				W_a, bias_a, v_a and tanh_bias_a are taken from the enclosing scope
				(they are defined outside this function).

				NOTE(review): padded steps get a score of exactly 0 and still go
				through the softmax, so they receive non-zero weight and the weights
				of the real steps sum to less than 1 whenever the sentence is shorter
				than max_encoder_unrollings. Confirm this is intended before changing
				it; masking with a large negative score would alter trained models.
				"""
				n_steps = self._max_encoder_unrollings
				ctx_width = 2*self._n_hidden + self._vocab_size

				def in_sentence(step):
					# graph-side predicate: step < number of encoder unrollings fed at run time
					return tf.less(tf.constant(step,dtype=tf.int64),self._encoder_unrollings)

				def score(step):
					# unnormalised alignment score of encoder step `step` w.r.t. s_prev
					hidden = tf.tanh(tf.matmul(s_prev,W_a) + ux_vh[step] + bias_a)
					return tf.matmul(hidden,v_a) + tanh_bias_a

				# one [1,1] score per encoder position; padded positions contribute a 0 score
				scores = [ tf.cond( in_sentence(step),
										  lambda: score(step),
										  lambda: tf.zeros([1,1]) )
							  for step in xrange(n_steps) ]
				beta = tf.concat(1,scores, name='beta')

				# normalise over all max_encoder_unrollings positions
				weights = tf.nn.softmax(beta)	# shape: batch_size x encoder_unroll
				# back to a list with one [batch_size x 1] weight per position
				alpha = tf.split(1,n_steps,weights, name='alpha')

				# accumulate sum_j alpha_j * (x_j, h_j); padded positions add zeros
				z_t = tf.zeros([1,ctx_width])
				for step in xrange(n_steps):
					xh = tf.cond( in_sentence(step),
									  lambda: tf.concat(1,[encoder_inputs[step],h_encoder[step]], name='xhj'), # (x_j, h_j)
									  lambda: tf.zeros([1,ctx_width]) )
					z_t = z_t + alpha[step]*xh
				return z_t

			def precalc_Ux_Vh(encoder_inputs,h_enc):
				"""Precompute the per-position term x_i*U_a + h_i*V_a of the alignment score.

				This term does not involve the decoder state, so it is built once per
				encoder pass and handed to context_vector as `ux_vh`.

				Args:
					encoder_inputs: indexable sequence of per-step encoder inputs.
					h_enc: indexable sequence of per-step encoder outputs.

				Returns:
					List of max_encoder_unrollings tensors. Positions at or beyond
					the fed number of encoder unrollings hold a [1, n_hidden] zero
					tensor instead of the projection.

				U_a and V_a are taken from the enclosing scope (defined outside
				this function).
				"""
				def projected(step):
					# x_step*U_a + h_step*V_a for a real (non-padded) position
					return tf.matmul(encoder_inputs[step],U_a) + tf.matmul(h_enc[step],V_a)

				def padding():
					# placeholder value for positions past the end of the sentence
					return tf.zeros([1,self._n_hidden])

				return [ tf.cond( tf.less(tf.constant(step,dtype=tf.int64),self._encoder_unrollings),
										lambda: projected(step),
										padding )
							for step in xrange(self._max_encoder_unrollings) ]

			U_V_precalc = precalc_Ux_Vh(self._encoder_inputs,h_encoder)

		#######################################################################################################################			
		## Decoder loop
		with tf.variable_scope('Decoder') as scope:
			dec_cell = CustomLSTMCell(self._n_hidden, forget_bias=1.0, input_size=self._vocab_size)
			dec_cell_dp = tf.nn.rnn_cell.DropoutWrapper(
									dec_cell, output_keep_prob=keep_prob)
			# Initial states
			#s_t = tf.tanh( tf.matmul(h1,w_trans_s)+b_trans_s , name='s_0')
			#c_t = tf.tanh( tf.matmul(c1,w_trans_c)+b_trans_c , name='c_0')
			c_t,s_t = tf.split(1,2,c1h1)
			state = c1h1

			logits = [] # logits per rolling
			self._train_predictions = []
			for i in xrange(self._max_decoder_unrollings):
				if i > 0: tf.get_variable_scope().reuse_variables()
				# world state vector at step i
				y_t = tf.cond( tf.less(tf.constant(i,dtype=tf.int64),self._decoder_unrollings),
									lambda: self._world_state_vectors[i],		# batch_size x num_local_feats (feat_id format)
									lambda: tf.zeros([1,self._y_size])
								)
				# embed world vector | relu nodes
				ey = tf.nn.relu(tf.matmul(y_t,w_emby) + b_emby, name='Ey')
				# run attention mechanism
				z_t = context_vector(s_t,h_encoder,U_V_precalc,self._encoder_inputs)

				# merge inputs and attention prev state
				dec_input = tf.concat(1,[ey,z_t])
				s_t,state = dec_cell_dp(dec_input,state,scope="CustomLSTMCell")

				# Hidden linear layer before output, projects z_t, y_t, and s_t to an embedding-size layer
				hq = ey + tf.matmul(s_t,ws) + tf.matmul(z_t,wz) + b_q
				# Output layer
				logit = tf.matmul(hq,wo) + b_o
				fill_pred = tf.constant([[0.,0.,0.,0.,1.]])	# one-hot vector for PAD
				prediction = tf.cond( tf.less(tf.constant(i,dtype=tf.int64),self._decoder_unrollings),
									  lambda: tf.nn.softmax(logit,name='prediction'),
									  lambda: fill_pred
								)
				logits.append(logit)
				self._train_predictions.append(prediction)
			#END-FOR-DECODER-UNROLLING
			# Loss definition
			reshaped_dec_outputs = []
			for i in xrange(self._max_decoder_unrollings):
				out = tf.cond( tf.less(tf.constant(i,dtype=tf.int64),self._decoder_unrollings),
									lambda: self._decoder_outputs[i],
									lambda: 4*tf.ones([1],dtype=tf.int32)
					)
				reshaped_dec_outputs.append(out)
			self._loss = tf.nn.seq2seq.sequence_loss(logits,
																 targets=reshaped_dec_outputs,
																 weights=[tf.ones([1],dtype=tf.float32)]*self._max_decoder_unrollings,
																 #np.ones(shape=(self._max_decoder_unrollings,1),dtype=np.float32),
																 name='train_loss')

		###################################################################################################################
		# TESTING
		with tf.variable_scope('Encoder',reuse=True) as scope:
			test_h,c1h1 = bidirectional_rnn(fw_cell,bw_cell,
										 self._encoder_inputs,
										 dtype=tf.float32,
										 sequence_length = self._encoder_unrollings*tf.ones([1],tf.int64),
										 scope='Encoder'
										 )
			
		#self._test_s0 = tf.tanh( tf.matmul(h1,w_trans_s)+b_trans_s, name='test_s0')
		#self._test_c0 = tf.tanh( tf.matmul(c1,w_trans_c)+b_trans_c, name='test_c0')
		self._test_c0,self._test_s0 = tf.split(1,2,c1h1)

		test_ux_vh = precalc_Ux_Vh(self._encoder_inputs,test_h)

		with tf.variable_scope('Decoder',reuse=True) as scope:
			# embed world vector | relu nodes
			ey = tf.nn.relu(tf.matmul(self._test_yt,w_emby) + b_emby, name='Ey_test')
			# context vector
			z_t = context_vector(self._test_st,test_h,test_ux_vh,self._encoder_inputs)
			
			state = tf.concat(1,[self._test_ct,self._test_st])
			dec_input = tf.concat(1,[ey,z_t])

			_,temp = dec_cell(dec_input, state,scope="CustomLSTMCell")
			self._next_ct,self._next_st = tf.split(1,2,temp)

			# Hidden linear layer before output, projects z_t, y_t, and s_t to an embedding-size layer
			hq = ey + tf.matmul(self._next_st,ws) + tf.matmul(z_t,wz) + b_q
			logit = tf.matmul(hq,wo) + b_o
			self._test_prediction = tf.nn.softmax(logit,name='inf_prediction')
			# Loss definition
			self._test_loss = tf.nn.softmax_cross_entropy_with_logits(logit,self._test_decoder_output, name="test_loss")
		#END-DECODER-SCOPE

		
		with tf.variable_scope('Optimization') as scope:
			# Optimizer setup
			self._global_step = tf.Variable(0,trainable=False)
			
			self._learning_rate = tf.train.exponential_decay(self._init_learning_rate,
															 self._global_step, 
															 80000,
															 self._learning_rate_decay_factor,
															 staircase=True)
			
			
			#optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
			optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate,
														  epsilon=1e-1)
			# Gradient clipping
			#gradients = tf.gradients(self._loss,params)
			gradients,params = zip(*optimizer.compute_gradients(self._loss))
			self._clipped_gradients, self._global_norm = tf.clip_by_global_norm(gradients, self._max_gradient_norm)
			# Apply clipped gradients
			self._optimizer = optimizer.apply_gradients( zip(self._clipped_gradients, params) , global_step=self._global_step )

		with tf.name_scope('Summaries') as scope:
			# Summaries
			clipped_resh = [tf.reshape(tensor,[-1]) for tensor in self._clipped_gradients if tensor]
			clipped_resh = tf.concat(0,clipped_resh)
			# weight summaries
			temp = tf.trainable_variables()
			alignw = [tf.reshape(tensor,[-1]) for tensor in temp[:6]]
			alignw = tf.concat(0,alignw)
			eyw = [tf.reshape(tensor,[-1]) for tensor in temp[6:8]]
			eyw = tf.concat(0,eyw)
			how = [tf.reshape(tensor,[-1]) for tensor in temp[8:10]]
			how = tf.concat(0,how)
			ow = [tf.reshape(tensor,[-1]) for tensor in temp[10:13]]
			ow = tf.concat(0,ow)

			encw = [tf.reshape(tensor,[-1]) for tensor in temp[13:17]]
			encw = tf.concat(0,encw)
			decw = [tf.reshape(tensor,[-1]) for tensor in temp[17:19]]
			decw = tf.concat(0,decw)

			# sum strings
			_ = tf.scalar_summary("loss",self._loss)
			_ = tf.scalar_summary('global_norm',self._global_norm)
			_ = tf.scalar_summary('learning rate',self._learning_rate)
			_ = tf.histogram_summary('clipped_gradients', clipped_resh)
			_ = tf.histogram_summary('aligner', alignw)
			_ = tf.histogram_summary('Y embedding', eyw)
			_ = tf.histogram_summary('hidden output layer', how)
			_ = tf.histogram_summary('output layer', ow)
			_ = tf.histogram_summary('encoder w', encw)
			_ = tf.histogram_summary('decoder w', decw)

			self._merged = tf.merge_all_summaries()

			# include accuracies as summaries
			self._train_acc = tf.placeholder(tf.float32,name='train_accuracy')
			self._val_acc   = tf.placeholder(tf.float32,name='val_accuracy')
			self._train_acc_sum = tf.scalar_summary("Training accuracy",self._train_acc)
			self._val_acc_sum = tf.scalar_summary("Validation accuracy",self._val_acc)

		# checkpoint saver
		self.saver = tf.train.Saver(tf.all_variables())
		self.vars_to_init = set(tf.all_variables()) - set(tf.trainable_variables())
		self.saver = tf.train.Saver(tf.trainable_variables())