Example 1
    def __init__(self, args, is_training=True):
        self.args = args

        if not is_training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            self.cell = rnn_cell.BasicRNNCell(args.rnn_size)
        elif args.model == 'gru':
            self.cell = rnn_cell.GRUCell(args.rnn_size)
        elif args.model == 'lstm':
            self.cell = rnn_cell.BasicLSTMCell(args.rnn_size)
        else:
            raise Exception('model type not supported: {}'.format(args.model))

        self.cell = rnn_cell.MultiRNNCell([self.cell] * args.num_layers)

        self.input_data    = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets       = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) # Target replication
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable('softmax_w', [args.rnn_size, 2])
            softmax_b = tf.get_variable('softmax_b', [2])

            with tf.device('/cpu:0'):
                embedding = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
                inputs    = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs    = [tf.squeeze(i, [1]) for i in inputs]

            outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, 
                self.cell, loop_function=None)

        output_tf   = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output_tf, softmax_w, softmax_b)
        self.probs  = tf.nn.softmax(self.logits)
        
        # Per-token cross-entropy; the all-ones weights give every target position equal weight.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

        self.final_state = last_state
        self.lr          = tf.Variable(0.0, trainable = False)
        tvars            = tf.trainable_variables()
        grads, _         = tf.clip_by_global_norm(tf.gradients(self.cost, tvars, aggregation_method=2), args.grad_clip)
        optimizer        = tf.train.AdamOptimizer(self.lr)
        self.train_op    = optimizer.apply_gradients(zip(grads, tvars))
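For reference, the constructor above only reads a handful of fields from args. A minimal sketch of an argparse-style namespace that would satisfy it (the values and the class name Model are illustrative placeholders, not taken from the original code):

    from argparse import Namespace

    args = Namespace(model='lstm', rnn_size=128, num_layers=2,
                     batch_size=50, seq_length=50, vocab_size=65,
                     grad_clip=5.0)
    model = Model(args, is_training=True)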
Example 2
    def __init__(self,
                 sess,
                 vocab_size,
                 cell_size,
                 embedding_size,
                 num_layer,
                 memory_size,
                 log_dir,
                 learning_rate=0.001,
                 momentum=0.9,
                 learning_rate_decay_factor=0.85,
                 use_dropout=True,
                 l2_coef=1e-6):

        with tf.name_scope("io"):
            self.inputs = tf.placeholder(dtype=tf.int32,
                                         shape=(None, None),
                                         name="prev_words")
            self.input_lens = tf.placeholder(dtype=tf.int32,
                                             shape=(None, ),
                                             name="sent_len")
            self.labels = tf.placeholder(dtype=tf.int32,
                                         shape=(None, None),
                                         name="next_words")
            self.keep_prob = tf.placeholder(dtype=tf.float32, name="keep_prob")
            self.learning_rate = tf.Variable(float(learning_rate),
                                             trainable=False)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * learning_rate_decay_factor)

        max_sent_len = array_ops.shape(self.labels)[1]
        with variable_scope.variable_scope("word-embedding"):
            embedding = tf.get_variable("embedding",
                                        [vocab_size, embedding_size],
                                        dtype=tf.float32)
            input_embedding = embedding_ops.embedding_lookup(
                embedding,
                tf.squeeze(tf.reshape(self.inputs, [-1, 1]), squeeze_dims=[1]))

            input_embedding = tf.reshape(input_embedding,
                                         [-1, max_sent_len, embedding_size])

        with variable_scope.variable_scope("rnn"):
            # cell = tf_helpers.MemoryGRUCell(cell_size, memory_size, attn_size=100)
            cell = rnn_cell.BasicLSTMCell(cell_size)

            if use_dropout:
                cell = rnn_cell.DropoutWrapper(cell,
                                               output_keep_prob=self.keep_prob,
                                               input_keep_prob=self.keep_prob)

            if num_layer > 1:
                cell = rnn_cell.MultiRNNCell([cell] * num_layer,
                                             state_is_tuple=True)

            # add output projection
            cell = tf.nn.rnn_cell.OutputProjectionWrapper(cell, vocab_size)

            # last_state holds the cell state at each sequence's final valid step (per sequence_length)
            self.logits, last_state = tf.nn.dynamic_rnn(
                cell,
                input_embedding,
                dtype=tf.float32,
                sequence_length=self.input_lens,
            )
        self.loss = self.sequence_loss()
        tf.scalar_summary("entropy_loss", self.loss)
        tf.scalar_summary("perplexity", tf.exp(self.loss))
        self.summary_op = tf.merge_all_summaries()

        # weight decay
        """
        if l2_coef > 0.0:
            all_weights = []
            vars = tf.trainable_variables()
            for v in vars:
                if "bias" not in v.name.lower():
                    all_weights.append(tf.nn.l2_loss(v))
                    print("adding l2 to %s" %v.name)

            loss_l2= tf.add_n(all_weights)
            self.reg_loss = self.loss + l2_coef * loss_l2
        else:
            self.reg_loss = self.loss
            """

        # optimization
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), 5.0)
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_ops = optimizer.apply_gradients(zip(grads, tvars))

        train_log_dir = os.path.join(log_dir, "train")
        valid_log_dir = os.path.join(log_dir, "valid")
        print("Save summary to %s" % log_dir)
        self.train_summary_writer = tf.train.SummaryWriter(
            train_log_dir, sess.graph)
        self.valid_summary_writer = tf.train.SummaryWriter(
            valid_log_dir, sess.graph)
        self.saver = tf.train.Saver(tf.all_variables())
Example 3
    def __init__(self, config, is_training=False):
        self.config = config
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.hidden_size = hidden_size = config.hidden_size
        self.num_layers = 1
        vocab_size = config.vocab_size
        self.max_grad_norm = config.max_grad_norm
        self.use_lstm = config.use_lstm

        # Placeholders for inputs.
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.initial_state = array_ops.zeros(
            array_ops.pack([self.batch_size, self.num_steps]),
            dtype=tf.float32)
        # set_shape() modifies the tensor in place and returns None, so it cannot be chained.
        self.initial_state.set_shape([None, self.num_steps])

        embedding = tf.get_variable(
            'embedding', [self.config.vocab_size, self.config.hidden_size])

        # Set up ACT cell and inner rnn-type cell for use inside the ACT cell.
        with tf.variable_scope("rnn"):
            if self.use_lstm:
                inner_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
            else:
                inner_cell = rnn_cell.GRUCell(self.config.hidden_size)

        with tf.variable_scope("ACT"):

            act = ACTCell(self.config.hidden_size,
                          inner_cell,
                          config.epsilon,
                          max_computation=config.max_computation,
                          batch_size=self.batch_size)

        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = [
            tf.squeeze(single_input, [1])
            for single_input in tf.split(1, self.config.num_steps, inputs)
        ]

        self.outputs, final_state = rnn(act, inputs, dtype=tf.float32)

        # Softmax to get probability distribution over vocab.
        output = tf.reshape(tf.concat(1, self.outputs), [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        self.logits = tf.matmul(
            output,
            softmax_w) + softmax_b  # dim (numsteps*batchsize, vocabsize)

        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * num_steps])], vocab_size)

        # Add up loss and retrieve batch-normalised ponder cost: sum N + sum Remainder.
        ponder_cost = act.calculate_ponder_cost(
            time_penalty=self.config.ponder_time_penalty)
        self.cost = (tf.reduce_sum(loss) / batch_size) + ponder_cost
        self.final_state = self.outputs[-1]

        if is_training:
            self.lr = tf.Variable(0.0, trainable=False)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                              self.max_grad_norm)
            optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
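The ACT model above reads the following fields from config. A minimal stand-in with illustrative values (ACTCell itself comes from the surrounding project and is not shown here):

    class Config(object):
        batch_size = 32
        num_steps = 20
        hidden_size = 128
        vocab_size = 10000
        max_grad_norm = 5.0
        use_lstm = True
        epsilon = 0.01
        max_computation = 50
        ponder_time_penalty = 0.01
        learning_rate = 1e-3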
Example 4
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_labels,
                 batch_size,
                 embed,
                 learning_rate=0.001,
                 max_gradient_norm=5.0
                 ):
        # todo: implement placeholders
        self.texts1 = tf.placeholder(tf.string, [batch_size, None], name='texts1')
        self.texts2 = tf.placeholder(tf.string, [batch_size, None], name='texts2')  # shape: batch*len
        self.texts_length = tf.placeholder(tf.int32, [None], name='texts_length')  # shape: batch
        self.len = tf.constant(1.0, shape=[batch_size])
        self.labels = tf.placeholder(
            tf.int64, [None], name='labels')  # shape: batch
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.embed_units = num_embed_units
        self.batch_size = batch_size
        self._initializer = tf.truncated_normal_initializer(stddev=0.1)
        self.symbol2index = MutableHashTable(
            key_dtype=tf.string,
            value_dtype=tf.int64,
            default_value=UNK_ID,
            shared_name="in_table",
            name="in_table",
            checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(
            float(learning_rate), trainable=False, dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input1 = self.symbol2index.lookup(self.texts1)   # batch*len
        self.index_input2 = self.symbol2index.lookup(self.texts2)
        '''
        self.h_s1 = tf.Variable(tf.constant(0.0,shape=[num_units+1, batch_size, num_embed_units]), trainable=False)
        self.h_s2 = tf.Variable(tf.constant(0.0,shape=[num_units+1, batch_size, num_embed_units]), trainable=False)
        self.h_r = tf.Variable(tf.constant(0.0,shape=[num_units+1, batch_size, num_embed_units]), trainable=False)
        self.a1 = tf.Variable(tf.constant(0.0,shape=[num_units+1, batch_size, num_embed_units]), trainable=False)
        self.a2 = tf.Variable(tf.constant(0.0,shape=[num_units+1, batch_size, num_embed_units]), trainable=False)
        '''
        self.h_s1 = []
        self.h_s2 = []
        self.h_r = []
        self.a1 = []
        self.a2 = []
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable(
                'embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable(
                'embed', dtype=tf.float32, initializer=embed)

        self.embed_input1 = tf.nn.embedding_lookup(
            self.embed, self.index_input1)  # batch*len*embed_unit
        self.embed_input2 = tf.nn.embedding_lookup(
            self.embed, self.index_input2)
        with tf.variable_scope('lstm_s'):
            self.lstm_s = rnn_cell.BasicLSTMCell(num_units=num_embed_units, forget_bias=0)
        '''
        out_s1, state_s1 = tf.nn.dynamic_rnn(self.lstm_s, self.embed_input1, self.texts_length, dtype=tf.float32)
        out_s2, state_s2 = tf.nn.dynamic_rnn(self.lstm_s, self.embed_input2, self.texts_length, dtype=tf.float32)
        self.h_s1 = state_s1
        self.h_s2 = state_s2
        '''
        with tf.variable_scope('lstm_r'):
            self.lstm_r = rnn_cell.BasicLSTMCell(num_units=num_embed_units, forget_bias=0)
        '''
        self.ini_op1 = tf.assign(self.h_s1[0], self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.ini_op2 = tf.assign(self.h_s2[0], self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.ini_op3 = tf.assign(self.h_r[0], self.lstm_r.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.ini_op4 = tf.assign(self.a1[0], self.lstm_r.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.ini_op5 = tf.assign(self.a2[0], self.lstm_r.zero_state(batch_size=batch_size, dtype=tf.float32))
        '''
        
        self.h_s1.append(self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.h_s2.append(self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.h_r.append(self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.a1.append(self.lstm_r.zero_state(batch_size=batch_size, dtype=tf.float32))
        self.a2.append(self.lstm_r.zero_state(batch_size=batch_size, dtype=tf.float32)) 
        
        W = tf.Variable(self._initializer(shape=[num_embed_units, num_labels],dtype=tf.float32))
        bias = tf.Variable(tf.constant(0.0, shape=[num_labels]), dtype=tf.float32)

        i = tf.constant(1, dtype=tf.int64)
        print(self.index_input1[1].get_shape())
        length = self._length(self.index_input1[1])
        self.ind = 1
        state_s1 = self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32)
        state_s2 = self.lstm_s.zero_state(batch_size=batch_size, dtype=tf.float32)
        state_r = self.lstm_r.zero_state(batch_size=batch_size, dtype=tf.float32)
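        # c and b below are the tf.while_loop condition and body: self.attention is applied once
        # per time step, threading the two sentence LSTM states and the lstm_r state through the loop.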
        def c(t, s1, s2, sr): return tf.less(t, length+1)

        def b(t, s1, s2, sr): return self.attention(t, s1, s2, sr)
        i, state_s1, state_s2, state_r = tf.while_loop(cond=c, body=b, loop_vars=(i, state_s1, state_s2, state_r))

        
        logits = tf.matmul(state_r.h, W) + bias

        #logits = tf.layers.dense(outputs, num_labels)

        # todo: implement unfinished networks

        self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.labels, logits=logits), name='loss')
        mean_loss = self.loss / \
            tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(
            tf.cast(tf.equal(self.labels, predict_labels), tf.int32), name='accuracy')

        self.params = tf.trainable_variables()
        # calculate the gradient of parameters
        '''
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(
            zip(clipped_gradients, self.params), global_step=self.global_step)
        '''
        # self.global_step was already created above, so it is reused here rather than redefined.
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(mean_loss, global_step=self.global_step,
                                                                            var_list=self.params)
        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
Example 5
	def __init__(self, args, is_training=True):

		if not is_training:
			seq_length = 1
		else:
			seq_length = args.seq_length

		if args.model == 'rnn':
			cell_gen = rnn_cell.BasicRNNCell(args.rnn_size)
			cell_dis = rnn_cell.BasicRNNCell(args.rnn_size)
		elif args.model == 'gru':
			cell_gen = rnn_cell.GRUCell(args.rnn_size)
			cell_dis = rnn_cell.GRUCell(args.rnn_size)
		elif args.model == 'lstm':
			cell_gen = rnn_cell.BasicLSTMCell(args.rnn_size)
			cell_dis = rnn_cell.BasicLSTMCell(args.rnn_size)
		else:
			raise Exception('model type not supported: {}'.format(args.model))

		# Pass the generated sequences and targets (1)
		with tf.name_scope('input'):
			with tf.name_scope('data'):
				self.input_data  = tf.placeholder(tf.int32, [args.batch_size, seq_length])
			with tf.name_scope('targets'):
				self.targets     = tf.placeholder(tf.int32, [args.batch_size, seq_length])

		############
		# Generator
		############
		with tf.variable_scope('generator'):
			self.cell_gen = rnn_cell.MultiRNNCell([cell_gen] * args.num_layers)
			self.initial_state_gen = self.cell_gen.zero_state(args.batch_size, tf.float32)	

			with tf.variable_scope('rnn'):
				softmax_w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
				softmax_b = tf.get_variable('softmax_b', [args.vocab_size])
				
				with tf.device('/cpu:0'):
					embedding  = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
					inputs_gen = tf.split(1, seq_length, tf.nn.embedding_lookup(
						embedding, self.input_data))
					inputs_gen = [tf.squeeze(i, [1]) for i in inputs_gen]

			outputs_gen, last_state_gen = seq2seq.rnn_decoder(inputs_gen, self.initial_state_gen, 
				self.cell_gen, loop_function=None)
			
			self.logits_sequence = []
			for output_gen in outputs_gen:
				logits_gen  = tf.nn.xw_plus_b(output_gen, softmax_w, softmax_b)
				self.logits_sequence.append(logits_gen)

			self.final_state_gen = last_state_gen

		################
		# Discriminator
		################
		with tf.variable_scope('discriminator'):
			self.cell_dis = rnn_cell.MultiRNNCell([cell_dis] * args.num_layers)
			self.initial_state_dis = self.cell_dis.zero_state(args.batch_size, tf.float32)

			with tf.variable_scope('rnn'):
				softmax_w = tf.get_variable('softmax_w', [args.rnn_size, 2])
				softmax_b = tf.get_variable('softmax_b', [2])

				inputs_dis = []
				embedding  = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])
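				# The discriminator consumes "soft" inputs: each generator logit vector is multiplied by
				# the embedding matrix, keeping the generator-discriminator pipeline differentiable
				# (the commented-out line below is the softmax variant of the same idea).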
				for logit in self.logits_sequence:
					inputs_dis.append(tf.matmul(logit, embedding))
					# inputs_dis.append(tf.matmul(tf.nn.softmax(logit), embedding))
					
				outputs_dis, last_state_dis = seq2seq.rnn_decoder(inputs_dis,
					self.initial_state_dis, self.cell_dis, loop_function=None)

			probs, logits = [], []
			for output_dis in outputs_dis:
				logit = tf.nn.xw_plus_b(output_dis, softmax_w, softmax_b)
				prob = tf.nn.softmax(logit)
				logits.append(logit)
				probs.append(prob)

			with tf.name_scope('summary'):
				probs      = tf.pack(probs)
				probs_real = tf.slice(probs, [0, 0, 1], [seq_length, args.batch_size, 1])
				variable_summaries(probs_real, 'probability of real')

			self.final_state_dis = last_state_dis

		#########
		# Train
		#########
		with tf.name_scope('train'):
			gen_loss = seq2seq.sequence_loss_by_example(
				logits,
				tf.unpack(tf.transpose(self.targets)), 
				tf.unpack(tf.transpose(tf.ones_like(self.targets, dtype=tf.float32))))

			self.gen_cost = tf.reduce_sum(gen_loss) / args.batch_size
			tf.scalar_summary('training loss', self.gen_cost)
			self.lr_gen = tf.Variable(0.0, trainable = False)		
			self.tvars 	= tf.trainable_variables()
			gen_vars    = [v for v in self.tvars if not v.name.startswith("discriminator/")]

			if is_training:
				gen_grads            = tf.gradients(self.gen_cost, gen_vars)
				self.all_grads       = tf.gradients(self.gen_cost, self.tvars)
				gen_grads_clipped, _ = tf.clip_by_global_norm(gen_grads, args.grad_clip)
				gen_optimizer        = tf.train.AdamOptimizer(self.lr_gen)
				self.gen_train_op    = gen_optimizer.apply_gradients(
											zip(gen_grads_clipped, gen_vars))				

		with tf.name_scope('summary'):
			with tf.name_scope('weight_summary'):
				for v in self.tvars:
					variable_summaries(v, v.op.name)
			if is_training:
				with tf.name_scope('grad_summary'):
					for var, grad in zip(self.tvars, self.all_grads):
						variable_summaries(grad, 'grad/' + var.op.name)

		self.merged = tf.merge_all_summaries()
Example 6
    def __init__(self, embed_size, lstm_size, vocab_size, \
                 batch_size, seq_length, learn_rate, \
                 keep_prob=1.0, num_layers=2, name='char_lstm'):
        '''
        Initialize a character-level multilayer LSTM language model.
        Arguments:
            @embed_size: dimensions of embedding space
            @lstm_size:  number of hidden units in each LSTM layer
            @vocab_size: number of things in vocabulary (characters!)
            @batch_size: sequences per training batch
            @seq_length: length of sequences in each training batch
            @learn_rate: AdamOptimizer step size
            @keep_prob:  1 - dropout probability
            @num_layers: number of LSTM cells to stack
        '''
        # store params
        self.embed_size, self.lstm_size = embed_size, lstm_size
        self.vocab_size, self.seq_length = vocab_size, seq_length
        self.batch_size, self.learn_rate = batch_size, learn_rate
        self.kp, self.num_layers = keep_prob, num_layers
        self.name = name

        # Placeholders for input/output and dropout
        self.train_inputs = tf.placeholder(tf.int32,
                                           shape=[batch_size, seq_length])
        self.train_targets = tf.placeholder(tf.int32,
                                            shape=[batch_size, seq_length])
        self.sample_inputs = tf.placeholder(tf.int32, shape=[1, 1])

        # Set up embeddings
        E = weight('embedding', [vocab_size, embed_size])
        train_embeddings = tf.nn.embedding_lookup(E,
                                                  self.train_inputs,
                                                  name='train_embeddings')
        dropped_train_embeddings = (tf.nn.dropout(train_embeddings, self.kp)
                                    if self.kp < 1.0 else train_embeddings)
        sample_embeddings = tf.nn.embedding_lookup(E,
                                                   self.sample_inputs,
                                                   name='sample_embeddings')

        # Set up 2-layer LSTM
        # Use dynamic_rnn to run the cells
        with tf.variable_scope('lstm') as scope:
            single_cell = rnn_cell.BasicLSTMCell(lstm_size)
            self.cell = rnn_cell.MultiRNNCell([single_cell] * num_layers,
                                              state_is_tuple=True)
            self.train_init_state = self.cell.zero_state(
                batch_size, tf.float32)
            self.sample_init_state = self.cell.zero_state(1, tf.float32)
            train_outputs, self.train_state = dynamic_rnn(
                self.cell,
                dropped_train_embeddings,
                initial_state=self.train_init_state)
            scope.reuse_variables()
            sample_outputs, self.sample_state = dynamic_rnn(
                self.cell,
                sample_embeddings,
                initial_state=self.sample_init_state)

        reshaped_train_outputs = tf.reshape(
            train_outputs, (batch_size * seq_length, lstm_size))
        reshaped_sample_outputs = tf.reshape(sample_outputs, (1, lstm_size))

        # final feedforward layer (model logits)
        with tf.variable_scope('ff') as scope:
            ff_weights = weight('ff_weights', (lstm_size, vocab_size))
            ff_biases = weight('ff_biases', (vocab_size, ))
            self.ls = train_logits = tf.add(
                ff_biases, tf.matmul(reshaped_train_outputs, ff_weights))
            scope.reuse_variables()
            sample_logits = tf.add(
                ff_biases, tf.matmul(reshaped_sample_outputs, ff_weights))
        self.probs = softmax(sample_logits)

        # softmax and loss for training
        log_perps = tf.nn.seq2seq.sequence_loss_by_example(
            [train_logits], [tf.reshape(self.train_targets, [-1])],
            [tf.ones([batch_size * seq_length])])
        self.loss = tf.reduce_mean(log_perps) / batch_size

        # define trainer, saver, inits
        self.train_op = tf.train.AdamOptimizer(learn_rate).minimize(self.loss)
        self.init_op = tf.initialize_all_variables()
        self.saver = tf.train.Saver()
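A bare instantiation sketch; CharLSTM is a placeholder for the enclosing class name (the snippet only shows __init__) and the hyperparameter values are illustrative:

    model = CharLSTM(embed_size=64, lstm_size=256, vocab_size=96,
                     batch_size=50, seq_length=100, learn_rate=2e-3,
                     keep_prob=0.5, num_layers=2)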
Example 7
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_labels,
                 batch_size,
                 embed,
                 learning_rate=0.001,
                 max_gradient_norm=5.0):

        self.texts1 = tf.placeholder(tf.string, [batch_size, None],
                                     name='texts1')
        self.texts2 = tf.placeholder(tf.string, [batch_size, None],
                                     name='texts2')  # shape: batch*len
        self.texts_length = tf.placeholder(tf.int32, [None],
                                           name='texts_length')  # shape: batch
        learning_rate_decay_factor = 0.9
        self.len = tf.constant(1.0, shape=[batch_size])
        self.labels = tf.placeholder(tf.int64, [None],
                                     name='labels')  # shape: batch
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.embed_units = num_embed_units
        self.num_units = num_units
        self.batch_size = batch_size
        self._initializer = tf.truncated_normal_initializer(stddev=0.1)
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)
        self.learning_rate = tf.train.exponential_decay(
            self.learning_rate, self.epoch, 1, 0.95)

        self.index_input1 = self.symbol2index.lookup(self.texts1)  # batch*len
        self.index_input2 = self.symbol2index.lookup(self.texts2)

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input1 = tf.nn.embedding_lookup(
            self.embed, self.index_input1)  # batch*len*embed_unit
        self.embed_input2 = tf.nn.embedding_lookup(self.embed,
                                                   self.index_input2)

        with tf.variable_scope('lstm_s'):
            self.lstm_s = rnn_cell.BasicLSTMCell(num_units=num_units,
                                                 forget_bias=0)

        with tf.variable_scope('lstm_r'):
            self.lstm_r = rnn_cell.BasicLSTMCell(num_units=num_units,
                                                 forget_bias=0)

        out_s1, state_s1 = dynamic_rnn(self.lstm_s,
                                       self.embed_input1,
                                       self.texts_length,
                                       dtype=tf.float32,
                                       scope='rnn')
        out_s2, state_s2 = dynamic_rnn(self.lstm_s,
                                       self.embed_input2,
                                       self.texts_length,
                                       dtype=tf.float32,
                                       scope='rnn')

        self.h_s1 = out_s1
        self.h_s2 = out_s2
        reshaped_s1 = tf.reshape(self.h_s1, [-1, self.num_units])
        reshaped_s2 = tf.reshape(self.h_s2, [-1, self.num_units])
        with tf.variable_scope('Attn_'):
            W_s = tf.get_variable(shape=[self.num_units, self.num_units],
                                  initializer=self._initializer,
                                  name='W_s')
        self.s_1 = tf.matmul(reshaped_s1, W_s)
        self.s_1 = tf.transpose(
            tf.reshape(self.s_1, [self.batch_size, -1, self.num_units]),
            [1, 2, 0])
        i = tf.constant(0)

        self.length = tf.reduce_max(self.texts_length)
        print(self.length)
        state_r = self.lstm_r.zero_state(batch_size=batch_size,
                                         dtype=tf.float32)
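        # c and b below are the tf.while_loop condition and body: self.attention runs once per
        # time step up to the longest sequence, carrying the lstm_r state between steps.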

        def c(t, sr):
            return tf.less(t, self.length)

        def b(t, sr):
            return self.attention(t, sr)

        i, state_r = tf.while_loop(cond=c, body=b, loop_vars=(i, state_r))

        with tf.variable_scope('fully_connect'):
            w_fc = tf.get_variable(shape=[self.num_units, num_labels],
                                   initializer=self._initializer,
                                   name='w_fc')
            b_fc = tf.get_variable(shape=[num_labels],
                                   initializer=self._initializer,
                                   name='b_fc')
        logits = tf.matmul(state_r.h, w_fc) + b_fc

        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / \
            tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int64),
                                      name='accuracy')

        self.params = tf.trainable_variables()
        # calculate the gradient of parameters
        for item in tf.global_variables():
            print(item)
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        #self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(mean_loss, global_step=self.global_step,
        #var_list=self.params)
        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)