コード例 #1
0
ファイル: seq2seq_test.py プロジェクト: xzm2004260/tensorflow
    def testSequenceLossByExample(self):
        """Check sequence_loss_by_example with and without timestep averaging.

        Three timesteps of constant logits (batch of 2, 5 classes) are scored
        against constant targets with unit weights, and the evaluated losses
        are compared with fixed reference values.
        """
        with self.test_session() as sess:
            output_classes = 5

            # One constant-filled tensor per timestep for each input list;
            # timestep i uses logit value i + 0.5 and target class i.
            logits = []
            for step in xrange(3):
                logits.append(
                    tf.constant(step + 0.5, shape=[2, output_classes]))
            targets = []
            for step in xrange(3):
                targets.append(tf.constant(step, tf.int32, shape=[2]))
            weights = []
            for _ in xrange(3):
                weights.append(tf.constant(1.0, shape=[2]))

            # Per-example loss averaged over the timesteps.
            averaged_op = seq2seq.sequence_loss_by_example(
                logits, targets, weights, output_classes,
                average_across_timesteps=True)
            averaged = sess.run(averaged_op)
            self.assertAllClose(averaged, np.asarray([1.609438, 1.609438]))

            # Per-example loss summed over the timesteps.
            summed_op = seq2seq.sequence_loss_by_example(
                logits, targets, weights, output_classes,
                average_across_timesteps=False)
            summed = sess.run(summed_op)
            self.assertAllClose(summed, np.asarray([4.828314, 4.828314]))
コード例 #2
0
ファイル: char_rnn.py プロジェクト: mohakraaj/lyric_generator
    def create_model(self):
        """Build the RNN language-model graph and its training op.

        Reads ``batch_size``, ``seq_length``, ``rnn_size``, ``vocab_size``,
        ``num_layers``, ``grad_clip``, ``is_training`` and ``infer`` from
        ``self``. Stores the placeholders, hyper-parameter variables,
        embedding, initial/final state, logits, probabilities, cost and
        train op back on ``self``.
        """
        self.input_data = tf.placeholder(
            tf.int32, [self.batch_size, self.seq_length], name="input_data")
        self.target_data = tf.placeholder(
            tf.int32, [self.batch_size, self.seq_length], name="target_data")

        # define hyper_parameters
        # The keep probability is kept as a plain Python float as well: the
        # graph-construction branch below must test a Python value, because a
        # comparison on the tf.Variable yields a Tensor, which cannot be used
        # as a Python truth value.
        keep_prob_value = 0.3
        self.keep_prob = tf.Variable(
            keep_prob_value, trainable=False, name='keep_prob')
        self.lr = tf.Variable(0.0, trainable=False, name="lr")

        softmax_weights = tf.get_variable(
            "softmax_weights", [self.rnn_size, self.vocab_size])
        softmax_biases = tf.get_variable("softmax_biases", [self.vocab_size])

        lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size)
        multilayer_cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
        self.initial_state = multilayer_cell.zero_state(
            self.batch_size, tf.float32)

        with tf.device("/cpu:0"):
            # define the embedding matrix for the whole vocabulary
            self.embedding = tf.get_variable(
                "embeddings", [self.vocab_size, self.rnn_size])
            # take the vector representation for each word in the embeddings
            embeds = tf.nn.embedding_lookup(self.embedding, self.input_data)

        if self.is_training and keep_prob_value < 1:
            embeds = tf.nn.dropout(embeds, self.keep_prob)

        def loop(prev, _):
            # Feed the embedding of the most likely symbol of the previous
            # step as the next input; no gradient flows through the argmax.
            prev = tf.nn.xw_plus_b(prev, softmax_weights, softmax_biases)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        # convert input to a list of seq_length tensors
        inputs = tf.split(1, self.seq_length, embeds)

        # after splitting, each piece has shape (batch_size, 1, rnn_size);
        # drop the middle axis to get (batch_size, rnn_size)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        output, states = seq2seq.rnn_decoder(
            inputs, self.initial_state, multilayer_cell,
            loop_function=loop if self.infer else None, scope='rnnlm')

        output = tf.reshape(tf.concat(1, output), [-1, self.rnn_size])

        self.logits = tf.nn.xw_plus_b(output, softmax_weights, softmax_biases)
        self.probs = tf.nn.softmax(self.logits, name="probability")

        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.target_data, [-1])],
            [tf.ones([self.batch_size * self.seq_length])],
            self.vocab_size)
        # Mean loss per (sequence, time step) position.
        self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)

        self.final_state = states[-1]

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tvars), self.grad_clip)

        # NOTE(review): the optimizer uses a fixed 0.01 learning rate;
        # self.lr is created above but not read in this method.
        optimizer = tf.train.AdamOptimizer(0.01)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #3
0
  def __init__(self, vocabularySize, config_param):
    """Build the graph of a multi-layer basic-RNN language model.

    Args:
      vocabularySize: number of rows of the embedding matrix and width of
        the softmax projection.
      config_param: object providing batch_size, sequence_size,
        embeddingSize, hidden_size and num_layers.
    """
    self.vocabularySize = vocabularySize
    self.config = config_param
    cfg = self.config

    self._inputX = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputTargetsY")

    # Embed the input ids with the ops pinned to the CPU, then unpack the
    # result into one [batch_size, embeddingSize] tensor per time step.
    with tf.device("/cpu:0"):
      embedding = tf.get_variable(
          "embedding", [self.vocabularySize, cfg.embeddingSize])
      embedded = tf.nn.embedding_lookup(embedding, self._inputX)
      step_slices = tf.split(1, cfg.sequence_size, embedded)
      step_inputs = []
      for step_slice in step_slices:
        step_inputs.append(tf.squeeze(step_slice, [1]))

    # Stack num_layers references to a single basic RNN cell.
    base_cell = rnn_cell.BasicRNNCell(cfg.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell([base_cell] * cfg.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        cfg.batch_size, tf.float32)

    # Run the RNN and project the flattened outputs onto the vocabulary.
    step_outputs, states = rnn.rnn(
        self.multilayerRNN, step_inputs, initial_state=self._initial_state)
    flat_outputs = tf.reshape(
        tf.concat(1, step_outputs), [-1, cfg.hidden_size])
    softmax_w = tf.get_variable(
        "softmax_w", [cfg.hidden_size, self.vocabularySize])
    softmax_b = tf.get_variable("softmax_b", [self.vocabularySize])
    self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Loss: every position has weight one; the summed loss is divided by
    # the batch size only.
    flat_targets = tf.reshape(self._inputTargetsY, [-1])
    unit_weights = tf.ones([cfg.batch_size * cfg.sequence_size])
    loss = seq2seq.sequence_loss_by_example(
        [self._logits], [flat_targets], [unit_weights], self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), cfg.batch_size)

    self._final_state = states[-1]
コード例 #4
0
ファイル: train.py プロジェクト: ligz07/Tensorflow
    def __init__(self, vocab_size, batch_size, sequece_length, embedding_size, num_classes):
        """Build an LSTM sequence-labelling graph with loss and accuracy ops.

        Args:
            vocab_size: number of rows of the embedding matrix.
            batch_size: not read in this method.
            sequece_length: number of time steps in each input row.
            embedding_size: width of each embedding vector.
            num_classes: number of output classes per time step.
        """
        self.hyperParam = {
            "hidden_num": 20,
            "l2_lamda": 3,
            "dropout_keep_prob": 0.5,
        }
        hidden_num = self.hyperParam["hidden_num"]
        l2_loss = tf.constant(0.0)

        self.dropout_keep_prob = 0.5
        rnnCell = rnn_cell.BasicLSTMCell(hidden_num, forget_bias=1.0)
        self.input_data = tf.placeholder(
            tf.int32, shape=[None, sequece_length], name="input_data")
        self.weights = tf.placeholder(
            tf.int32, shape=[None, sequece_length], name="weights")
        self.output_data = tf.placeholder(
            tf.int32, [None, sequece_length], name="output_data")
        # Dynamic number of rows in the fed batch.
        a = tf.shape(self.output_data)[0]

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [vocab_size, embedding_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        # One [rows, embedding_size] tensor per time step.
        step_slices = tf.split(1, sequece_length, inputs)
        self.inputs = [tf.squeeze(step_slice, [1]) for step_slice in step_slices]
        self.output, self.states = rnn.rnn(
            rnnCell, self.inputs, dtype=tf.float32)

        # Add dropout
        with tf.name_scope("dropout"):
            keep = self.hyperParam["dropout_keep_prob"]
            self.h_drop = [tf.nn.dropout(step_out, keep)
                           for step_out in self.output]

        with tf.name_scope("result"):
            W = tf.Variable(
                tf.truncated_normal([hidden_num, num_classes], stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            output = tf.reshape(tf.concat(1, self.h_drop), [-1, hidden_num])
            logits = tf.matmul(output, W) + b
            self.scores = logits

        with tf.name_scope("loss"):
            output_refine = tf.reshape(self.output_data, [-1])
            weight = tf.reshape(tf.cast(self.weights, "float32"), [-1])
            loss = seq2seq.sequence_loss_by_example(
                [self.scores], [output_refine], [weight], num_classes)
            # Weighted loss per fed row plus the L2 penalty on W and b.
            data_term = tf.reduce_sum(loss) / tf.cast(a, "float32")
            penalty = self.hyperParam["l2_lamda"] * l2_loss
            self.loss = data_term + penalty

        with tf.name_scope("accurcy"):
            self.predictions = tf.argmax(
                tf.reshape(self.scores, [-1, sequece_length, num_classes]), 2)

            def correct_mask():
                # 1.0 where the predicted class equals the target, else 0.0.
                labels = tf.cast(self.output_data, "int64")
                return tf.cast(tf.equal(self.predictions, labels), "float32")

            # self.kk: fraction of correct predictions weighted by
            # self.weights, computed as a 1xN by Nx1 product.
            aa = tf.expand_dims(tf.reshape(correct_mask(), [-1]), 0)
            bb = tf.expand_dims(
                tf.cast(tf.reshape(self.weights, [-1]), "float32"), 0)
            weighted_hits = tf.squeeze(tf.matmul(aa, bb, transpose_b=True))
            weight_total = tf.reduce_sum(
                tf.cast(self.weights, "float32"), [0, 1])
            self.kk = weighted_hits / weight_total

            # Unweighted accuracy over every position.
            self.accuracy = tf.reduce_mean(correct_mask(), name="accrucy")
コード例 #5
0
    def __init__(self, args, infer=False):
        """Build the RNN language-model graph described by ``args``.

        Args:
            args: object providing model, rnn_size, num_layers, batch_size,
                seq_length, vocab_size and grad_clip. When ``infer`` is true
                its batch_size and seq_length are overwritten with 1.
            infer: when true, each step after the first is fed the embedding
                of the previous step's most likely symbol.

        Raises:
            Exception: if ``args.model`` is not one of 'rnn', 'gru', 'lstm',
                'gridlstm' or 'gridgru'.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        # Select the cell class and any extra constructor arguments.
        extra_cell_kwargs = {}
        model = args.model
        if model == 'rnn':
            cell_class = rnn_cell.BasicRNNCell
        elif model == 'gru':
            cell_class = rnn_cell.GRUCell
        elif model == 'lstm':
            cell_class = rnn_cell.BasicLSTMCell
        elif model == 'gridlstm':
            cell_class = grid_rnn.Grid2LSTMCell
            extra_cell_kwargs.update(
                {'use_peepholes': True, 'forget_bias': 1.0})
        elif model == 'gridgru':
            cell_class = grid_rnn.Grid2GRUCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        single_cell = cell_class(args.rnn_size, **extra_cell_kwargs)
        stacked_cell = rnn_cell.MultiRNNCell([single_cell] * args.num_layers)
        self.cell = stacked_cell

        io_shape = [args.batch_size, args.seq_length]
        self.input_data = tf.placeholder(tf.int32, io_shape)
        self.targets = tf.placeholder(tf.int32, io_shape)
        self.initial_state = stacked_cell.zero_state(
            args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                "softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                    "embedding", [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One [batch_size, rnn_size] tensor per time step.
                inputs = [tf.squeeze(step_slice, [1])
                          for step_slice in tf.split(1, args.seq_length, embedded)]

        def loop(prev, _):
            # Embed the argmax symbol of the previous output; the argmax is
            # excluded from gradient computation.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        if infer:
            loop_function = loop
        else:
            loop_function = None
        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, stacked_cell,
            loop_function=loop_function, scope='rnnlm')

        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [unit_weights], args.vocab_size)
        # Mean loss per (sequence, time step) position.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state

        # Training op: Adam on globally norm-clipped gradients; the learning
        # rate variable starts at 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #6
0
ファイル: protein_rnn.py プロジェクト: PhiphyZhou/protein
    def __init__(self, is_training, config):
        """Build a multi-layer LSTM graph over per-step feature vectors.

        Args:
            is_training: when true, dropout is applied (if
                ``config.keep_prob < 1``) and the training op is built.
            config: object providing batch_size, num_steps, hidden_size,
                feature_size, keep_prob, num_layers and max_grad_norm.
        """
        self.batch_size = batch_size = config.batch_size  # size for mini batch training
        self.num_steps = num_steps = config.num_steps  # number of time steps the RNN is unrolled for
        size = config.hidden_size  # state size
        feature_size = config.feature_size

        # NOTE(review): both placeholders are int32, yet the inputs are passed
        # to tf.nn.dropout and to the LSTM cell below -- confirm whether a
        # float dtype (or a cast) is intended.
        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps, feature_size])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps, feature_size])

        basic_cell = rnn_cell.BasicLSTMCell(size)
        if is_training and config.keep_prob < 1:  # use dropout
            basic_cell = rnn_cell.DropoutWrapper(basic_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([basic_cell] * config.num_layers)  # multiple layers
        self._initial_state = cell.zero_state(batch_size, tf.float32)

        inputs = self._input_data
        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Unroll the RNN by hand: the first step creates the cell variables,
        # every later step reuses them.
        outputs = []
        states = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
                states.append(state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        logits = tf.nn.xw_plus_b(
            output, tf.get_variable("softmax_w", [size, feature_size]), tf.get_variable("softmax_b", [feature_size])
        )

        # NOTE(review): the targets are flattened to
        # batch_size * num_steps * feature_size entries while the weights hold
        # batch_size * num_steps entries -- confirm these are meant to match.
        loss = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])], [tf.ones([batch_size * num_steps])], feature_size
        )
        # Summed loss divided by the batch size only.
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config.max_grad_norm)
        # self.lr is not defined in this method; presumably a property of the
        # enclosing class that exposes self._lr -- TODO confirm.
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #7
0
ファイル: seq2seq_test.py プロジェクト: nickicindy/tensorflow
  def testSequenceLossByExample(self):
    """Verify per-example sequence loss, averaged and summed over time.

    Uses constant logits, targets and unit weights for three timesteps and
    a batch of two, then checks the evaluated losses against fixed values.
    """
    with self.test_session() as sess:
      output_classes = 5
      batch = 2
      num_steps = 3

      # Timestep i: logits filled with i + 0.5, targets filled with i.
      logits = [tf.constant(step + 0.5, shape=[batch, output_classes])
                for step in xrange(num_steps)]
      targets = [tf.constant(step, tf.int32, shape=[batch])
                 for step in xrange(num_steps)]
      weights = [tf.constant(1.0, shape=[batch]) for _ in xrange(num_steps)]

      # Averaged across the timesteps of each example.
      mean_loss = seq2seq.sequence_loss_by_example(
          logits, targets, weights, output_classes,
          average_across_timesteps=True)
      expected_mean = np.asarray([1.609438, 1.609438])
      self.assertAllClose(sess.run(mean_loss), expected_mean)

      # Summed across the timesteps of each example.
      total_loss = seq2seq.sequence_loss_by_example(
          logits, targets, weights, output_classes,
          average_across_timesteps=False)
      expected_total = np.asarray([4.828314, 4.828314])
      self.assertAllClose(sess.run(total_loss), expected_total)
コード例 #8
0
ファイル: model.py プロジェクト: kurtespinosa/compoetry
    def __init__(self,
                 rnn_size,
                 num_layers,
                 vocab_size,
                 grad_clip,
                 batch_size=1,
                 seq_length=1):
        """Build a multi-layer LSTM language-model graph.

        Args:
            rnn_size: LSTM state size; also the embedding width.
            num_layers: number of stacked LSTM layers.
            vocab_size: number of symbols in the vocabulary.
            grad_clip: global-norm threshold for gradient clipping.
            batch_size: number of sequences per batch.
            seq_length: number of time steps per sequence.
        """
        lstm = rnn_cell.BasicLSTMCell(rnn_size)
        stacked = rnn_cell.MultiRNNCell([lstm] * num_layers)
        self.cell = stacked

        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = stacked.zero_state(batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable('softmax_w', [rnn_size, vocab_size])
            softmax_b = tf.get_variable('softmax_b', [vocab_size])
            with tf.device('/cpu:0'):
                embedding = tf.get_variable('embedding',
                                            [vocab_size, rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One [batch_size, rnn_size] tensor per time step.
                inputs = []
                for step_slice in tf.split(1, seq_length, embedded):
                    inputs.append(tf.squeeze(step_slice, [1]))

        def loop(prev, _):
            # Embed the argmax symbol of the previous output; the argmax is
            # excluded from gradient computation.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # The feed-previous loop function is used only in the single-step,
        # single-sequence configuration.
        single_step = batch_size == 1 and seq_length == 1
        if single_step:
            loop_fn = loop
        else:
            loop_fn = None

        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, stacked,
            loop_function=loop_fn, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([batch_size * seq_length])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [unit_weights], vocab_size)
        # Mean loss per (sequence, time step) position.
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state

        # Adam on globally norm-clipped gradients; the learning-rate
        # variable starts at 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #9
0
    def __init__(self, args, infer=False, loop=0):
        """Build the RNN language-model graph described by ``args``.

        Args:
            args: object providing model, rnn_size, num_layers, batch_size,
                seq_length, vocab_size and grad_clip. When ``infer`` is true
                its batch_size and seq_length are overwritten with 1.
            infer: when true, each step after the first is fed the embedding
                of the previous step's most likely symbol.
            loop: when greater than 0, the 'rnnlm' variable scope is switched
                to reuse mode before its variables are requested.

        Raises:
            Exception: if ``args.model`` is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm') as scope1:
            if loop > 0:
                scope1.reuse_variables()
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                # One [batch_size, rnn_size] tensor per time step.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Named differently from the ``loop`` argument so that the integer
        # flag is not rebound to a function part-way through the method.
        def feed_previous(prev, _):
            # Embed the argmax symbol of the previous output; the argmax is
            # excluded from gradient computation.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=feed_previous if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        # Mean loss per (sequence, time step) position.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #10
0
  def __init__(self, is_training, config):
    """Build a multi-layer LSTM graph over pre-computed float inputs.

    Args:
      is_training: when true, output dropout is applied to the cells (if
        config.keep_prob < 1) and the training op is built.
      config: object providing batch_size, num_steps, hidden_size,
        output_size, keep_prob, num_layers and max_grad_norm.
    """
    batch_size = config.batch_size
    num_steps = config.num_steps
    self.batch_size = batch_size
    self.num_steps = num_steps
    hidden_size = config.hidden_size
    output_size = config.output_size

    self._input_data = tf.placeholder(
        tf.float32, [batch_size, num_steps, hidden_size])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    base_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
      base_cell = rnn_cell.DropoutWrapper(
          base_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    inputs = self._input_data

    # Unroll the RNN by hand: the first step creates the cell variables,
    # every later step reuses them.
    outputs = []
    states = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
      for time_step in range(num_steps):
        if time_step > 0:
          tf.get_variable_scope().reuse_variables()
        step_input = inputs[:, time_step, :]
        cell_output, state = cell(step_input, state)
        outputs.append(cell_output)
        states.append(state)

    output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
    softmax_w = tf.get_variable("softmax_w", [hidden_size, output_size])
    softmax_b = tf.get_variable("softmax_b", [output_size])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)

    flat_targets = tf.reshape(self._targets, [-1])
    unit_weights = tf.ones([batch_size * num_steps])
    loss = seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [unit_weights], output_size)
    # Summed loss divided by the batch size only.
    cost = tf.reduce_sum(loss) / batch_size
    self._cost = cost
    self._final_state = states[-1]
    self._output = output
    self._logits = logits

    if is_training:
      # Plain gradient descent on globally norm-clipped gradients.
      self._lr = tf.Variable(0.0, trainable=False)
      tvars = tf.trainable_variables()
      raw_grads = tf.gradients(cost, tvars)
      grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
      # self.lr is not defined in this method; presumably a property of
      # the enclosing class that exposes self._lr -- TODO confirm.
      optimizer = tf.train.GradientDescentOptimizer(self.lr)
      self._train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #11
0
ファイル: model.py プロジェクト: lacker/ai
  def __init__(self, args, sampling=False):
    """Build a multi-layer LSTM language-model graph.

    Args:
      args: object providing rnn_size, num_layers, batch_size, seq_length,
        vocab_size and grad_clip. When `sampling` is true its batch_size
        and seq_length are overwritten with 1.
      sampling: when true, each step after the first is fed the embedding
        of the previous step's most likely symbol.
    """
    self.args = args
    if sampling:
      args.batch_size = 1
      args.seq_length = 1

    lstm = rnn_cell.BasicLSTMCell(args.rnn_size)
    self.cell = rnn_cell.MultiRNNCell([lstm] * args.num_layers)

    io_shape = [args.batch_size, args.seq_length]
    self.input_data = tf.placeholder(tf.int32, io_shape)
    self.targets = tf.placeholder(tf.int32, io_shape)
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
      softmax_w = tf.get_variable(
          "softmax_w", [args.rnn_size, args.vocab_size])
      softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
      with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [args.vocab_size, args.rnn_size])
        embedded = tf.nn.embedding_lookup(embedding, self.input_data)
        # One [batch_size, rnn_size] tensor per time step.
        inputs = []
        for step_slice in tf.split(1, args.seq_length, embedded):
          inputs.append(tf.squeeze(step_slice, [1]))

    def loop(prev, _):
      # Embed the argmax symbol of the previous output; the argmax is
      # excluded from gradient computation.
      prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
      prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
      return tf.nn.embedding_lookup(embedding, prev_symbol)

    if sampling:
      loop_function = loop
    else:
      loop_function = None
    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, self.cell,
        loop_function=loop_function, scope='rnnlm')

    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)

    flat_targets = tf.reshape(self.targets, [-1])
    unit_weights = tf.ones([args.batch_size * args.seq_length])
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [flat_targets], [unit_weights], args.vocab_size)
    # Mean loss per (sequence, time step) position.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    # Adam on globally norm-clipped gradients; the learning-rate variable
    # starts at 0.0.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(self.cost, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #12
0
    def __init__(self, conf):
        """Build a bidirectional multi-layer LSTM tagging graph.

        Args:
            conf: object providing rnn_size, num_layers, keep_prob, infer,
                batch_size, seq_length, output_size, grad_clip and
                nerloader.vocab_size.
        """
        self.conf = conf
        cfg = self.conf

        forward = BasicLSTMCell(cfg.rnn_size)
        backward = BasicLSTMCell(cfg.rnn_size)

        # Output dropout on both directions, except when conf.infer is set.
        if conf.keep_prob < 1.0 and not conf.infer:
            forward = DropoutWrapper(forward, output_keep_prob=conf.keep_prob)
            backward = DropoutWrapper(backward, output_keep_prob=conf.keep_prob)
        forward = MultiRNNCell([forward] * cfg.num_layers)
        self.cell_fw = forward
        backward = MultiRNNCell([backward] * cfg.num_layers)
        self.cell_bw = backward

        self.input_data = tf.placeholder(
            tf.int32, [cfg.batch_size, cfg.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [cfg.batch_size, cfg.seq_length])

        self.initial_state_fw = forward.zero_state(cfg.batch_size, tf.float32)
        self.initial_state_bw = backward.zero_state(cfg.batch_size, tf.float32)

        # The softmax input is 2 * rnn_size wide: forward and backward
        # outputs side by side.
        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable(
                "softmax_w", [cfg.rnn_size * 2, cfg.output_size])
            softmax_b = tf.get_variable("softmax_b", [cfg.output_size])

        embedding = tf.get_variable(
            "embedding", [cfg.nerloader.vocab_size, cfg.rnn_size])
        embedded = tf.nn.embedding_lookup(embedding, self.input_data)
        if conf.keep_prob < 1.0 and not conf.infer:
            embedded = tf.nn.dropout(embedded, conf.keep_prob)
        # One [batch_size, rnn_size] tensor per time step.
        inputs = []
        for step_slice in tf.split(1, conf.seq_length, embedded):
            inputs.append(tf.squeeze(step_slice, [1]))

        outputs_bi = rnn.bidirectional_rnn(
            forward, backward, inputs,
            initial_state_fw=self.initial_state_fw,
            initial_state_bw=self.initial_state_bw,
            scope='rnn')
        output = tf.reshape(tf.concat(1, outputs_bi), [-1, cfg.rnn_size * 2])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)

        self.loss_weights = [tf.ones([cfg.batch_size * cfg.seq_length])]

        flat_targets = tf.reshape(self.targets, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], self.loss_weights)
        # Mean loss per (sequence, time step) position.
        self.cost = (tf.reduce_sum(loss) / cfg.batch_size / cfg.seq_length)
        tf.scalar_summary("loss", self.cost)
        self.out = output

        # Adam on globally norm-clipped gradients; the learning-rate
        # variable starts at 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, cfg.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_summary_op = tf.merge_all_summaries()
コード例 #13
0
    def __init__(self, vocabularySize, config_param):
        """Construct the graph for a stacked basic-RNN language model.

        Args:
            vocabularySize: size of the vocabulary; sets the embedding row
                count and the width of the softmax projection.
            config_param: object providing batch_size, sequence_size,
                embeddingSize, hidden_size and num_layers.
        """
        self.vocabularySize = vocabularySize
        self.config = config_param

        batch_size = self.config.batch_size
        sequence_size = self.config.sequence_size
        hidden_size = self.config.hidden_size

        self._inputX = tf.placeholder(
            tf.int32, [batch_size, sequence_size], "InputsX")
        self._inputTargetsY = tf.placeholder(
            tf.int32, [batch_size, sequence_size], "InputTargetsY")

        # Embedding lookup, with its ops pinned to the CPU; the result is
        # unpacked into one tensor per time step.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [self.vocabularySize, self.config.embeddingSize])
            looked_up = tf.nn.embedding_lookup(embedding, self._inputX)
            pieces = tf.split(1, sequence_size, looked_up)
            rnn_inputs = [tf.squeeze(piece, [1]) for piece in pieces]

        # Multi-layer RNN built from repeated references to one cell.
        layer_cell = rnn_cell.BasicRNNCell(hidden_size)
        layers = [layer_cell] * self.config.num_layers
        self.multilayerRNN = rnn_cell.MultiRNNCell(layers)
        self._initial_state = self.multilayerRNN.zero_state(
            batch_size, tf.float32)

        # Logits: RNN outputs flattened to rows of hidden_size, then
        # projected onto the vocabulary.
        rnn_outputs, states = rnn.rnn(
            self.multilayerRNN, rnn_inputs,
            initial_state=self._initial_state)
        flattened = tf.reshape(tf.concat(1, rnn_outputs), [-1, hidden_size])
        projection_w = tf.get_variable(
            "softmax_w", [hidden_size, self.vocabularySize])
        projection_b = tf.get_variable("softmax_b", [self.vocabularySize])
        self._logits = tf.nn.xw_plus_b(flattened, projection_w, projection_b)
        self._predictionSoftmax = tf.nn.softmax(self._logits)

        # Loss with unit weights; the total is divided by the batch size
        # only.
        targets_flat = tf.reshape(self._inputTargetsY, [-1])
        ones = tf.ones([batch_size * sequence_size])
        loss = seq2seq.sequence_loss_by_example(
            [self._logits], [targets_flat], [ones], self.vocabularySize)
        self._cost = tf.div(tf.reduce_sum(loss), batch_size)

        self._final_state = states[-1]
コード例 #14
0
    def __init__(self, args, infer=False):
        """Assemble the recurrent graph, its loss/accuracy and the train op.

        The network embeds the input token ids, runs them through a stacked
        recurrent cell, averages the per-step outputs and projects the result
        to one logit vector per sequence.

        Arguments:
            args: Settings object; the attributes read here are model,
                rnn_size, num_layers, batch_size, seq_length, vocab_size
                and grad_clip.
            infer: When true, the (still unimplemented) `loop` function is
                handed to the decoder as its loop_function.

        Raises:
            Exception: if args.model is not one of 'rnn', 'gru' or 'lstm'.
        """
        self.args = args

        # Map the requested model name to the matching cell class.
        cell_class_names = {
            'rnn': 'BasicRNNCell',
            'gru': 'GRUCell',
            'lstm': 'BasicLSTMCell',
        }
        cell_class_name = cell_class_names.get(args.model)
        if cell_class_name is None:
            raise Exception("model type not supported: {}".format(args.model))
        cell_fn = getattr(rnn_cell, cell_class_name)

        # The same cell object is repeated for every layer of the stack.
        single_layer = cell_fn(args.rnn_size)
        stacked = rnn_cell.MultiRNNCell([single_layer] * args.num_layers)
        self.cell = stacked

        # One id sequence and one target id per batch entry.
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size])
        self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                "softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                    "embedding", [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One slice per time step, each squeezed to
                # (args.batch_size, args.rnn_size).
                step_slices = tf.split(1, args.seq_length, embedded)
                inputs = [
                    tf.squeeze(step_slice, [1]) for step_slice in step_slices
                ]

        def loop(prev, _):
            # TODO: not implemented yet, so this always yields None. The
            # disabled sketch projected `prev` through the softmax layer,
            # took the (gradient-stopped) argmax and embedded that symbol.
            return None

        # The decoder returns one output per time step, each of shape
        # (args.batch_size, args.rnn_size).
        decoder_loop = loop if infer else None
        outputs, states = seq2seq.rnn_decoder(
            inputs, self.initial_state, stacked,
            loop_function=decoder_loop, scope='rnnlm')

        def handle_outputs(use_lastone=True):
            """Collapse the step outputs into one [batch_size, rnn_size] tensor."""
            if not use_lastone:
                # Element-wise mean over all time steps.
                return tf.div(tf.add_n(outputs), len(outputs))
            return outputs[-1]

        output = handle_outputs(use_lastone=False)

        # shape(logits) = (batch_size, vocab_size)
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)

        # Cross-entropy with a uniform weight of 1 per example.
        logit_steps = [self.logits]
        target_steps = [tf.reshape(self.targets, [-1])]
        weight_steps = [tf.ones([args.batch_size])]
        loss = seq2seq.sequence_loss_by_example(
            logit_steps, target_steps, weight_steps, args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size
        tf.scalar_summary('cost', self.cost)

        # Evaluate accuracy: fraction of examples whose argmax equals the target.
        predicted = tf.cast(tf.argmax(self.logits, 1), tf.int32)
        expected = tf.reshape(self.targets, [-1])
        correct_pred = tf.equal(predicted, expected)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.scalar_summary('accuracy', self.accuracy)

        self.final_state = states

        # Adam on globally clipped gradients; lr is a non-trainable variable.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #15
0
    def __init__(self,
                 vocab,
                 tagset,
                 alphabet,
                 word_embedding_size,
                 char_embedding_size,
                 num_chars,
                 num_steps,
                 optimizer_desc,
                 generate_lemmas,
                 l2,
                 dropout_prob_values,
                 experiment_name,
                 supply_form_characters_to_lemma,
                 threads=0,
                 seed=None,
                 write_summaries=True,
                 use_attention=True,
                 scheduled_sampling=None):
        """
        Builds the tagger computation graph and initializes it in a TensorFlow
        session.

        The graph consists of optional word-level and character-level input
        embeddings, a forward and a backward sentence-level LSTM, a softmax
        tag classifier over their concatenated outputs and, when
        `generate_lemmas` is set, a character-level lemma decoder.

        Arguments:

            vocab: Vocabulary of word forms.

            tagset: Vocabulary of possible tags.

            alphabet: Vocabulary of possible characters.

            word_embedding_size (int): Size of the form-based word embedding.
                A falsy value disables the word-level input.

            char_embedding_size (int): Size of character embeddings, i.e. a
                half of the size of the character-based words embeddings.
                A falsy value disables the character-level input.

            num_chars: Maximum length of a word.

            num_steps: Maximum length of a sentence.

            optimizer_desc: Description of the optimizer. The string is
                appended to 'tf.train.' and passed to eval(), so it has to be
                a trusted Python expression.

            generate_lemmas: Generate lemmas during tagging.

            l2: Multiplier of the L2 penalty summed over the variables
                collected in `regularize`.

            dropout_prob_values: Stored on the instance as
                `self.dropout_prob_values`; not otherwise used while the
                graph is built.

            experiment_name: Suffix of the summary log directory name.

            supply_form_characters_to_lemma: If true, every lemma decoder
                input is the concatenation of the embedded lemma character
                and the embedded word-form character.

            threads: If positive, used as both the inter-op and the intra-op
                thread count of the session; otherwise the session is created
                without an explicit config.

            seed: TensorFlow seed
                NOTE(review): not referenced anywhere in this constructor.

            write_summaries: Write summaries using TensorFlow interface.

            use_attention: Decode lemmas with seq2seq.attention_decoder
                instead of seq2seq.rnn_decoder.

            scheduled_sampling: If truthy, the training decoder uses
                `sampling_loop`, in which this value sets the sampling
                threshold.
        """

        self.num_steps = num_steps
        self.num_chars = num_chars

        self.word_embedding_size = word_embedding_size
        self.char_embedding_size = char_embedding_size
        # Width of the sentence-level LSTMs: the word embedding plus the
        # forward and backward character-level states.
        self.lstm_size = word_embedding_size + 2 * char_embedding_size  ###

        self.vocab = vocab
        self.tagset = tagset
        self.alphabet = alphabet

        self.dropout_prob_values = dropout_prob_values

        # Initial states of the two sentence-level LSTMs are fed from outside.
        self.forward_initial_state = tf.placeholder(
            tf.float32,
            [None, rnn_cell.BasicLSTMCell(self.lstm_size).state_size],
            name="forward_lstm_initial_state")
        self.backward_initial_state = tf.placeholder(
            tf.float32,
            [None, rnn_cell.BasicLSTMCell(self.lstm_size).state_size],
            name="backward_lstm_initial_state")
        self.sentence_lengths = tf.placeholder(tf.int64, [None],
                                               name="sentence_lengths")
        self.tags = tf.placeholder(tf.int32, [None, num_steps],
                                   name="ground_truth_tags")
        # Element 0 is applied to the inputs, element 1 to the LSTM outputs.
        self.dropout_prob = tf.placeholder(tf.float32, [None],
                                           name="dropout_keep_p")
        self.generate_lemmas = generate_lemmas

        global_step = tf.Variable(0, trainable=False)

        # Per-token feature tensors to concatenate, and the variables that
        # receive the L2 penalty.
        input_list = []
        regularize = []

        # Word-level embeddings
        if word_embedding_size:
            self.words = tf.placeholder(tf.int32, [None, num_steps],
                                        name='words')
            word_embeddings = tf.Variable(
                tf.random_uniform([len(vocab), word_embedding_size], -1.0,
                                  1.0))
            we_lookup = tf.nn.embedding_lookup(word_embeddings, self.words)

            input_list.append(we_lookup)

        # Character-level embeddings
        if char_embedding_size:
            self.chars = tf.placeholder(tf.int32, [None, num_steps, num_chars],
                                        name='chars')
            self.chars_lengths = tf.placeholder(tf.int64, [None, num_steps],
                                                name='chars_lengths')

            char_embeddings = \
                tf.Variable(tf.random_uniform([len(alphabet), char_embedding_size], -1.0, 1.0))
            ce_lookup = tf.nn.embedding_lookup(char_embeddings, self.chars)

            # Flatten sentences so that every word becomes one row of
            # num_chars character embeddings, then split per character.
            reshaped_ce_lookup = tf.reshape(
                ce_lookup, [-1, num_chars, char_embedding_size],
                name="reshape-char_inputs")
            char_inputs = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, num_chars, reshaped_ce_lookup)
            ]

            char_inputs_lengths = tf.reshape(self.chars_lengths, [-1])

            with tf.variable_scope('char_forward'):
                char_lstm = rnn_cell.BasicLSTMCell(char_embedding_size)
                _, char_last_state = rnn.rnn(
                    cell=char_lstm,
                    inputs=char_inputs,
                    sequence_length=char_inputs_lengths,
                    dtype=tf.float32)
                # Switch the scope to reuse mode to fetch the LSTM weight
                # matrix created by rnn.rnn above.
                tf.get_variable_scope().reuse_variables()
                regularize.append(
                    tf.get_variable('RNN/BasicLSTMCell/Linear/Matrix'))

            with tf.variable_scope('char_backward'):
                char_lstm_rev = rnn_cell.BasicLSTMCell(char_embedding_size)
                _, char_last_state_rev = rnn.rnn(
                    cell=char_lstm_rev,
                    inputs=self._reverse_seq(char_inputs, char_inputs_lengths),
                    sequence_length=char_inputs_lengths,
                    dtype=tf.float32)
                tf.get_variable_scope().reuse_variables()
                regularize.append(
                    tf.get_variable('RNN/BasicLSTMCell/Linear/Matrix'))

            # Keep the second half of each final LSTM state.
            # NOTE(review): presumably the hidden output h of the (c, h)
            # concatenation -- confirm against the BasicLSTMCell state layout.
            last_char_lstm_state = tf.split(1, 2, char_last_state)[1]
            last_char_lstm_state_rev = tf.split(1, 2, char_last_state_rev)[1]

            # Restore the [batch, num_steps, char_embedding_size] layout.
            last_char_states = \
                tf.reshape(last_char_lstm_state, [-1, num_steps, char_embedding_size],
                           name="reshape-charstates")
            last_char_states_rev = tf.reshape(
                last_char_lstm_state_rev, [-1, num_steps, char_embedding_size],
                name="reshape-charstates_rev")

            char_output = tf.concat(2,
                                    [last_char_states, last_char_states_rev])

            input_list.append(char_output)

        # All inputs correctly sliced
        # Apply input dropout, concatenate the feature groups along the last
        # axis and split the result into one tensor per sentence position.
        input_list_dropped = [
            tf.nn.dropout(x, self.dropout_prob[0]) for x in input_list
        ]
        inputs = [
            tf.squeeze(input_, [1]) for input_ in tf.split(
                1, num_steps, tf.concat(2, input_list_dropped))
        ]

        with tf.variable_scope('forward'):
            lstm = rnn_cell.BasicLSTMCell(self.lstm_size)
            outputs, last_state = rnn.rnn(
                cell=lstm,
                inputs=inputs,
                dtype=tf.float32,
                initial_state=self.forward_initial_state,
                sequence_length=self.sentence_lengths)

            tf.get_variable_scope().reuse_variables()
            regularize.append(
                tf.get_variable('RNN/BasicLSTMCell/Linear/Matrix'))

        with tf.variable_scope('backward'):
            lstm_rev = rnn_cell.BasicLSTMCell(self.lstm_size)
            outputs_rev_rev, last_state_rev = rnn.rnn(
                cell=lstm_rev,
                inputs=self._reverse_seq(inputs, self.sentence_lengths),
                dtype=tf.float32,
                initial_state=self.backward_initial_state,
                sequence_length=self.sentence_lengths)

            outputs_rev = self._reverse_seq(outputs_rev_rev,
                                            self.sentence_lengths)

            tf.get_variable_scope().reuse_variables()
            regularize.append(
                tf.get_variable('RNN/BasicLSTMCell/Linear/Matrix'))

        #outputs_forward = tf.reshape(tf.concat(1, outputs), [-1, self.lstm_size],
        #                    name="reshape-outputs_forward")

        #outputs_backward = tf.reshape(tf.concat(1, outputs_rev), [-1, self.lstm_size],
        #                    name="reshape-outputs_backward")

        #forward_w = tf.get_variable("forward_w", [self.lstm_size, self.lstm_size])
        #backward_w = tf.get_variable("backward_w", [self.lstm_size, self.lstm_size])
        #non_linearity_bias = tf.get_variable("non_linearity_b", [self.lstm_size])

        # Pair every forward output with a backward output.
        # NOTE(review): outputs_rev has already been passed through
        # _reverse_seq above, and reversed() flips the list order once more;
        # confirm this pairing against _reverse_seq's semantics.
        outputs_bidi = [
            tf.concat(1, [o1, o2])
            for o1, o2 in zip(outputs, reversed(outputs_rev))
        ]

        #output = tf.tanh(tf.matmul(outputs_forward, forward_w) + tf.matmul(outputs_backward, backward_w) + non_linearity_bias)
        # One row of size 2 * lstm_size per (sentence, position) pair.
        output = tf.reshape(tf.concat(1, outputs_bidi),
                            [-1, 2 * self.lstm_size],
                            name="reshape-outputs_bidi")
        output_dropped = tf.nn.dropout(output, self.dropout_prob[1])

        # We are computing only the logits, not the actual softmax -- while
        # computing the loss, it is done by the sequence_loss_by_example and
        # during the runtime classification, the argmax over logits is enough.

        softmax_w = tf.get_variable(
            "softmax_w", [2 * self.lstm_size, len(tagset)])
        logits_flatten = tf.nn.xw_plus_b(
            output_dropped, softmax_w,
            tf.get_variable("softmax_b", [len(tagset)]))
        #tf.get_variable_scope().reuse_variables()
        regularize.append(softmax_w)

        self.logits = tf.reshape(logits_flatten,
                                 [-1, num_steps, len(tagset)],
                                 name="reshape-logits")
        estimated_tags_flat = tf.to_int32(
            tf.argmax(logits_flatten, dimension=1))
        self.last_state = last_state

        # output mask: compute loss only if it isn't a padded word (i.e. zero index)
        output_mask = tf.reshape(tf.to_float(tf.not_equal(self.tags, 0)), [-1])

        gt_tags_flat = tf.reshape(self.tags, [-1])
        tagging_loss = seq2seq.sequence_loss_by_example(
            logits=[logits_flatten],
            targets=[gt_tags_flat],
            weights=[output_mask])

        # Accuracy over non-padded positions only.
        tagging_accuracy = \
            tf.reduce_sum(tf.to_float(tf.equal(estimated_tags_flat, gt_tags_flat)) * output_mask) \
                / tf.reduce_sum(output_mask)
        # Each metric is registered twice so that the "train" and "dev"
        # summary collections can be merged and written separately.
        tf.scalar_summary('train_accuracy',
                          tagging_accuracy,
                          collections=["train"])
        tf.scalar_summary('dev_accuracy',
                          tagging_accuracy,
                          collections=["dev"])

        self.cost = tf.reduce_mean(tagging_loss)

        tf.scalar_summary('train_tagging_loss',
                          tf.reduce_mean(tagging_loss),
                          collections=["train"])
        tf.scalar_summary('dev_tagging_loss',
                          tf.reduce_mean(tagging_loss),
                          collections=["dev"])

        # Optional character-level lemma decoder.
        # NOTE(review): this branch reads self.chars and reshaped_ce_lookup,
        # which only exist when char_embedding_size is truthy.
        if generate_lemmas:
            with tf.variable_scope('decoder'):
                # Lemma character ids; two positions longer than a word form.
                self.lemma_chars = tf.placeholder(
                    tf.int32, [None, num_steps, num_chars + 2],
                    name='lemma_chars')

                lemma_state_size = self.lstm_size

                # Projection from decoder output to character logits.
                # NOTE(review): only one bound (0.5) is passed to
                # random_uniform here, unlike the (-0.5, 0.5) pair used for
                # lemma_char_embeddings below -- confirm this is intended.
                lemma_w = tf.Variable(tf.random_uniform(
                    [lemma_state_size, len(alphabet)], 0.5),
                                      name="state_to_char_w")
                lemma_b = tf.Variable(tf.fill([len(alphabet)],
                                              -math.log(len(alphabet))),
                                      name="state_to_char_b")
                # Embeddings are half-size when form characters are supplied,
                # because two of them are concatenated per decoder input.
                # NOTE(review): `/` yields a float under Python 3 -- confirm
                # the target interpreter.
                lemma_char_embeddings = tf.Variable(tf.random_uniform([
                    len(alphabet), lemma_state_size /
                    (2 if supply_form_characters_to_lemma else 1)
                ], -0.5, 0.5),
                                                    name="char_embeddings")

                # One tensor of character ids per lemma position.
                lemma_char_inputs = \
                    [tf.squeeze(input_, [1]) for input_ in
                        tf.split(1, num_chars + 2, tf.reshape(self.lemma_chars, [-1, num_chars + 2],
                                                              name="reshape-lemma_char_inputs"))]

                if supply_form_characters_to_lemma:
                    # Word-form character ids per position, extended by one
                    # all-zero step.
                    char_inputs_zeros = \
                        [tf.squeeze(chars, [1]) for chars in
                            tf.split(1, num_chars, tf.reshape(self.chars, [-1, num_chars],
                                                              name="reshape-char_inputs_zeros"))]
                    char_inputs_zeros.append(char_inputs_zeros[0] * 0)

                    def loop(prev_state, i):
                        # it takes the previous hidden state, finds the character and formats it
                        # as input for the next time step ... used in the decoder in the "real decoding scenario"
                        out_activation = tf.matmul(prev_state,
                                                   lemma_w) + lemma_b
                        prev_char_index = tf.argmax(out_activation, 1)
                        return tf.concat(1, [
                            tf.nn.embedding_lookup(lemma_char_embeddings,
                                                   prev_char_index),
                            tf.nn.embedding_lookup(lemma_char_embeddings,
                                                   char_inputs_zeros[i])
                        ])

                    # Ground-truth decoder inputs: lemma character embedding
                    # concatenated with the form character embedding.
                    embedded_lemma_characters = []
                    for lemma_chars, form_chars in zip(lemma_char_inputs[:-1],
                                                       char_inputs_zeros):
                        embedded_lemma_characters.append(
                            tf.concat(1, [
                                tf.nn.embedding_lookup(lemma_char_embeddings,
                                                       lemma_chars),
                                tf.nn.embedding_lookup(lemma_char_embeddings,
                                                       form_chars)
                            ]))
                else:

                    def loop(prev_state, _):
                        # it takes the previous hidden state, finds the character and formats it
                        # as input for the next time step ... used in the decoder in the "real decoding scenario"
                        out_activation = tf.matmul(prev_state,
                                                   lemma_w) + lemma_b
                        prev_char_index = tf.argmax(out_activation, 1)
                        return tf.nn.embedding_lookup(lemma_char_embeddings,
                                                      prev_char_index)

                    # Ground-truth decoder inputs: lemma characters only.
                    embedded_lemma_characters = []
                    for lemma_chars in lemma_char_inputs[:-1]:
                        embedded_lemma_characters.append(
                            tf.nn.embedding_lookup(lemma_char_embeddings,
                                                   lemma_chars))

                def sampling_loop(prev_state, i):
                    # Scheduled sampling: element-wise, take the ground-truth
                    # input where a uniform draw is <= threshold and the
                    # model's own prediction elsewhere. The threshold shrinks
                    # as global_step grows.
                    threshold = scheduled_sampling / (
                        scheduled_sampling + tf.exp(tf.to_float(global_step)))
                    condition = tf.less_equal(
                        tf.random_uniform(
                            tf.shape(embedded_lemma_characters[0])), threshold)
                    return tf.select(condition, embedded_lemma_characters[i],
                                     loop(prev_state, i))

                decoder_cell = rnn_cell.BasicLSTMCell(lemma_state_size)

                # Loop function of the training decoder: scheduled sampling
                # if requested, otherwise none.
                if scheduled_sampling:
                    lf = sampling_loop
                else:
                    lf = None

                # Training decoder; output_dropped is passed as its initial
                # state.
                if use_attention:
                    lemma_outputs_train, _ = seq2seq.attention_decoder(
                        embedded_lemma_characters,
                        output_dropped,
                        reshaped_ce_lookup,
                        decoder_cell,
                        loop_function=lf)
                else:
                    lemma_outputs_train, _ = seq2seq.rnn_decoder(
                        embedded_lemma_characters,
                        output_dropped,
                        decoder_cell,
                        loop_function=lf)

                # Reuse the decoder variables for the runtime decoder below.
                tf.get_variable_scope().reuse_variables()
                #regularize.append(tf.get_variable('attention_decoder/BasicLSTMCell/Linear/Matrix'))

                tf.get_variable_scope().reuse_variables()

                # Runtime decoder: always feeds back its own prediction
                # through `loop`.
                if use_attention:
                    lemma_outputs_runtime, _ = \
                        seq2seq.attention_decoder(embedded_lemma_characters, output_dropped, reshaped_ce_lookup, decoder_cell,
                            loop_function=loop)
                else:
                    lemma_outputs_runtime, _ = \
                        seq2seq.rnn_decoder(embedded_lemma_characters, output_dropped, decoder_cell,
                            loop_function=loop)

                lemma_char_logits_train = \
                    [tf.matmul(o, lemma_w) + lemma_b for o in lemma_outputs_train]

                lemma_char_logits_runtime = \
                    [tf.matmul(o, lemma_w) + lemma_b for o in lemma_outputs_runtime]

                # Most likely character per decoding step, reshaped to
                # [batch, num_steps, num_chars + 1].
                self.lemmas_decoded = \
                    tf.reshape(tf.transpose(tf.argmax(tf.pack(lemma_char_logits_runtime), 2)), [-1, num_steps, num_chars + 1])

                # Targets are the lemma characters shifted by one position;
                # zero-index targets get zero weight.
                lemma_char_weights = []
                for lemma_chars in lemma_char_inputs[1:]:
                    lemma_char_weights.append(
                        tf.to_float(tf.not_equal(lemma_chars, 0)))

                lemmatizer_loss = seq2seq.sequence_loss(
                    lemma_char_logits_train, lemma_char_inputs[1:],
                    lemma_char_weights)

                lemmatizer_loss_runtime = \
                        seq2seq.sequence_loss(lemma_char_logits_runtime, lemma_char_inputs[1:],
                                              lemma_char_weights)

                tf.scalar_summary('train_lemma_loss_with_gt_inputs',
                                  tf.reduce_mean(lemmatizer_loss),
                                  collections=["train"])
                tf.scalar_summary('dev_lemma_loss_with_gt_inputs',
                                  tf.reduce_mean(lemmatizer_loss),
                                  collections=["dev"])

                tf.scalar_summary('train_lemma_loss_with_decoded_inputs',
                                  tf.reduce_mean(lemmatizer_loss_runtime),
                                  collections=["train"])
                tf.scalar_summary('dev_lemma_loss_with_decoded_inputs',
                                  tf.reduce_mean(lemmatizer_loss_runtime),
                                  collections=["dev"])

                # Both decoder losses are added to the optimized cost.
                self.cost += tf.reduce_mean(lemmatizer_loss) + tf.reduce_mean(
                    lemmatizer_loss_runtime)

        # L2 penalty over the collected LSTM matrices and softmax_w.
        self.cost += l2 * sum(
            [tf.nn.l2_loss(variable) for variable in regularize])

        tf.scalar_summary('train_optimization_cost',
                          self.cost,
                          collections=["train"])
        tf.scalar_summary('dev_optimization_cost',
                          self.cost,
                          collections=["dev"])

        # Not called directly in this method.
        # NOTE(review): presumably kept in local scope so that the
        # optimizer_desc expression evaluated below can call decay(...) --
        # verify against the callers before removing.
        def decay(learning_rate, exponent, iteration_steps):
            return tf.train.exponential_decay(learning_rate,
                                              global_step,
                                              iteration_steps,
                                              exponent,
                                              staircase=True)

        # NOTE(review): eval() executes optimizer_desc as Python code; it must
        # never come from untrusted input.
        optimizer = eval('tf.train.' + optimizer_desc)
        self.train = optimizer.minimize(self.cost, global_step=global_step)

        if threads > 0:
            self.session = tf.Session(
                config=tf.ConfigProto(inter_op_parallelism_threads=threads,
                                      intra_op_parallelism_threads=threads))
        else:
            self.session = tf.Session()
        self.session.run(tf.initialize_all_variables())

        if write_summaries:
            self.summary_train = tf.merge_summary(tf.get_collection("train"))
            self.summary_dev = tf.merge_summary(tf.get_collection("dev"))
            # The log directory is named after the creation time and the
            # experiment name.
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
            self.summary_writer = tf.train.SummaryWriter("logs/" + timestamp +
                                                         "_" + experiment_name)

        self.steps = 0
コード例 #16
0
ファイル: interactive.py プロジェクト: liamb315/TensorFlow
		inputs_dis    = [tf.matmul(tf.squeeze(i, [1]), embedding) for i in inputs_dis]

# Unroll the recurrent cell by hand over the prepared input steps, starting
# from initial_state_dis and collecting the output of every step.
state   = initial_state_dis
outputs = []

for i, inp in enumerate(inputs_dis):
	# After the first step, share the cell variables instead of creating
	# a new set per time step.
	if i > 0:
		tf.get_variable_scope().reuse_variables()
	output, state = cell_dis(inp, state)
	outputs.append(output)
last_state = state

# Flatten the step outputs to rows of size args.rnn_size and project them
# through the softmax layer.
output_tf   = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
logits = tf.nn.xw_plus_b(output_tf, softmax_w, softmax_b)
probs  = tf.nn.softmax(logits)

# Cross-entropy over two classes with a uniform weight of 1 per position.
loss = seq2seq.sequence_loss_by_example(
	[logits],
	[tf.reshape(targets, [-1])], 
	[tf.ones([args.batch_size * args.seq_length])],
	2)

# Mean loss per (sequence, time step) position.
cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

final_state = last_state
# Learning rate lives in a non-trainable variable.
lr          = tf.Variable(0.0, trainable = False)
tvars 	         = tf.trainable_variables()
# NOTE(review): aggregation_method = 2 is a bare constant, presumably
# tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N -- confirm and use the name.
grads, _         = tf.clip_by_global_norm(tf.gradients(cost, tvars, aggregation_method = 2), args.grad_clip)
optimizer        = tf.train.AdamOptimizer(lr)
train_op    = optimizer.apply_gradients(zip(grads, tvars))
コード例 #17
0
ファイル: rnnlm-nb.py プロジェクト: czhiming/tensorflow_code
    # def loop(prev, _):
    #     prev = tf.matmul(prev, softmax_w) + softmax_b
    #     prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
    #     return tf.nn.embedding_lookup(embeddings, prev_symbol)

    # Embed the input ids and split them into one tensor per time step.
    inputs = tf.split(1, seq_length,
                      tf.nn.embedding_lookup(embeddings, input_data))
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    # Run the recurrent decoder without a loop function, flatten the step
    # outputs to rows of size hidden_num and project them to vocabulary logits.
    outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell)
    output = tf.reshape(tf.concat(1, outputs), [-1, hidden_num])
    logits = tf.matmul(output, softmax_w) + softmax_b
    probs = tf.nn.softmax(logits)
    # Cross-entropy with a uniform weight of 1 per position, averaged over
    # batch_size * seq_length positions.
    loss_rnn = seq2seq.sequence_loss_by_example(
        [logits], [tf.reshape(targets, [-1])],
        [tf.ones([batch_size * seq_length])], vocab_size)
    cost = tf.reduce_sum(loss_rnn) / batch_size / seq_length
    final_state = last_state
    # NOTE(review): lr is created here, but the optimizer below is built with
    # the constant 0.1 rather than this variable -- confirm which is intended.
    lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
    optimizer = tf.train.AdagradOptimizer(0.1)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Output word vectors: divide every embedding row by its L2 norm.
    embeddings_norm = tf.sqrt(
        tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / embeddings_norm

# Model training
コード例 #18
0
ファイル: main.py プロジェクト: fomorians/lstm-odyssey
    def __init__(self, CellType, is_training, config):
        """Assemble the unrolled RNN language-model graph.

        Args:
            CellType: callable invoked as ``CellType(size)`` to create one
                recurrent cell.
            is_training: when true, dropout is applied (if
                ``config.keep_prob < 1``) and a gradient-descent training op
                is created; otherwise ``train_op`` is a no-op.
            config: object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers``, ``init_scale`` and ``max_grad_norm``.
        """
        batch_size = config.batch_size
        self.batch_size = batch_size
        num_steps = config.num_steps
        self.num_steps = num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps], name="input_data")
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps], name="targets")

        # Dropout is only active while training with keep_prob below 1.
        use_dropout = is_training and config.keep_prob < 1

        base_cell = CellType(hidden_size)
        if use_dropout:
            base_cell = rnn_cell.DropoutWrapper(base_cell, output_keep_prob=config.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)
        self.initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        # Shared uniform initializer handed to every `get_variable` below.
        uniform_init = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, hidden_size], initializer=uniform_init)
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)

        if use_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        step_outputs = []
        step_states = []
        state = self.initial_state

        # Manual unrolling: variables are created on step 0 and reused after.
        with tf.variable_scope("RNN", initializer=uniform_init):
            for step in range(num_steps):
                if step:
                    tf.get_variable_scope().reuse_variables()

                step_output, state = stacked_cell(embedded[:, step, :], state)

                step_outputs.append(step_output)
                step_states.append(state)

        self.final_state = step_states[-1]

        # Flatten the step outputs into rows of width hidden_size.
        joined = tf.concat(1, step_outputs)
        flat_output = tf.reshape(joined, [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w",
                                    [hidden_size, vocab_size],
                                    initializer=uniform_init)
        softmax_b = tf.get_variable("softmax_b", [vocab_size], initializer=uniform_init)

        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)
        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([batch_size * num_steps])

        # Per-position loss, summed and divided by the batch size.
        loss = sequence_loss_by_example([logits], [flat_targets], [unit_weights], vocab_size)
        total_loss = tf.reduce_sum(loss)
        cost = tf.div(total_loss, batch_size, name="cost")
        self.cost = cost

        if not is_training:
            # Evaluation / validation models get a no-op in place of training.
            self.train_op = tf.no_op()
            return

        # Learning rate lives in a non-trainable variable, initialised to 1.0.
        self.lr = tf.Variable(1.0, trainable=False)

        # Clip gradients by global norm, then apply plain gradient descent.
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(cost, trainable)
        clipped, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
        sgd = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = sgd.apply_gradients(zip(clipped, trainable), name="train")
コード例 #19
0
# with tf.Session() as sess:
# sess = tf.InteractiveSession()
# Session that logs device placement for every op.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# tensors to store model state and training data for each batch
# NOTE(review): each list holds _batch_size placeholders of shape
# [_seq_length]; confirm this is the layout embedding_tied_rnn_seq2seq expects
# (it may want one [batch]-shaped tensor per time step instead).
# NOTE(review): `seqs` and `targets` are not referenced again in this script.
seqs = [tf.placeholder(tf.int32, shape=[_seq_length]) for _ in xrange(_batch_size)]
encoder_inputs = [tf.placeholder(tf.int32, shape=[_seq_length]) for _ in xrange(_batch_size)]
decoder_inputs = [tf.placeholder(tf.int32, shape=[_seq_length]) for _ in xrange(_batch_size)]
targets = [tf.placeholder(tf.int32, shape=[_seq_length]) for _ in xrange(_batch_size)]
target_weights = [tf.ones(dtype=tf.float32, shape=[_seq_length]) for _ in xrange(_batch_size)]

# set up the tied seq-to-seq LSTM with given parameters
single_cell = rnn_cell.BasicLSTMCell(_lstm_cell_dimension)
cell = rnn_cell.MultiRNNCell([single_cell] * _lstm_num_layers)
outputs, _ = seq2seq.embedding_tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                                                _vocab_size_including_GO)
# NOTE(review): the loss is computed against `encoder_inputs`, not `targets`;
# confirm that reconstructing the encoder input is what is intended.
seqloss = seq2seq.sequence_loss_by_example(outputs, encoder_inputs, target_weights,
                                           _vocab_size_including_GO)

# The writer object is not kept; only its construction side effects remain.
tf.train.SummaryWriter(_train_log_dir, sess.graph_def)
global_step = tf.Variable(0, name='global_step', trainable=False)
sess.run(tf.initialize_all_variables())

# Set up the optimizer with gradient clipping
params = tf.trainable_variables()
gradients = tf.gradients(seqloss, params)
optimizer = tf.train.GradientDescentOptimizer(_lstm_learn_rate)
clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                 _lstm_max_grad_norm)
# Applying the gradients also increments global_step.
train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                     global_step=global_step)

# train_step = tf.train.GradientDescentOptimizer(_lstm_learn_rate).minimize(seqloss)
コード例 #20
0
ファイル: t2t_tf.py プロジェクト: ems7/asr_2016
    def __init__(self, session, config, training_flag=False):
        """Build the RNN graph: inputs, recurrent stack, softmax, loss, train op.

        Args:
            session: accepted for interface compatibility; not used while the
                graph is being built.
            config: object providing ``vocab_size``, ``size``, ``net_type``
                ("LSTM" or "GRU"), ``batch_size``, ``num_steps``,
                ``max_grad_norm``, ``forget_bias``, ``keep_prob`` and
                ``num_layers``.
            training_flag: when true, input dropout is applied (if
                ``keep_prob < 1``) and the Adagrad training op is created.

        Raises:
            ValueError: if ``config.net_type`` is neither "LSTM" nor "GRU".
        """
        # get configuration from config class
        vocab_size = config.vocab_size
        size = config.size
        net_type = config.net_type
        batch_size = config.batch_size
        num_steps = config.num_steps
        max_grad_norm = config.max_grad_norm
        forget_bias = config.forget_bias
        keep_prob = config.keep_prob

        # create placeholders for input, answers, learning rate
        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._answers = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._lr = tf.placeholder(tf.float32, name='learning_rate')

        # create cell, either GRU or LSTM as defined by the config class
        if net_type == "LSTM":
            cell = rnn_cell.BasicLSTMCell(size, forget_bias=forget_bias)
        elif net_type == "GRU":
            cell = rnn_cell.GRUCell(size)
        else:
            # Fail here with a clear message instead of printing and then
            # crashing later on the undefined `cell`.
            raise ValueError("Unknown network type. config.net_type must be GRU or LSTM")

        # create multiple layers of cells defined by config.num_layers
        cell_layers = rnn_cell.MultiRNNCell([cell] * config.num_layers)

        # set the initial state of the network
        self._initial_state = cell_layers.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if training_flag and keep_prob < 1:
            inputs = tf.nn.dropout(inputs, keep_prob)

        # One [batch_size, size] tensor per time step.
        inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs)]

        # pass inputs through the cell
        outputs, states = RNN.rnn(cell_layers, inputs, initial_state=self._initial_state)

        # state returned by the RNN after the whole input has been consumed
        self._final_state = states

        # Flatten to [batch_size * num_steps, size]; rows are batch-major, which
        # matches tf.reshape(self._answers, [-1]) below.
        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])

        self._logits = logits = tf.matmul(output, softmax_w) + softmax_b
        self._soft_out = soft_out = tf.nn.softmax(logits, name='soft_max')

        flat_answers = tf.reshape(self._answers, [-1])

        # Element-wise comparison must happen inside the graph: a Python `==`
        # between tensors does not compare their values.
        predicted = tf.cast(tf.arg_max(soft_out, 1), tf.int32)
        correct_prediction = tf.equal(predicted, flat_answers)
        self._accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        loss = seq2seq.sequence_loss_by_example([logits],
                                                [flat_answers],
                                                [tf.ones([batch_size * num_steps])],
                                                vocab_size)
        # Summed loss divided by the batch size.
        self._cost = cost = tf.reduce_sum(loss) / batch_size

        if not training_flag:
            return

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          max_grad_norm)
        # Use the learning-rate placeholder created above directly.
        optimizer = tf.train.AdagradOptimizer(self._lr, initial_accumulator_value=0.1)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #21
0
    def __init__(self, args, predict=False):
        """Build the LSTM sequence model graph.

        Args:
            args: object providing ``vocabSize``; stored on ``self.args``.
            predict: when true, the graph is built for batch size 1 and a
                single unrolled step, and each decoder output is fed back as
                the next input through ``feedBack``.
        """
        self.args = args

        # Various parameters for the LSTM.
        # Hardcoded here for now.
        numSteps = 50  # Steps to unroll for
        batchSize = 50
        rnnSize = 128
        numLayers = 2
        gradClip = 5

        # The prediction override has to come AFTER the defaults above;
        # otherwise the defaults overwrite it and prediction still builds a
        # 50x50 graph.
        if predict:
            batchSize = 1
            numSteps = 1

        # Create LSTM layer and stack multiple layers.
        lstmCell = rnn_cell.BasicLSTMCell(rnnSize)
        lstmNet = rnn_cell.MultiRNNCell([lstmCell] * numLayers)

        # Define placeholders.
        self.inputData = tf.placeholder(tf.int32, [batchSize, numSteps])
        self.targetOutput = tf.placeholder(tf.int32, [batchSize, numSteps])
        self.initialState = lstmNet.zero_state(batchSize, tf.float32)

        # If rnn_decoder is told to loop, this function will return to it the output at time
        # 't' for feeding as the input at time 't+1'. During training, this is generally
        # not done because we want to feed the *correct* input at all times and not what
        # is output. During prediction/testing, we loop the output back to the input to
        # generate our sequence of notes.
        def feedBack(prev, _):
            # softmax_w, softmax_b and embedding are bound below, before
            # rnn_decoder ever calls this closure.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        with tf.variable_scope('nn_lstm'):
            softmax_w = tf.get_variable("softmax_w", [rnnSize, args.vocabSize])
            softmax_b = tf.get_variable("softmax_b", [args.vocabSize])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocabSize, rnnSize])
                # One [batchSize, rnnSize] tensor per unrolled step.
                inputs = tf.split(1, numSteps, tf.nn.embedding_lookup(embedding, self.inputData))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Call seq2seq rnn decoder.
        outputs, states = seq2seq.rnn_decoder(inputs, self.initialState, lstmNet, loop_function=feedBack if predict else None, scope='nn_lstm')
        output = tf.reshape(tf.concat(1, outputs), [-1, rnnSize])

        # Logit and probability
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        # Calculate loss compared to targetOutput
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targetOutput, [-1])],
                [tf.ones([batchSize * numSteps])],
                args.vocabSize)

        # Set the cost to minimize total loss.
        self.cost = tf.reduce_sum(loss)

        # The learning rate is a non-trainable variable initialised to 0.0;
        # presumably the training loop assigns the real value -- verify
        # against the caller.
        self.finalState = states[-1]
        self.learningRate = tf.Variable(0.0, trainable=False)

        # Define gradient and trainable variables for adjusting
        # during training/optimization.
        trainableVars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainableVars),
                gradClip)

        # We use the Adam optimizer.
        optimizer = tf.train.AdamOptimizer(self.learningRate)
        self.trainStep = optimizer.apply_gradients(zip(grads, trainableVars))
コード例 #22
0
  def __init__(self, is_training, config, decode_only=False):
    """Build the variational sequence-model graph (encoder, sampler, decoder, loss).

    Args:
      is_training: when true, dropout is applied (if ``config.keep_prob < 1``)
        and the Adam training op plus merged summaries are created.
      config: object providing ``batch_size``, ``num_steps``, ``hidden_size``,
        ``vocab_size``, ``keep_prob``, ``num_layers``, ``z_dim`` and
        ``max_grad_norm``.
      decode_only: when true, latent samples are drawn from a standard normal
        with batch dimension 1, the logits are stored on ``self._logits`` and
        construction stops before the training ops.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    self.is_training = is_training
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1 but the hyperparameters of the model would need to be
    # different than reported in the paper.
    with tf.variable_scope("cell_encoder"):
      lstm_encoder_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
      if is_training and config.keep_prob < 1:
        lstm_encoder_cell = rnn_cell.DropoutWrapper(
            lstm_encoder_cell, output_keep_prob=config.keep_prob)
      cell_encoder = rnn_cell.MultiRNNCell([lstm_encoder_cell] * config.num_layers)

      # this is the linear projection layer down to num_encoder_symbols = 2*config.z_dim
      cell_encoder = rnn_cell.OutputProjectionWrapper(cell_encoder, 2 * config.z_dim)

      self._initial_state_encoder = cell_encoder.zero_state(batch_size, tf.float32)

    with tf.variable_scope("cell_decoder"):
      lstm_decoder_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
      if is_training and config.keep_prob < 1:
        lstm_decoder_cell = rnn_cell.DropoutWrapper(
            lstm_decoder_cell, output_keep_prob=config.keep_prob)
      cell_decoder = rnn_cell.MultiRNNCell([lstm_decoder_cell] * config.num_layers)

      self._initial_state_decoder = cell_decoder.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
      with tf.variable_scope("embedding"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
      # One [batch_size, size] tensor per time step.
      inputs = tf.split(
          1, num_steps, tf.nn.embedding_lookup(embedding, self._input_data))
      inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    if is_training and config.keep_prob < 1:
      inputs = [tf.nn.dropout(input_, config.keep_prob) for input_ in inputs]

    # initial inputs
    inputs_encoder = inputs

    outputs_encoder, states_encoder = rnn.rnn(cell_encoder, inputs_encoder, initial_state=self._initial_state_encoder)

    # split the outputs to mu and log_sigma
    mu_and_log_sigmas = [tf.split(1, 2, output_encoder) for output_encoder in outputs_encoder]
    mus = [mu_and_log_sigma[0] for mu_and_log_sigma in mu_and_log_sigmas]
    log_sigmas = [mu_and_log_sigma[1] for mu_and_log_sigma in mu_and_log_sigmas]

    # epsilon is sampled from N(0,1) for location-scale transform
    epsilons = [tf.random_normal([config.batch_size, config.z_dim], dtype=tf.float32) for i in range(len(log_sigmas))]

    # do the location-scale transform
    z_samples = [tf.add(mu, tf.mul(tf.exp(log_sigma), epsilon)) for mu, log_sigma, epsilon in zip(mus, log_sigmas, epsilons)]
    if decode_only:
      # if we're decoding, just sample from a random normal
      z_samples = [tf.random_normal([1, config.z_dim], dtype=tf.float32) for i in range(len(z_samples))]

    # calculate KL. equation 10 from kingma - auto-encoding variational bayes.
    neg_KL_list = [tf.add_n([tf.ones_like(mu), tf.log(tf.square(tf.exp(log_sigma))), tf.neg(tf.square(mu)), tf.neg(tf.square(tf.exp(log_sigma)))]) for mu, log_sigma in zip(mus, log_sigmas)]

    # multiply by 0.5
    neg_KL_list = [tf.mul(tf.constant(0.5, shape=[1, config.z_dim]), KL_term) for KL_term in neg_KL_list]

    # merge the list like we merge the outputs
    neg_KL = tf.reshape(tf.concat(1, neg_KL_list), [-1, config.z_dim])

    softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])

    # refactored to be like sam's
    outputs_decoder, states_decoder = vae_decoder_argmax(
      inputs_encoder, z_samples, self._initial_state_decoder, cell_decoder, vocab_size,
      output_projection=[softmax_w, softmax_b],
      feed_previous=True,
      config=config)

    # final output
    # change to vanilla lstm
    # NOTE(review): the loss below is computed from the encoder outputs;
    # outputs_decoder is not used afterwards in this method. The encoder
    # outputs come from a projection to 2 * config.z_dim but are reshaped to
    # width `size` -- confirm this is intended.
    outputs = outputs_encoder

    # do a softmax over the vocabulary
    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    logits = tf.nn.xw_plus_b(output,
                             softmax_w,
                             softmax_b)

    NLL = seq2seq.sequence_loss_by_example([logits],
                                            [tf.reshape(self._targets, [-1])],
                                            [tf.ones([batch_size * num_steps])],
                                            vocab_size)

    NLL_scalar = tf.reduce_sum(NLL)
    KL_scalar = tf.neg(tf.reduce_sum(neg_KL))

    # here we compute the *NEGATIVE* ELBO (because we don't know how the optimizer deals with negative learning rates / gradients)
    # the loss in seq2seq.sequence_loss_by_example is the cross-entropy, which is the *negative* log-likelihood, so we can add it.
    neg_ELBO = KL_scalar + NLL_scalar  # / batch_size

    def normalize(tensor):
      # Scale by 1 / (batch_size * num_steps). The 1.0 forces float division:
      # with integer operands Python 2 would floor the factor to 0.
      scale = 1.0 / (batch_size * self.num_steps)
      return tf.reduce_sum(
          tf.mul(tf.constant(scale, shape=tensor.get_shape()), tensor))

    # summaries (the ops are created for their side effect of registering a
    # summary; merge_all_summaries below collects them)
    neg_ELBO_normalized = normalize(neg_ELBO)
    KL_normalized = normalize(KL_scalar)
    NLL_normalized = normalize(NLL_scalar)
    neg_ELBO_summary = tf.scalar_summary("neg_ELBO_normalized", neg_ELBO_normalized)
    KL_summary = tf.scalar_summary('KL_normalized', KL_normalized)
    NLL_summary = tf.scalar_summary('NLL_normalized', NLL_normalized)

    # expose costs, h
    self._neg_ELBO = neg_ELBO
    self._KL_scalar = KL_scalar
    self._NLL_scalar = NLL_scalar
    self._final_state = states_encoder[-1]

    if decode_only:
      self._logits = logits
      return

    if not is_training:
      return

    self._lr = tf.Variable(0.0, trainable=False, name='learning_rate')
    tvars = tf.trainable_variables()
    tvar_names = [tvar.name for tvar in tvars]

    grads_unclipped = tf.gradients(neg_ELBO, tvars)
    grads, _ = tf.clip_by_global_norm(grads_unclipped,
                                      config.max_grad_norm)

    # One histogram summary per variable that actually receives a gradient.
    grad_hists = []
    for idx, grad in enumerate(grads_unclipped):
      if grad is not None:
        grad_hists.append(tf.histogram_summary(tvar_names[idx], grad))

    #NB: for adam, need to set epsilon to other than the default 1e-8, otherwise get nans!
    # Use the learning-rate variable created above directly.
    optimizer = tf.train.AdamOptimizer(learning_rate=self._lr, epsilon=1e-1)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    merged = tf.merge_all_summaries()
    self._merged = merged
コード例 #23
0
    def __init__(self, args, infer=False):
        """Build the conditioned RNN language-model graph.

        Args:
            args: object providing ``model`` ('rnn', 'gru' or 'lstm'),
                ``vocab_size``, ``rnn_size``, ``num_layers``, ``batch_size``,
                ``seq_length`` and ``grad_clip``. It is stored on
                ``self.args`` and, when ``infer`` is true, its ``batch_size``
                and ``seq_length`` are overwritten with 1.
            infer: when true, build a batch-1, single-step graph and feed each
                output back as the next input through ``loop``.

        Raises:
            Exception: if ``args.model`` is not a supported cell type.
        """
        self.args = args
        if infer:
            # Sampling runs one symbol at a time; note this mutates `args`.
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Width of the conditioning input; fixed at 50 rather than seq_length.
        con_size = 50

        self.network = Network(cell_fn, args.vocab_size, 20, args.vocab_size, args.rnn_size, con_size, args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.con_data = tf.placeholder(tf.int32, [args.batch_size, con_size])

        self.initial_state = self.network.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size * args.num_layers, args.vocab_size])

            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            # The "embedding" is a constant identity matrix, i.e. a one-hot
            # encoding of each symbol, not a learned table.
            embedding = tf.constant(np.identity(args.vocab_size, dtype=np.float32))

            inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

            # One-hot encoding of the conditioning data.
            con = tf.nn.embedding_lookup(embedding, self.con_data)

        def loop(prev, _):
            # Project the previous output, pick the argmax symbol and return
            # its one-hot row as the next input.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, states = decoder(inputs, self.initial_state, self.network, con, loop_function=loop if infer else None, scope='rnnlm')

        # turn a list of output into row matrix where each row is output
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size * args.num_layers])

        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        # Summed loss divided by batch size and by sequence length.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Function-call form prints the same text on Python 2 and parses on
        # Python 3.
        print(states)
        self.final_state = states

        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #24
0
ファイル: char_rnn.py プロジェクト: mohakraaj/lyric_generator
    def create_model(self):
        """Build the character-level RNN graph and store its tensors on self.

        Reads ``batch_size``, ``seq_length``, ``rnn_size``, ``vocab_size``,
        ``num_layers``, ``is_training``, ``infer`` and ``grad_clip`` from the
        instance, and sets ``input_data``, ``target_data``, ``keep_prob``,
        ``lr``, ``initial_state``, ``embedding``, ``logits``, ``probs``,
        ``cost``, ``final_state`` and ``train_op``.
        """

        self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="input_data")
        self.target_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="target_data")

        # define hyper_parameters
        # NOTE(review): `self.lr` is created here but the optimizer at the end
        # of this method is built with the constant 0.01, not with `self.lr`.
        self.keep_prob = tf.Variable(0.3, trainable=False, name="keep_prob")
        self.lr = tf.Variable(0.0, trainable=False, name="lr")

        softmax_weights = tf.get_variable("softmax_weights", [self.rnn_size, self.vocab_size])
        softmax_biases = tf.get_variable("softmax_biases", [self.vocab_size])

        lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size)

        #        if self.is_training and self.keep_prob < 1:
        #              lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=self.keep_prob)

        multilayer_cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
        self.initial_state = multilayer_cell.zero_state(self.batch_size, tf.float32)

        with tf.device("/cpu:0"):
            # define the embedding matrix for the whole vocabulary
            self.embedding = tf.get_variable("embeddings", [self.vocab_size, self.rnn_size])
            # take the vector representation for each word in the embeddings
            embeds = tf.nn.embedding_lookup(self.embedding, self.input_data)

        # NOTE(review): `self.keep_prob` is a tf.Variable, so `self.keep_prob < 1`
        # is not a plain Python number comparison -- confirm this condition
        # behaves as intended.
        if self.is_training and self.keep_prob < 1:
            embeds = tf.nn.dropout(embeds, self.keep_prob)

        # Feedback function for inference: project the previous output, take the
        # argmax symbol and return its embedding as the next input.
        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_weights, softmax_biases)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        # convert input to a list of seq_length
        inputs = tf.split(1, self.seq_length, embeds)

        # after splitting, each piece has shape (batch_size, 1, rnn_size);
        # squeeze axis 1 so each becomes (batch_size, rnn_size)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # The feedback loop is only used when self.infer is truthy.
        output, states = seq2seq.rnn_decoder(
            inputs, self.initial_state, multilayer_cell, loop_function=loop if self.infer else None, scope="rnnlm"
        )

        # Flatten the decoder outputs into rows of width rnn_size.
        output = tf.reshape(tf.concat(1, output), [-1, self.rnn_size])

        self.logits = tf.nn.xw_plus_b(output, softmax_weights, softmax_biases)
        self.probs = tf.nn.softmax(self.logits, name="probability")

        # Per-position loss with unit weights.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.target_data, [-1])],
            [tf.ones([self.batch_size * self.seq_length])],
            self.vocab_size,
        )
        # Summed loss divided by the number of positions.
        self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)

        self.final_state = states[-1]

        # Clip gradients by global norm before applying them with Adam.
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.grad_clip)

        optimizer = tf.train.AdamOptimizer(0.01)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #25
0
    def __init__(self, args):
        """Build the RNN sequence-labelling graph with accuracy and training ops.

        Args:
            args: object providing ``model`` ('rnn' or 'lstm'), ``rnn_size``,
                ``num_layers``, ``batch_size``, ``seq_length``, ``vocab_size``,
                ``num_classes``, ``grad_clip``, ``learning_rate``,
                ``l2_limit`` and ``emb_vocab``. ``emb_vocab`` is either None
                or a dict whose values are ``(index, vector)`` pairs used to
                initialise the embedding matrix.

        An unsupported ``args.model`` prints "Invalid cell" and exits the
        process via ``sys.exit()``.
        """
        # define cell
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            # Function-call form prints the same text on Python 2 and parses
            # on Python 3.
            print("Invalid cell")
            sys.exit()

        cell = cell_fn(args.rnn_size)
        cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # define inputs and targets, initialize state
        self.inputs = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        # prepare word embedding, reshape inputs
        with tf.name_scope("embedding"):
            with tf.device("/cpu:0"):
                if args.emb_vocab is None:
                    E = tf.get_variable("E", [args.vocab_size, args.rnn_size])
                else:
                    # Embedding width is taken from the first entry's vector.
                    # list(...)[0] works on both Python 2 and 3, unlike
                    # .keys()[0].
                    first_key = list(args.emb_vocab)[0]
                    emb_dim = len(args.emb_vocab[first_key][1])
                    # Rows without a pre-trained vector keep random values.
                    emb_mat = np.random.rand(args.vocab_size, emb_dim)
                    for idx, emb_vec in args.emb_vocab.values():
                        emb_mat[idx] = emb_vec
                    E = tf.Variable(tf.convert_to_tensor(emb_mat, dtype=tf.float32), name="E")

                # One [batch_size, emb_dim] tensor per time step.
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(E, self.inputs))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # feed inputs into rnn
        with tf.name_scope("rnn"):
            outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=None, scope='rnnlm')
            self.output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.output, self.dropout_keep_prob)

        # output layer
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([args.rnn_size, args.num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[args.num_classes]), name="b")
            self.logits = tf.nn.xw_plus_b(self.h_drop, W, b)
            self.probs = tf.nn.softmax(self.logits)
            # argmax is cast to int32 so it can be compared with the targets.
            self.predictions = tf.cast(tf.argmax(self.logits, 1), tf.int32)

        # accuracy
        with tf.name_scope("accuracy"):
            # calculate token-level accuracy
            self.reshaped_targets = tf.reshape(self.targets, [-1])
            correct_predictions = tf.equal(self.predictions, self.reshaped_targets)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"))

            # calculate sentence-level accuracy: a sentence counts as correct
            # only when all seq_length tokens match.
            self.predictions_sentence = tf.reshape(self.predictions, [-1, args.seq_length])  # batch_size * seq_length
            correct_predictions_sentence_tokens = tf.equal(self.predictions_sentence, self.targets)  # batch_size X seq_length
            # Multiplying by a column of ones sums the per-token matches.
            multiply_mat = tf.constant(1, shape=[args.seq_length, 1])
            sentence_accuracy_mat = tf.matmul(tf.cast(correct_predictions_sentence_tokens, tf.int32), multiply_mat)  # batch_size X 1
            correct_predictions_sentence = \
                tf.equal(sentence_accuracy_mat, tf.constant(args.seq_length, shape=[args.batch_size, 1]))  # batch_size X 1
            self.accuracy_sentence = tf.reduce_mean(tf.cast(correct_predictions_sentence, "float"))

        # calculate loss
        with tf.name_scope("loss"):
            self.loss = seq2seq.sequence_loss_by_example(
                    [self.logits],  # TODO: should I use a list of 2D tensors ?
                    [self.reshaped_targets],  # TODO: correct ???
                    [tf.ones([args.batch_size * args.seq_length])],
                    args.num_classes)
            # Summed loss divided by batch size and by sequence length.
            self.cost = tf.reduce_sum(self.loss) / args.batch_size / args.seq_length

        # train and update
        with tf.name_scope("update"):
            tvars = tf.trainable_variables()
            self.grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)  # TODO: correct ???
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            self.train_op = optimizer.apply_gradients(zip(self.grads, tvars), global_step=self.global_step)

            # l2 norm clipping: ops that clip the output weight matrix to
            # args.l2_limit when run.
            self.weight_clipping_op = []
            trainable_vars = tf.trainable_variables()
            for var in trainable_vars:
                if var.name.startswith('output/W'):
                    updated_var = tf.clip_by_norm(var, args.l2_limit)
                    self.weight_clipping_op.append(tf.assign(var, updated_var))
コード例 #26
0
        # Look up embeddings for input_data, split them along axis 1 into
        # seq_length pieces and squeeze away the size-1 axis of each piece.
        inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
# Loop function for seq2seq
def loop(prev, _):
    """Turn the previous decoder output into the next decoder input.

    The previous output is projected with ``softmax_w`` / ``softmax_b``, the
    highest-scoring symbol along axis 1 is selected, and that symbol's row of
    ``embedding`` is returned. The second argument (the step index supplied by
    the decoder) is ignored.
    """
    step_logits = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
    best_symbol = tf.argmax(step_logits, 1)
    # stop_gradient keeps the discrete symbol choice out of backpropagation.
    return tf.nn.embedding_lookup(embedding, tf.stop_gradient(best_symbol))
# Run the RNN over the per-step inputs. loop_function=None is passed, so the
# `loop` helper defined above is not wired into this decoder.
outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell, loop_function=None, scope='rnnlm')
# Concatenate the per-step outputs and flatten them to rows of width rnn_size.
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
# Project every row onto the vocabulary.
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
# Next word probability (softmax over the logits)
probs = tf.nn.softmax(logits)
# Define LOSS: a single "time step" holding all flattened positions.
loss = seq2seq.sequence_loss_by_example([logits], # Input: flattened logits
    [tf.reshape(targets, [-1])], # Target: targets flattened to 1-D
    [tf.ones([batch_size * seq_length])], # Weight: every position counts equally
    vocab_size)
# Define Optimizer
# Scalar cost: summed loss divided by batch_size and by seq_length.
cost = tf.reduce_sum(loss) / batch_size / seq_length
final_state = last_state
# Learning rate lives in a non-trainable variable (initialised to 0.0) that is
# handed to the Adam optimizer below.
lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
# Clip the gradients of the cost by their global norm before applying them.
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm = tf.train.AdamOptimizer(lr)
# Training op: apply the clipped gradients to the trainable variables.
optm = _optm.apply_gradients(zip(grads, tvars))

print ("Network Ready")


# In[ ]:
コード例 #27
0
    def __init__(self, args, infer=False):
        """Build the context-conditioned RNN language-model graph.

        Args:
            args: Hyper-parameter namespace. This method reads ``model``,
                ``vocab_size``, ``rnn_size``, ``num_layers``, ``batch_size``,
                ``seq_length`` and ``grad_clip``.
            infer: When true, ``args.batch_size`` and ``args.seq_length`` are
                overwritten with 1 (this mutates the caller's ``args``) and
                the decoder is given ``loop`` as its loop function.

        Raises:
            Exception: If ``args.model`` is not ``'rnn'``, ``'gru'`` or
                ``'lstm'``.
        """
        self.args = args
        if infer:
            # Inference uses a batch of one sequence of length one.
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Length of the context sequence fed through ``con_data``; hard-coded
        # rather than derived from args.seq_length.
        con_size = 50

        # NOTE(review): the meaning of the positional arguments (including the
        # literal 20) is defined by Network, which is not visible here.
        self.network = Network(cell_fn, args.vocab_size, 20, args.vocab_size,
                               args.rnn_size, con_size, args.num_layers)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.con_data = tf.placeholder(tf.int32, [args.batch_size, con_size])

        self.initial_state = self.network.zero_state(args.batch_size,
                                                     tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                "softmax_w",
                [args.rnn_size * args.num_layers, args.vocab_size])

            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            # The "embedding" is a constant identity matrix, so every lookup
            # yields a one-hot vector of width vocab_size.
            embedding = tf.constant(
                np.identity(args.vocab_size, dtype=np.float32))

            # Split the embedded input along the time axis into seq_length
            # tensors and drop the singleton time dimension of each.
            inputs = tf.split(
                1, args.seq_length,
                tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

            # One-hot context: [batch_size, con_size, vocab_size]
            con = tf.nn.embedding_lookup(embedding, self.con_data)

        def loop(prev, _):
            # Feed the embedding of the most likely symbol back as the next
            # input; stop_gradient keeps the argmax out of backpropagation.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, states = decoder(inputs,
                                  self.initial_state,
                                  self.network,
                                  con,
                                  loop_function=loop if infer else None,
                                  scope='rnnlm')

        # Concatenate the per-step outputs and flatten them into a matrix
        # whose rows have width rnn_size * num_layers.
        output = tf.reshape(tf.concat(1, outputs),
                            [-1, args.rnn_size * args.num_layers])

        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Function-call form prints the same text on Python 2 and also parses
        # on Python 3 (the statement form is a SyntaxError there).
        print(states)
        self.final_state = states

        # Learning rate is a non-trainable variable initialised to 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #28
0
    def __init__(self, args, infer=False):
        """Build a sequence-classification graph on top of a stacked RNN.

        The embedded input sequence is run through ``args.num_layers`` stacked
        cells, the per-step outputs are averaged, and the average is projected
        to ``args.vocab_size`` logits that are scored against one target per
        batch row.

        Args:
            args: Hyper-parameter namespace. This method reads ``model``,
                ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
                ``vocab_size`` and ``grad_clip``.
            infer: When true, the (stub) ``loop`` function is handed to the
                decoder as ``loop_function``; otherwise ``None`` is passed.

        Raises:
            Exception: If ``args.model`` is not ``'rnn'``, ``'gru'`` or
                ``'lstm'``.
        """
        self.args = args

        cell_types = {
            'rnn': rnn_cell.BasicRNNCell,
            'gru': rnn_cell.GRUCell,
            'lstm': rnn_cell.BasicLSTMCell,
        }
        if args.model not in cell_types:
            raise Exception("model type not supported: {}".format(args.model))

        base_cell = cell_types[args.model](args.rnn_size)
        stacked = rnn_cell.MultiRNNCell([base_cell] * args.num_layers)
        self.cell = stacked

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        # One target per batch row (not one per time step).
        self.targets = tf.placeholder(tf.int32, [args.batch_size])
        self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                step_slices = tf.split(1, args.seq_length, embedded)
                # seq_length tensors, each of shape (batch_size, rnn_size)
                inputs = [tf.squeeze(step, [1]) for step in step_slices]

        def loop(prev, _):
            # TODO: stub that always returns None. The intended (not yet
            # enabled) behaviour was to project `prev` with softmax_w/softmax_b,
            # take the argmax under stop_gradient, and return that symbol's
            # embedding.
            return None

        loop_fn = loop if infer else None
        # seq_length outputs, each of shape (batch_size, rnn_size)
        outputs, states = seq2seq.rnn_decoder(
            inputs, self.initial_state, stacked, loop_function=loop_fn, scope='rnnlm')

        # Average the outputs over all time steps (rather than using only the
        # last one); the result has shape (batch_size, rnn_size).
        mean_output = tf.div(tf.add_n(outputs), len(outputs))

        # logits: (batch_size, vocab_size)
        self.logits = tf.nn.xw_plus_b(mean_output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size])],
            args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size
        tf.scalar_summary('cost', self.cost)

        # Accuracy: fraction of rows whose argmax equals the target.
        predicted = tf.cast(tf.argmax(self.logits, 1), tf.int32)
        hits = tf.equal(predicted, tf.reshape(self.targets, [-1]))
        self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        tf.scalar_summary('accuracy', self.accuracy)

        self.final_state = states
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, trainable)
        clipped, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        adam = tf.train.AdamOptimizer(self.lr)
        self.train_op = adam.apply_gradients(zip(clipped, trainable))
コード例 #29
0
ファイル: rnnlm.py プロジェクト: czhiming/tensorflow_code
def main():
    """Train an RNN language model and save its word embeddings.

    Parses the command-line options, loads batches with ``TextLoader2``,
    builds the graph, trains for ``--num_epochs`` epochs, and after every
    epoch writes the normalised embedding matrix via
    ``np.save('rnnlm_word_embeddings', ...)``.

    Raises:
        Exception: If ``--model`` is not ``rnn``, ``gru`` or ``lstm``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir',
                        type=str,
                        default='../data/xinhua',
                        help='data directory containing input.txt')
    parser.add_argument('--batch_size',
                        type=int,
                        default=120,
                        help='minibatch size')
    parser.add_argument('--seq_length',
                        type=int,
                        default=5,
                        help='RNN sequence length')
    # hidden_num is passed to the cell constructor as its size; it is not a
    # layer count.
    parser.add_argument('--hidden_num',
                        type=int,
                        default=256,
                        help='size of the RNN hidden state')
    # word_dim is the second dimension of the embedding matrix.
    parser.add_argument('--word_dim',
                        type=int,
                        default=256,
                        help='dimensionality of the word embeddings')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=50,
                        help='number of epochs')
    parser.add_argument('--model',
                        type=str,
                        default='lstm',
                        help='rnn, gru, or lstm')
    parser.add_argument('--grad_clip',
                        type=float,
                        default=10.,
                        help='clip gradients at this value')

    args = parser.parse_args()  # parsed argument set

    # Prepare the training data
    data_loader = TextLoader2(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # Model definition
    graph = tf.Graph()
    with graph.as_default():

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.hidden_num)

        # Input variables
        input_data = tf.placeholder(tf.int32,
                                    [args.batch_size, args.seq_length])
        targets = tf.placeholder(tf.int64, [args.batch_size, args.seq_length])

        initial_state = cell.zero_state(args.batch_size, tf.float32)
        # Model parameters
        with tf.variable_scope('rnnlm' + 'embedding'):
            embeddings = tf.Variable(
                tf.random_uniform([args.vocab_size, args.word_dim], -1.0, 1.0))
            # From here on `embeddings` is the L2-normalised (along dim 1)
            # view of the variable, not the variable itself.
            embeddings = tf.nn.l2_normalize(embeddings, 1)

        with tf.variable_scope('rnnlm' + 'weight'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.hidden_num, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        # Split the embedded input along the time axis into seq_length
        # tensors and drop the singleton time dimension of each.
        inputs = tf.split(1, args.seq_length,
                          tf.nn.embedding_lookup(embeddings, input_data))
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell)
        output = tf.reshape(tf.concat(1, outputs), [-1, args.hidden_num])
        logits = tf.matmul(output, softmax_w) + softmax_b
        probs = tf.nn.softmax(logits)
        loss_rnn = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
        cost = tf.reduce_sum(loss_rnn) / args.batch_size / args.seq_length
        final_state = last_state
        # NOTE(review): `lr` is created but never handed to the optimizer;
        # Adagrad below runs with a fixed learning rate of 0.1.
        lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdagradOptimizer(0.1)
        train_op = optimizer.apply_gradients(zip(grads, tvars))

        # Output word vectors.
        # NOTE(review): `embeddings` is already the l2_normalize output from
        # above, so this divides by the row norm a second time.
        embeddings_norm = tf.sqrt(
            tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / embeddings_norm

    # Model training
    with tf.Session(graph=graph) as sess:
        tf.initialize_all_variables().run()
        for e in range(args.num_epochs):
            data_loader.reset_batch_pointer()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {input_data: x, targets: y}
                train_loss, _ = sess.run([cost, train_op], feed)
                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(b, data_loader.num_batches, e, train_loss,
                            end - start))
            # Overwrite the saved embeddings at the end of every epoch.
            np.save('rnnlm_word_embeddings', normalized_embeddings.eval())
コード例 #30
0
ファイル: tagger.py プロジェクト: oplatek/rh_nntagging
    def __init__(
        self,
        vocab,
        tagset,
        alphabet,
        word_embedding_size,
        char_embedding_size,
        num_chars,
        num_steps,
        optimizer_desc,
        generate_lemmas,
        l2,
        dropout_prob_values,
        experiment_name,
        supply_form_characters_to_lemma,
        threads=0,
        seed=None,
        write_summaries=True,
        use_attention=True,
        scheduled_sampling=None,
    ):
        """
        Builds the tagger computation graph and initializes it in a TensorFlow
        session.

        Arguments:

            vocab: Vocabulary of word forms.

            tagset: Vocabulary of possible tags.

            alphabet: Vocabulary of possible characters.

            word_embedding_size (int): Size of the form-based word embedding.
                A falsy value disables the word-level input.

            char_embedding_size (int): Size of character embeddings, i.e. a
                half of the size of the character-based words embeddings.
                A falsy value disables the character-level input.

            num_chars: Maximum length of a word.

            num_steps: Maximum length of a sentence.

            optimizer_desc: Description of the optimizer. The string is
                appended to ``"tf.train."`` and passed to ``eval`` inside this
                method, so the expression can reference the local ``decay``
                helper. It must come from trusted configuration only.

            generate_lemmas: Generate lemmas during tagging. When true, the
                character-level lemma decoder is built and its losses are
                added to the cost.

            l2: Coefficient multiplying the summed ``tf.nn.l2_loss`` of the
                regularized weight matrices, added to the cost.

            dropout_prob_values: Stored on the instance as
                ``self.dropout_prob_values``; not otherwise used here.

            experiment_name: Suffix of the summary log directory name.

            supply_form_characters_to_lemma: When true, each lemma-decoder
                input is the concatenation of a lemma-character embedding and
                a form-character embedding, each of half the decoder state
                size.

            threads: When greater than zero, used for both the inter-op and
                intra-op parallelism thread counts of the session.

            seed: TensorFlow seed. NOTE(review): not read anywhere in this
                method.

            write_summaries: Write summaries using TensorFlow interface.

            use_attention: Use ``seq2seq.attention_decoder`` for the lemma
                decoder instead of ``seq2seq.rnn_decoder``.

            scheduled_sampling: When truthy, the training decoder uses
                ``sampling_loop``, which picks element-wise between the
                ground-truth input and the decoded input using the threshold
                ``scheduled_sampling / (scheduled_sampling + exp(global_step))``.
        """

        self.num_steps = num_steps
        self.num_chars = num_chars

        self.word_embedding_size = word_embedding_size
        self.char_embedding_size = char_embedding_size
        # Word embedding plus the forward and backward character-LSTM states.
        self.lstm_size = word_embedding_size + 2 * char_embedding_size

        self.vocab = vocab
        self.tagset = tagset
        self.alphabet = alphabet

        self.dropout_prob_values = dropout_prob_values

        self.forward_initial_state = tf.placeholder(
            tf.float32, [None, rnn_cell.BasicLSTMCell(self.lstm_size).state_size], name="forward_lstm_initial_state"
        )
        self.backward_initial_state = tf.placeholder(
            tf.float32, [None, rnn_cell.BasicLSTMCell(self.lstm_size).state_size], name="backward_lstm_initial_state"
        )
        self.sentence_lengths = tf.placeholder(tf.int64, [None], name="sentence_lengths")
        self.tags = tf.placeholder(tf.int32, [None, num_steps], name="ground_truth_tags")
        # Element 0 is applied to the inputs, element 1 to the bidirectional
        # LSTM output (see the tf.nn.dropout calls below).
        self.dropout_prob = tf.placeholder(tf.float32, [None], name="dropout_keep_p")
        self.generate_lemmas = generate_lemmas

        global_step = tf.Variable(0, trainable=False)

        input_list = []
        # Weight matrices that receive the L2 penalty.
        regularize = []

        # Word-level embeddings
        if word_embedding_size:
            self.words = tf.placeholder(tf.int32, [None, num_steps], name="words")
            word_embeddings = tf.Variable(tf.random_uniform([len(vocab), word_embedding_size], -1.0, 1.0))
            we_lookup = tf.nn.embedding_lookup(word_embeddings, self.words)

            input_list.append(we_lookup)

        # Character-level embeddings
        if char_embedding_size:
            self.chars = tf.placeholder(tf.int32, [None, num_steps, num_chars], name="chars")
            self.chars_lengths = tf.placeholder(tf.int64, [None, num_steps], name="chars_lengths")

            char_embeddings = tf.Variable(tf.random_uniform([len(alphabet), char_embedding_size], -1.0, 1.0))
            ce_lookup = tf.nn.embedding_lookup(char_embeddings, self.chars)

            # Flatten sentences so that every word becomes one row of
            # num_chars character embeddings.
            reshaped_ce_lookup = tf.reshape(ce_lookup, [-1, num_chars, char_embedding_size], name="reshape-char_inputs")
            char_inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_chars, reshaped_ce_lookup)]

            char_inputs_lengths = tf.reshape(self.chars_lengths, [-1])

            with tf.variable_scope("char_forward"):
                char_lstm = rnn_cell.BasicLSTMCell(char_embedding_size)
                _, char_last_state = rnn.rnn(
                    cell=char_lstm, inputs=char_inputs, sequence_length=char_inputs_lengths, dtype=tf.float32
                )
                # Switch the scope to reuse mode so get_variable returns the
                # LSTM weight matrix created by rnn.rnn above.
                tf.get_variable_scope().reuse_variables()
                regularize.append(tf.get_variable("RNN/BasicLSTMCell/Linear/Matrix"))

            with tf.variable_scope("char_backward"):
                char_lstm_rev = rnn_cell.BasicLSTMCell(char_embedding_size)
                _, char_last_state_rev = rnn.rnn(
                    cell=char_lstm_rev,
                    inputs=self._reverse_seq(char_inputs, char_inputs_lengths),
                    sequence_length=char_inputs_lengths,
                    dtype=tf.float32,
                )
                tf.get_variable_scope().reuse_variables()
                regularize.append(tf.get_variable("RNN/BasicLSTMCell/Linear/Matrix"))

            # Keep the second half of each final LSTM state.
            last_char_lstm_state = tf.split(1, 2, char_last_state)[1]
            last_char_lstm_state_rev = tf.split(1, 2, char_last_state_rev)[1]

            last_char_states = tf.reshape(
                last_char_lstm_state, [-1, num_steps, char_embedding_size], name="reshape-charstates"
            )
            last_char_states_rev = tf.reshape(
                last_char_lstm_state_rev, [-1, num_steps, char_embedding_size], name="reshape-charstates_rev"
            )

            char_output = tf.concat(2, [last_char_states, last_char_states_rev])

            input_list.append(char_output)

        # All inputs correctly sliced
        input_list_dropped = [tf.nn.dropout(x, self.dropout_prob[0]) for x in input_list]
        inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, tf.concat(2, input_list_dropped))]

        with tf.variable_scope("forward"):
            lstm = rnn_cell.BasicLSTMCell(self.lstm_size)
            outputs, last_state = rnn.rnn(
                cell=lstm,
                inputs=inputs,
                dtype=tf.float32,
                initial_state=self.forward_initial_state,
                sequence_length=self.sentence_lengths,
            )

            tf.get_variable_scope().reuse_variables()
            regularize.append(tf.get_variable("RNN/BasicLSTMCell/Linear/Matrix"))

        with tf.variable_scope("backward"):
            lstm_rev = rnn_cell.BasicLSTMCell(self.lstm_size)
            outputs_rev_rev, last_state_rev = rnn.rnn(
                cell=lstm_rev,
                inputs=self._reverse_seq(inputs, self.sentence_lengths),
                dtype=tf.float32,
                initial_state=self.backward_initial_state,
                sequence_length=self.sentence_lengths,
            )

            outputs_rev = self._reverse_seq(outputs_rev_rev, self.sentence_lengths)

            tf.get_variable_scope().reuse_variables()
            regularize.append(tf.get_variable("RNN/BasicLSTMCell/Linear/Matrix"))

        # NOTE(review): outputs_rev has already been passed through
        # _reverse_seq above, and reversed() flips the step order of the list
        # once more here -- confirm this alignment with `outputs` is intended.
        outputs_bidi = [tf.concat(1, [o1, o2]) for o1, o2 in zip(outputs, reversed(outputs_rev))]

        output = tf.reshape(tf.concat(1, outputs_bidi), [-1, 2 * self.lstm_size], name="reshape-outputs_bidi")
        output_dropped = tf.nn.dropout(output, self.dropout_prob[1])

        # We are computing only the logits, not the actual softmax -- while
        # computing the loss, it is done by the sequence_loss_by_example and
        # during the runtime classification, the argmax over logits is enough.

        softmax_w = tf.get_variable("softmax_w", [2 * self.lstm_size, len(tagset)])
        logits_flatten = tf.nn.xw_plus_b(output_dropped, softmax_w, tf.get_variable("softmax_b", [len(tagset)]))
        regularize.append(softmax_w)

        self.logits = tf.reshape(logits_flatten, [-1, num_steps, len(tagset)], name="reshape-logits")
        estimated_tags_flat = tf.to_int32(tf.argmax(logits_flatten, dimension=1))
        self.last_state = last_state

        # Output mask: compute the loss only where the tag is not a padded
        # word (i.e. not the zero index).
        output_mask = tf.reshape(tf.to_float(tf.not_equal(self.tags, 0)), [-1])

        gt_tags_flat = tf.reshape(self.tags, [-1])
        tagging_loss = seq2seq.sequence_loss_by_example(
            logits=[logits_flatten], targets=[gt_tags_flat], weights=[output_mask]
        )

        # Accuracy over the unmasked positions only.
        tagging_accuracy = tf.reduce_sum(
            tf.to_float(tf.equal(estimated_tags_flat, gt_tags_flat)) * output_mask
        ) / tf.reduce_sum(output_mask)
        tf.scalar_summary("train_accuracy", tagging_accuracy, collections=["train"])
        tf.scalar_summary("dev_accuracy", tagging_accuracy, collections=["dev"])

        self.cost = tf.reduce_mean(tagging_loss)

        tf.scalar_summary("train_tagging_loss", tf.reduce_mean(tagging_loss), collections=["train"])
        tf.scalar_summary("dev_tagging_loss", tf.reduce_mean(tagging_loss), collections=["dev"])

        if generate_lemmas:
            with tf.variable_scope("decoder"):
                self.lemma_chars = tf.placeholder(tf.int32, [None, num_steps, num_chars + 2], name="lemma_chars")

                lemma_state_size = self.lstm_size

                lemma_w = tf.Variable(tf.random_uniform([lemma_state_size, len(alphabet)], 0.5), name="state_to_char_w")
                lemma_b = tf.Variable(tf.fill([len(alphabet)], -math.log(len(alphabet))), name="state_to_char_b")
                # Floor division keeps the embedding width an integer even
                # when true division is in effect (Python 3 or
                # `from __future__ import division`); a float is not a valid
                # shape entry.
                lemma_char_embeddings = tf.Variable(
                    tf.random_uniform(
                        [len(alphabet), lemma_state_size // (2 if supply_form_characters_to_lemma else 1)], -0.5, 0.5
                    ),
                    name="char_embeddings",
                )

                lemma_char_inputs = [
                    tf.squeeze(input_, [1])
                    for input_ in tf.split(
                        1,
                        num_chars + 2,
                        tf.reshape(self.lemma_chars, [-1, num_chars + 2], name="reshape-lemma_char_inputs"),
                    )
                ]

                if supply_form_characters_to_lemma:
                    char_inputs_zeros = [
                        tf.squeeze(chars, [1])
                        for chars in tf.split(
                            1, num_chars, tf.reshape(self.chars, [-1, num_chars], name="reshape-char_inputs_zeros")
                        )
                    ]
                    # Pad the form characters with one all-zero step so the
                    # list has num_chars + 1 entries.
                    char_inputs_zeros.append(char_inputs_zeros[0] * 0)

                    def loop(prev_state, i):
                        # it takes the previous hidden state, finds the character and formats it
                        # as input for the next time step ... used in the decoder in the "real decoding scenario"
                        out_activation = tf.matmul(prev_state, lemma_w) + lemma_b
                        prev_char_index = tf.argmax(out_activation, 1)
                        return tf.concat(
                            1,
                            [
                                tf.nn.embedding_lookup(lemma_char_embeddings, prev_char_index),
                                tf.nn.embedding_lookup(lemma_char_embeddings, char_inputs_zeros[i]),
                            ],
                        )

                    embedded_lemma_characters = []
                    for lemma_chars, form_chars in zip(lemma_char_inputs[:-1], char_inputs_zeros):
                        embedded_lemma_characters.append(
                            tf.concat(
                                1,
                                [
                                    tf.nn.embedding_lookup(lemma_char_embeddings, lemma_chars),
                                    tf.nn.embedding_lookup(lemma_char_embeddings, form_chars),
                                ],
                            )
                        )
                else:

                    def loop(prev_state, _):
                        # it takes the previous hidden state, finds the character and formats it
                        # as input for the next time step ... used in the decoder in the "real decoding scenario"
                        out_activation = tf.matmul(prev_state, lemma_w) + lemma_b
                        prev_char_index = tf.argmax(out_activation, 1)
                        return tf.nn.embedding_lookup(lemma_char_embeddings, prev_char_index)

                    embedded_lemma_characters = []
                    for lemma_chars in lemma_char_inputs[:-1]:
                        embedded_lemma_characters.append(tf.nn.embedding_lookup(lemma_char_embeddings, lemma_chars))

                def sampling_loop(prev_state, i):
                    # Element-wise choice between the ground-truth input and
                    # the decoded input; the threshold shrinks as global_step
                    # grows.
                    threshold = scheduled_sampling / (scheduled_sampling + tf.exp(tf.to_float(global_step)))
                    condition = tf.less_equal(tf.random_uniform(tf.shape(embedded_lemma_characters[0])), threshold)
                    return tf.select(condition, embedded_lemma_characters[i], loop(prev_state, i))

                decoder_cell = rnn_cell.BasicLSTMCell(lemma_state_size)

                if scheduled_sampling:
                    lf = sampling_loop
                else:
                    lf = None

                # Training decoder: ground-truth inputs, optionally mixed
                # with decoded inputs through sampling_loop.
                if use_attention:
                    lemma_outputs_train, _ = seq2seq.attention_decoder(
                        embedded_lemma_characters, output_dropped, reshaped_ce_lookup, decoder_cell, loop_function=lf
                    )
                else:
                    lemma_outputs_train, _ = seq2seq.rnn_decoder(
                        embedded_lemma_characters, output_dropped, decoder_cell, loop_function=lf
                    )

                # The runtime decoder below is built with variable reuse
                # switched on for this scope.
                tf.get_variable_scope().reuse_variables()

                # Runtime decoder: always feeds back its own predictions.
                if use_attention:
                    lemma_outputs_runtime, _ = seq2seq.attention_decoder(
                        embedded_lemma_characters, output_dropped, reshaped_ce_lookup, decoder_cell, loop_function=loop
                    )
                else:
                    lemma_outputs_runtime, _ = seq2seq.rnn_decoder(
                        embedded_lemma_characters, output_dropped, decoder_cell, loop_function=loop
                    )

                lemma_char_logits_train = [tf.matmul(o, lemma_w) + lemma_b for o in lemma_outputs_train]

                lemma_char_logits_runtime = [tf.matmul(o, lemma_w) + lemma_b for o in lemma_outputs_runtime]

                self.lemmas_decoded = tf.reshape(
                    tf.transpose(tf.argmax(tf.pack(lemma_char_logits_runtime), 2)), [-1, num_steps, num_chars + 1]
                )

                # Targets are the lemma characters shifted by one step;
                # positions whose target is index 0 get zero weight.
                lemma_char_weights = []
                for lemma_chars in lemma_char_inputs[1:]:
                    lemma_char_weights.append(tf.to_float(tf.not_equal(lemma_chars, 0)))

                lemmatizer_loss = seq2seq.sequence_loss(
                    lemma_char_logits_train, lemma_char_inputs[1:], lemma_char_weights
                )

                lemmatizer_loss_runtime = seq2seq.sequence_loss(
                    lemma_char_logits_runtime, lemma_char_inputs[1:], lemma_char_weights
                )

                tf.scalar_summary(
                    "train_lemma_loss_with_gt_inputs", tf.reduce_mean(lemmatizer_loss), collections=["train"]
                )
                tf.scalar_summary("dev_lemma_loss_with_gt_inputs", tf.reduce_mean(lemmatizer_loss), collections=["dev"])

                tf.scalar_summary(
                    "train_lemma_loss_with_decoded_inputs",
                    tf.reduce_mean(lemmatizer_loss_runtime),
                    collections=["train"],
                )
                tf.scalar_summary(
                    "dev_lemma_loss_with_decoded_inputs", tf.reduce_mean(lemmatizer_loss_runtime), collections=["dev"]
                )

                self.cost += tf.reduce_mean(lemmatizer_loss) + tf.reduce_mean(lemmatizer_loss_runtime)

        self.cost += l2 * sum([tf.nn.l2_loss(variable) for variable in regularize])

        tf.scalar_summary("train_optimization_cost", self.cost, collections=["train"])
        tf.scalar_summary("dev_optimization_cost", self.cost, collections=["dev"])

        # Not called directly: it is in scope for the eval() below, so an
        # optimizer_desc expression can reference it.
        def decay(learning_rate, exponent, iteration_steps):
            return tf.train.exponential_decay(learning_rate, global_step, iteration_steps, exponent, staircase=True)

        # NOTE(security): eval() executes arbitrary Python; optimizer_desc
        # must never come from untrusted input.
        optimizer = eval("tf.train." + optimizer_desc)
        self.train = optimizer.minimize(self.cost, global_step=global_step)

        if threads > 0:
            self.session = tf.Session(
                config=tf.ConfigProto(inter_op_parallelism_threads=threads, intra_op_parallelism_threads=threads)
            )
        else:
            self.session = tf.Session()
        self.session.run(tf.initialize_all_variables())

        if write_summaries:
            self.summary_train = tf.merge_summary(tf.get_collection("train"))
            self.summary_dev = tf.merge_summary(tf.get_collection("dev"))
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
            self.summary_writer = tf.train.SummaryWriter("logs/" + timestamp + "_" + experiment_name)

        self.steps = 0
コード例 #31
0
    def __init__(self, CellType, is_training, config):
        """Build an unrolled multi-layer RNN language-model graph.

        Args:
            CellType: callable invoked as ``CellType(hidden_size)`` to create
                the recurrent cell that is stacked ``config.num_layers`` times.
            is_training: when truthy, dropout is added (if
                ``config.keep_prob < 1``) and a clipped gradient-descent
                ``train_op`` is built; otherwise ``train_op`` is a no-op.
            config: object read for ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``num_layers`` and
                ``init_scale``; ``keep_prob`` and ``max_grad_norm`` are read
                only when training.
        """
        n_batch = config.batch_size
        n_steps = config.num_steps
        self.batch_size = n_batch
        self.num_steps = n_steps
        hidden = config.hidden_size
        n_vocab = config.vocab_size

        # Token ids for inputs and targets, both [batch_size, num_steps].
        self.input_data = tf.placeholder(
            tf.int32, [n_batch, n_steps], name="input_data")
        self.targets = tf.placeholder(
            tf.int32, [n_batch, n_steps], name="targets")

        # One cell object, optionally wrapped in dropout, repeated per layer.
        layer_cell = CellType(hidden)
        apply_dropout = is_training and config.keep_prob < 1
        if apply_dropout:
            layer_cell = rnn_cell.DropoutWrapper(
                layer_cell, output_keep_prob=config.keep_prob)
        stacked = rnn_cell.MultiRNNCell([layer_cell] * config.num_layers)
        self.initial_state = stacked.zero_state(n_batch, tf.float32)

        # Shared uniform initializer for the embedding, RNN and softmax variables.
        uniform_init = tf.random_uniform_initializer(
            -config.init_scale, config.init_scale)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [n_vocab, hidden], initializer=uniform_init)
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)

        if apply_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        # Manually unroll the network; variables are reused after step 0.
        step_outputs = []
        step_states = []
        current_state = self.initial_state
        with tf.variable_scope("RNN", initializer=uniform_init):
            for t in range(n_steps):
                if t >= 1:
                    tf.get_variable_scope().reuse_variables()
                cell_output, current_state = stacked(
                    embedded[:, t, :], current_state)
                step_outputs.append(cell_output)
                step_states.append(current_state)

        self.final_state = step_states[-1]

        # Flatten the per-step outputs and project them to vocabulary logits.
        flat_output = tf.reshape(tf.concat(1, step_outputs), [-1, hidden])
        softmax_w = tf.get_variable(
            "softmax_w", [hidden, n_vocab], initializer=uniform_init)
        softmax_b = tf.get_variable(
            "softmax_b", [n_vocab], initializer=uniform_init)
        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)

        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([n_batch * n_steps])  # every position weighted 1

        # Per-position loss, summed and divided by the batch size.
        per_position_loss = sequence_loss_by_example(
            [logits], [flat_targets], [unit_weights], n_vocab)
        cost = tf.div(tf.reduce_sum(per_position_loss), n_batch, name="cost")
        self.cost = cost

        if not is_training:
            # Evaluation/test graphs get a placeholder op instead of an update.
            self.train_op = tf.no_op()
            return

        # Non-trainable learning-rate variable so it can be changed externally.
        self.lr = tf.Variable(1.0, trainable=False)

        # Gradient descent on globally clipped gradients.
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(cost, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(
            raw_grads, config.max_grad_norm)
        sgd = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = sgd.apply_gradients(
            zip(clipped_grads, trainable), name="train")
コード例 #32
0
ファイル: model.py プロジェクト: PFCM/tml
    def __init__(self, is_training, config):
        """Construct the LSTM language-model graph.

        Args:
            is_training: when truthy, dropout is applied (if
                ``config.keep_prob < 1``) and the optimisation ops are built;
                otherwise construction stops after the cost and final state.
            config: object read for ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size`` and ``num_layers``;
                ``keep_prob`` and ``max_grad_norm`` are read only when
                training.
        """
        n_batch = config.batch_size
        n_steps = config.num_steps
        self.batch_size, self.num_steps = n_batch, n_steps
        hidden = config.hidden_size
        n_vocab = config.vocab_size

        self._input_data = tf.placeholder(
            tf.int32, [n_batch, n_steps], name="input_data")
        self._targets = tf.placeholder(
            tf.int32, [n_batch, n_steps], name="targets")

        # The recurrent layer: a basic LSTM, optionally with output dropout,
        # repeated num_layers times.
        layer = rnn_cell.BasicLSTMCell(hidden, forget_bias=1.0)
        use_dropout = is_training and config.keep_prob < 1
        if use_dropout:
            layer = rnn_cell.DropoutWrapper(
                layer, output_keep_prob=config.keep_prob)
        stacked = rnn_cell.MultiRNNCell([layer] * config.num_layers)

        self._initial_state = stacked.zero_state(n_batch, tf.float32)

        # Embedding lookup is pinned to the CPU; the result is split into one
        # [batch, hidden] tensor per time step.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [n_vocab, hidden])
            embedded = tf.nn.embedding_lookup(embedding, self._input_data)
            step_inputs = []
            for piece in tf.split(1, n_steps, embedded):
                step_inputs.append(tf.squeeze(piece, [1]))

        if use_dropout:
            dropped = []
            for step_input in step_inputs:
                dropped.append(tf.nn.dropout(step_input, config.keep_prob))
            step_inputs = dropped

        from tensorflow.models.rnn import rnn
        rnn_outputs, states = rnn.rnn(
            stacked, step_inputs, initial_state=self._initial_state)

        # Flatten the step outputs and project onto the vocabulary.
        flat_outputs = tf.reshape(tf.concat(1, rnn_outputs), [-1, hidden])
        softmax_w = tf.get_variable("softmax_W", [hidden, n_vocab])
        softmax_b = tf.get_variable("softmax_b", [n_vocab])
        logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)

        # Softmax output is kept only for sampling.
        self._softmax_out = tf.nn.softmax(logits)

        flat_targets = tf.reshape(self._targets, [-1])
        unit_weights = tf.ones([n_batch * n_steps])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [unit_weights], n_vocab)

        summed_loss = tf.reduce_sum(loss)
        cost = tf.div(summed_loss, tf.constant(n_batch, dtype=tf.float32))
        self._cost = cost
        self._final_state = states[-1]

        if is_training:
            # Optimisation ops are only needed for the training graph.
            self._lr = tf.Variable(0.0, trainable=False)
            trainable = tf.trainable_variables()
            clipped_grads, _ = tf.clip_by_global_norm(
                tf.gradients(cost, trainable), config.max_grad_norm)
            # Author's note: plain gradient descent already did well with
            # clipping and the learning-rate schedule; FTRL is what is wired in.
            # NOTE(review): this reads `self.lr`, not `self._lr` -- presumably
            # a property defined on the class; confirm.
            ftrl = tf.train.FtrlOptimizer(self.lr)
            self._train_op = ftrl.apply_gradients(zip(clipped_grads, trainable))
コード例 #33
0
    tf.placeholder(tf.int32, shape=[_seq_length]) for _ in xrange(_batch_size)
]
# Target placeholders: a list of `_batch_size` int32 placeholders, each of
# shape [_seq_length].
# NOTE(review): `targets` is not referenced anywhere in the visible part of
# this script -- the loss below is computed against `encoder_inputs`. Confirm
# whether that is intentional.
targets = [
    tf.placeholder(tf.int32, shape=[_seq_length]) for _ in xrange(_batch_size)
]
# Loss weights: all ones, with the same list length and element shape as
# `targets`.
# NOTE(review): the lists here have `_batch_size` elements of shape
# [_seq_length]; verify this layout against what the seq2seq helpers expect.
target_weights = [
    tf.ones(dtype=tf.float32, shape=[_seq_length]) for _ in xrange(_batch_size)
]

# set up the tied seq-to-seq LSTM with given parameters
single_cell = rnn_cell.BasicLSTMCell(_lstm_cell_dimension)
# The same cell object is repeated `_lstm_num_layers` times.
cell = rnn_cell.MultiRNNCell([single_cell] * _lstm_num_layers)
# `encoder_inputs` and `decoder_inputs` are defined before the visible part of
# this script.
outputs, _ = seq2seq.embedding_tied_rnn_seq2seq(encoder_inputs, decoder_inputs,
                                                cell, _vocab_size_including_GO)
# Loss of the decoder outputs measured against `encoder_inputs`.
seqloss = seq2seq.sequence_loss_by_example(outputs, encoder_inputs,
                                           target_weights,
                                           _vocab_size_including_GO)

# The SummaryWriter is constructed for the session graph, but the returned
# writer object is not kept.
tf.train.SummaryWriter(_train_log_dir, sess.graph_def)
global_step = tf.Variable(0, name='global_step', trainable=False)
# Variables are initialised here, before the optimizer ops below are created.
sess.run(tf.initialize_all_variables())

# Set up the optimizer with gradient clipping
params = tf.trainable_variables()
gradients = tf.gradients(seqloss, params)
optimizer = tf.train.GradientDescentOptimizer(_lstm_learn_rate)
# `norm` (the global norm before clipping) is not used in the visible code.
clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                 _lstm_max_grad_norm)
# Applying the gradients also increments `global_step`.
train_op = optimizer.apply_gradients(zip(clipped_gradients, params),
                                     global_step=global_step)
コード例 #34
0
ファイル: gan_new.py プロジェクト: liamb315/TensorFlow

if __name__ == '__main__':
	# Start from a clean graph; close a session left over from a previous run
	# in the same interpreter (e.g. an interactive shell) before opening a new one.
	ops.reset_default_graph()
	if 'session' in globals():
		session.close()
	session = tf.Session()
	args = parse_args()

	# Generator Training
	input_data    = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
	# Fixed: this placeholder was bound to the misspelled name `targts` while
	# the loss below reads `targets`, which raised NameError.
	targets       = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) # Should be 1 for real
	gen_seq       = generator(input_data, args)
	# The generator is scored by the discriminator: element [1] of the
	# discriminator's result is used because the loss wants logits, not probs.
	gen_loss      = seq2seq.sequence_loss_by_example(
					[discriminator(gen_seq, args)[1]],
					[tf.reshape(targets, [-1])],
					[tf.ones([args.batch_size * args.seq_length])],
					2)
	# Mean loss per sequence position.
	gen_cost      = tf.reduce_sum(gen_loss) / args.batch_size / args.seq_length
	# Only variables whose name starts with "generator/" are updated here.
	gen_vars      = [v for v in tf.all_variables() if v.name.startswith("generator/")]
	gen_optimizer = tf.train.AdamOptimizer(args.learning_rate_gen)
	gen_train_op  = minimize_and_clip(gen_optimizer, objective = gen_cost, var_list = gen_vars)


	# Discriminator Training
	# TODO:  Should this be tf.int32?
	# Fixed: `tf.placholder` was a typo for `tf.placeholder` (AttributeError).
	input_real_seq  = tf.placeholder(tf.float32, [args.batch_size, args.seq_length, args.vocab_size])
	input_gen_seq   = tf.placeholder(tf.float32, [args.batch_size, args.seq_length, args.vocab_size])

	dis_real_prob   = discriminator(input_real_seq, args)
	dis_fake_prob   = discriminator(input_gen_seq, args)
コード例 #35
0
    def __init__(self, args, predict=False):
        """Build the LSTM sequence-model graph.

        Args:
            args: configuration object; ``args.vocabSize`` is the only
                attribute read here.
            predict: when True the graph is built for a single sequence of a
                single step and ``feedBack`` is passed to the decoder as its
                ``loop_function``; when False the training sizes are used and
                no loop function is installed.
        """
        self.args = args

        # Various parameters for the LSTM.
        # Hardcoded here for now.
        numSteps = 50  # Steps to unroll for
        batchSize = 50
        rnnSize = 128
        numLayers = 2
        gradClip = 5

        # Prediction runs one sequence, one step at a time. This override must
        # come AFTER the defaults above: it used to run first and was
        # immediately overwritten, so predict mode silently kept the 50x50
        # training shapes.
        if predict:
            batchSize = 1
            numSteps = 1

        # Create LSTM layer and stack multiple layers.
        lstmCell = rnn_cell.BasicLSTMCell(rnnSize)
        lstmNet = rnn_cell.MultiRNNCell([lstmCell] * numLayers)

        # Define placeholders.
        self.inputData = tf.placeholder(tf.int32, [batchSize, numSteps])
        self.targetOutput = tf.placeholder(tf.int32, [batchSize, numSteps])
        self.initialState = lstmNet.zero_state(batchSize, tf.float32)

        # If rnn_decoder is told to loop, this function will return to it the output at time
        # 't' for feeding as the input at time 't+1'. During training, this is generally
        # not done because we want to feed the *correct* input at all times and not what
        # is output. During prediction/testing, we loop the output back to the input to
        # generate our sequence of notes.
        def feedBack(prev, _):
            # softmax_w, softmax_b and embedding are bound below, before the
            # decoder can call this closure.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        with tf.variable_scope('nn_lstm'):
            softmax_w = tf.get_variable("softmax_w", [rnnSize, args.vocabSize])
            softmax_b = tf.get_variable("softmax_b", [args.vocabSize])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocabSize, rnnSize])
                # One [batchSize, rnnSize] tensor per time step.
                inputs = tf.split(
                    1, numSteps,
                    tf.nn.embedding_lookup(embedding, self.inputData))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Call seq2seq rnn decoder.
        outputs, states = seq2seq.rnn_decoder(
            inputs,
            self.initialState,
            lstmNet,
            loop_function=feedBack if predict else None,
            scope='nn_lstm')
        output = tf.reshape(tf.concat(1, outputs), [-1, rnnSize])

        # Logits and probabilities.
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        # Calculate loss compared to targetOutput, every position weighted 1.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targetOutput, [-1])],
            [tf.ones([batchSize * numSteps])], args.vocabSize)

        # Set the cost to minimize total loss.
        self.cost = tf.reduce_sum(loss)

        self.finalState = states[-1]
        # The learning rate is a non-trainable variable initialised to 0.0, so
        # it is not touched by the optimizer itself.
        self.learningRate = tf.Variable(0.0, trainable=False)

        # Define gradient and trainable variables for adjusting
        # during training/optimization.
        trainableVars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainableVars), gradClip)

        # We use the Adam optimizer on the clipped gradients.
        optimizer = tf.train.AdamOptimizer(self.learningRate)
        self.trainStep = optimizer.apply_gradients(zip(grads, trainableVars))
コード例 #36
0
ファイル: modifier.py プロジェクト: manjarisinha248/robo-chef
    def model_with_buckets(self):
        """Build one RNN model per bucket and collect outputs, losses and costs.

        Each entry of ``self.buckets`` is read as ``(phrase_num, phrase_len)``.
        For bucket ``j`` the model is built by ``self.build_rnn_model`` from the
        first ``phrase_len`` refinement inputs and, for each of the first
        ``phrase_num`` recipe segments, its first ``phrase_len`` inputs. The
        "bucket_model_outside" variable scope is entered with ``reuse=True``
        for every bucket after the first, so variables are shared.

        Returns:
            A tuple ``(outputs, losses, costs)`` of lists with one entry per
            bucket: the softmax of each bucket logit, the result of
            ``sequence_loss_by_example``, and the summed loss.
        """
        # Original author's rationale: build an rnn model for each bucket, since
        # TensorFlow can't deal with variable-length sequences. Variables are
        # shared across the different buckets, and if you only ask for the outputs
        # up to a certain bucket, then additional computation won't be done past
        # the steps in that bucket.

        #UNCOMMENT FOR ATTENTION
        all_inputs = self._input_refinement + [
            seg for seg in self._input_recipe_segments
        ] + self._target
        costs = []
        losses = []
        outputs = []
        with tf.op_scope(all_inputs, None, "model_with_buckets"):
            for j in xrange(len(self.buckets)):
                # First bucket creates the variables; later buckets reuse them.
                if j > 0:
                    outside_reuse = True
                else:
                    outside_reuse = None
                with tf.variable_scope("bucket_model_outside",
                                       reuse=outside_reuse):
                    phrase_num = self.buckets[j][0]
                    phrase_len = self.buckets[j][1]
                    bucket_refinement_inputs = [
                        self._input_refinement[i] for i in xrange(phrase_len)
                    ]
                    bucket_recipe_segments_inputs = []
                    bucket_target = []
                    bucket_weights = []
                # Only referenced by the commented-out attention code below.
                forward_attention_weights = []
                #backward_attention_weights = []
                for i in xrange(phrase_num):
                    with tf.variable_scope("bucket_model_outside",
                                           reuse=outside_reuse):
                        bucket_target.append(self._target[i])
                        # Weight of 1 for every example in the batch.
                        bucket_weights.append(
                            tf.constant(1,
                                        dtype=np.float32,
                                        shape=[self._batch_size]))
                    # if j > 0 and i < self.buckets[j-1][0]:
                    # with tf.variable_scope("attention", reuse=True):
                    # forward_weight = tf.get_variable("forward_attention_weight%d"%(i))
                    #backward_weight = tf.get_variable("backward_attention_weight%d"%(i))
                    # else:
                    # with tf.variable_scope("attention", reuse=None):
                    # forward_weight = tf.get_variable("forward_attention_weight%d"%(i), [self._batch_size], dtype=tf.float32)
                    #backward_weight = tf.get_variable("backward_attention_weight%d"%(i), [self._batch_size], dtype=tf.float32)
                    # forward_attention_weights.append(forward_weight)
                    #backward_attention_weights.append(backward_weight)
                    with tf.variable_scope("bucket_model_outside",
                                           reuse=outside_reuse):
                        # First phrase_len inputs of recipe segment i.
                        bucket_recipe_segments_inputs.append([])
                        for k in xrange(phrase_len):
                            bucket_recipe_segments_inputs[-1].append(
                                self._input_recipe_segments[i][k])

                with tf.variable_scope("bucket_model_outside",
                                       reuse=outside_reuse):
                    bucket_logits = self.build_rnn_model(
                        bucket_refinement_inputs,
                        bucket_recipe_segments_inputs)
                    #forward_attention_weights)#, backward_attention_weights)
                    outputs.append([
                        tf.nn.softmax(bucket_logit)
                        for bucket_logit in bucket_logits
                    ])
                    # The final argument (number of classes) is fixed at 2.
                    loss = seq2seq.sequence_loss_by_example(
                        bucket_logits, bucket_target, bucket_weights, 2)
                    losses.append(loss)
                    costs.append(tf.reduce_sum(loss))
                    tf.histogram_summary("cost_bucket_%d" % j, costs[-1])
        # for i,f in enumerate(forward_attention_weights):
        # tf.histogram_summary("forward_attention_weight%d"%i, f)
        #tf.histogram_summary("backward_attention_weight%d"%i, backward_attention_weights[i])
        return outputs, losses, costs
コード例 #37
0
ファイル: model.py プロジェクト: tmusy/char-rnn-tensorflow
    def __init__(self, args, infer=False):
        """Build the character-level RNN language-model graph.

        Args:
            args: options object read for ``model`` ('rnn', 'gru' or 'lstm'),
                ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
                ``vocab_size`` and ``grad_clip``. When ``infer`` is true its
                ``batch_size`` and ``seq_length`` are overwritten with 1.
            infer: when true the graph is sized for one sequence of one step
                and the decoder is given a loop function that feeds back the
                embedding of its own arg-max prediction.

        Raises:
            Exception: if ``args.model`` is not one of the supported types.
        """
        self.args = args
        if infer:
            # Sampling works on a single character at a time.
            args.batch_size = 1
            args.seq_length = 1

        # Pick the cell class that matches the requested model type.
        supported_cells = (
            ('rnn', 'BasicRNNCell'),
            ('gru', 'GRUCell'),
            ('lstm', 'BasicLSTMCell'),
        )
        for model_kind, class_name in supported_cells:
            if args.model == model_kind:
                make_cell = getattr(rnn_cell, class_name)
                break
        else:
            raise Exception("model type not supported: {}".format(args.model))

        single_layer = make_cell(args.rnn_size)
        stacked = rnn_cell.MultiRNNCell([single_layer] * args.num_layers)
        self.cell = stacked

        # Placeholders for input and target ids, [batch_size, seq_length].
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        # Zero initial state of the stacked cell.
        self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

        # Variables live under the 'rnnlm' scope (e.g. "rnnlm/softmax_w").
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                "softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                # Dense embedding of the input ids, split into one tensor
                # per time step.
                embedding = tf.get_variable(
                    "embedding", [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                inputs = []
                for step in tf.split(1, args.seq_length, embedded):
                    inputs.append(tf.squeeze(step, [1]))

        def feed_previous(prev, _):
            # Project the previous output to vocabulary scores and return the
            # embedding of the highest-scoring symbol (no gradient through it).
            scores = tf.matmul(prev, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        loop_function = None
        if infer:
            loop_function = feed_previous

        # Run the decoder over the per-step inputs.
        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            stacked,
            loop_function=loop_function,
            scope='rnnlm')
        flat_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

        # Final projection layer and its softmax.
        self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        # Per-position loss with unit weights.
        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [unit_weights], args.vocab_size)

        # Mean loss per position: divide by batch size, then sequence length.
        per_batch = tf.reduce_sum(loss) / args.batch_size
        self.cost = per_batch / args.seq_length

        self.final_state = last_state

        # Adam on globally clipped gradients; the learning rate is a
        # non-trainable variable initialised to 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        adam = tf.train.AdamOptimizer(self.lr)
        self.train_op = adam.apply_gradients(zip(clipped_grads, trainable))
コード例 #38
0
    # add dropout here if needed

    # create outputs and states
    outputs, states = rnn.rnn(cell, inputs, initial_state=initial_state)

    # reshape output
    output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])

    # specify XW + b
    logits = tf.nn.xw_plus_b(output,
                             tf.get_variable('softmax_w', [hidden_size, vocab_size]),
                             tf.get_variable('softmax_b', [vocab_size]))

    # define loss
    loss = seq2seq.sequence_loss_by_example([logits],
                                            [tf.reshape(targets, [-1])],
                                            [tf.ones([batch_size * num_steps])],
                                            vocab_size)

    # define individual cost
    _cost = tf.reduce_sum(loss) / batch_size

    # get final state
    final_state = states[-1]

    # create learning rate variable
    _lr = tf.Variable(0.0, trainable=False)

    # define gradient clipping
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(_cost, tvars), max_grad_norm)
コード例 #39
0
ファイル: BiRNN_Embed.py プロジェクト: zhuchenxi/LanideNN
    def __init__(self, sess, params, vocabs_size):
        """Build the bidirectional GRU tagging graph and initialise its variables.

        Args:
            sess: TensorFlow session; its graph is handed to the summary writer
                and it is used to run the variable initialiser.
            params: object whose ``get(name)`` supplies "batch_size",
                "max_length", "size", "num_layers", "learning_rate",
                "embedding_size" and "corpus_name".
            vocabs_size: passed to ``NNModel.Model.__init__`` and logged.
                NOTE(review): ``self.vocab_sizes`` is read below but not
                assigned in this method -- presumably set by the base-class
                constructor; confirm.
        """
        NNModel.Model.__init__(self, vocabs_size)

        self.params = params

        self.batch_size = self.params.get("batch_size")
        self.max_length = self.params.get("max_length")
        self.size = self.params.get("size")
        self.num_layers = self.params.get("num_layers")
        # the learning rate could be a float, but this way we can adjust it during training
        # self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate = self.params.get("learning_rate")
        self.embedding_size = self.params.get("embedding_size")
        # self.global_step = tf.Variable(0, trainable=False)
        self.incorrect = [0] * self.max_length
        self.global_step = 0
        self.corpus_name = self.params.get("corpus_name")

        logging.info(
            "BiRNN model created with {0} layers of {1} cells. Embedding = {2}. Vocabulary sizes = {3}, length = {4}, batch = {5}."
            .format(self.num_layers, self.size, self.embedding_size,
                    vocabs_size, self.max_length, self.batch_size))

        # forward RNN
        # NOTE(review): when num_layers > 1 the stack below holds
        # 1 + num_layers cells (fcell plus num_layers copies of fcell2);
        # confirm that the extra layer is intended.
        with tf.variable_scope('forward'):
            fcell = rnn_cell.GRUCell(self.size, input_size=self.embedding_size)
            forward_cell = fcell
            if self.num_layers > 1:
                fcell2 = rnn_cell.GRUCell(self.size)
                forward_cell = rnn_cell.MultiRNNCell([fcell] +
                                                     ([fcell2] *
                                                      self.num_layers))

        # backward RNN (built the same way as the forward one)
        with tf.variable_scope('backward'):
            bcell = rnn_cell.GRUCell(self.size, input_size=self.embedding_size)
            backward_cell = bcell
            if self.num_layers > 1:
                bcell2 = rnn_cell.GRUCell(self.size)
                backward_cell = rnn_cell.MultiRNNCell([bcell] +
                                                      ([bcell2] *
                                                       self.num_layers))

        #seq_len = tf.fill([self.batch_size], constant(self.max_length, dtype=tf.int64))

        # self.inputs = tf.placeholder(tf.float32, shape=[self.max_length, self.batch_size, self.vocab_sizes[0]], name="inputs")
        # One int32 placeholder per position (max_length of them) for inputs
        # and for targets; the batch dimension is left unspecified.
        self.inputs = [
            tf.placeholder(tf.int32, shape=[None], name="inputs{0}".format(i))
            for i in range(self.max_length)
        ]
        self.targets = [
            tf.placeholder(tf.int32, shape=[None], name="targets{0}".format(i))
            for i in range(self.max_length)
        ]

        self.sentence_lengths = tf.placeholder(tf.int64,
                                               shape=[None],
                                               name="sequence_lengths")
        # Scalar fed at run time and passed to tf.nn.dropout below.
        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  shape=[],
                                                  name="dropout")

        # Embedding matrix initialised uniformly in [-1, 1).
        self.word_embeddings = tf.Variable(
            tf.random_uniform([self.vocab_sizes[0], self.embedding_size], -1.0,
                              1.0))
        embedded_inputs = [
            tf.nn.embedding_lookup(self.word_embeddings, input_)
            for input_ in self.inputs
        ]
        dropped_embedded_inputs = [
            tf.nn.dropout(i, self.dropout_placeholder) for i in embedded_inputs
        ]  # the dropout value is a real number

        weights = {
            # Hidden layer weights => 2*n_hidden because of forward + backward cells
            # 'hidden': tf.Variable(tf.random_uniform([self.vocab_sizes[0], 2 * size]), name="hidden-weight"),
            'out':
            tf.Variable(tf.random_uniform([2 * self.size,
                                           self.vocab_sizes[1]]),
                        name="out-weight")
        }
        biases = {
            # 'hidden': tf.Variable(tf.random_uniform([2 * size]), name="hidden-bias"),
            'out':
            tf.Variable(tf.random_uniform([self.vocab_sizes[1]]),
                        name="out-bias")
        }

        # hack to omit information from RNN creation
        # NOTE(review): the root logger is set back to INFO afterwards
        # regardless of the level it had before this call.
        logging.getLogger().setLevel(logging.CRITICAL)
        with tf.variable_scope('BiRNN-net'):
            # bidi_layer = BidirectionalRNNLayer(forward_cell, backward_cell, dropped_embedded_inputs, self.sentence_lengths)
            # with tf.variable_scope('forward'):
            #     output_fw, last_state = rnn.rnn(cell=forward_cell, inputs=dropped_embedded_inputs, dtype=tf.float32, sequence_length=self.sentence_lengths)
            #
            # with tf.variable_scope('backward'):
            #     outputs_rev_rev, last_state_rev = rnn.rnn(cell=backward_cell, inputs=rnn._reverse_seq(dropped_embedded_inputs, self.sentence_lengths), dtype=tf.float32,
            #                                               sequence_length=self.sentence_lengths)
            #     output_bw = self.rnn._reverse_seq(outputs_rev_rev, self.sentence_lengths)
            #
            # outputs = [array_ops.concat(1, [fw, bw]) for fw, bw in zip(output_fw, output_bw)]
            outputs = rnn.bidirectional_rnn(
                forward_cell,
                backward_cell,
                dropped_embedded_inputs,
                sequence_length=self.sentence_lengths,
                dtype=tf.float32)

        logging.getLogger().setLevel(logging.INFO)

        self.out = []
        self.probs = []
        # Author's note: after the switch to TF 0.8 it started outputting some
        # merges for FC and BC, hence only outputs[0] is iterated here.
        for o in outputs[0]:
            # TODO: add tf.nn.relu(MATMUL + BIAS)?
            intermediate_out = tf.matmul(o, weights['out']) + biases['out']
            self.out.append(intermediate_out)
            self.probs.append(tf.nn.softmax(intermediate_out))

        # Unit weights for every position.
        loss = seq2seq.sequence_loss_by_example(self.out, self.targets,
                                                [tf.ones([self.batch_size])] *
                                                self.max_length,
                                                self.vocab_sizes[1])

        self.cost = tf.reduce_sum(loss) / self.batch_size

        tf.scalar_summary("Cost", self.cost)

        # NOTE(review): the optimizer minimises `loss`, not the `self.cost`
        # that is logged above; confirm this is intended.
        self.updates = tf.train.AdamOptimizer(
            self.learning_rate).minimize(loss)

        self.saver = tf.train.Saver(max_to_keep=0)  # don't remove old models

        self.summaries = tf.merge_all_summaries()
        # Summaries are written to the hard-coded "tmp" directory.
        self.sum_writer = tf.python.training.summary_io.SummaryWriter(
            "tmp", sess.graph)

        # Initializing the variables & Launch the graph

        sess.run(tf.initialize_all_variables())
        logging.info("BiRNN model initialized.")
コード例 #40
0
    def __init__(self, is_training, config):
        """Assemble the LSTM language-model graph.

        Args:
            is_training: truthy to add dropout (when ``config.keep_prob < 1``)
                and the optimisation ops; falsy to stop once the cost and
                final state have been defined.
            config: object read for ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size`` and ``num_layers``, plus
                ``keep_prob`` and ``max_grad_norm`` when training.
        """
        batch = config.batch_size
        steps = config.num_steps
        self.batch_size = batch
        self.num_steps = steps
        width = config.hidden_size
        vocab = config.vocab_size

        ids_shape = [batch, steps]
        self._input_data = tf.placeholder(tf.int32, ids_shape,
                                          name="input_data")
        self._targets = tf.placeholder(tf.int32, ids_shape, name="targets")

        # Recurrent layer: basic LSTM, optionally wrapped with output dropout.
        base_cell = rnn_cell.BasicLSTMCell(width, forget_bias=1.0)
        with_dropout = is_training and config.keep_prob < 1
        if with_dropout:
            base_cell = rnn_cell.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        network = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)

        self._initial_state = network.zero_state(batch, tf.float32)

        # Embedding lookup is placed on the CPU and then cut into one
        # [batch, width] slice per time step.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab, width])
            looked_up = tf.nn.embedding_lookup(embedding, self._input_data)
            per_step = tf.split(1, steps, looked_up)
            inputs = [tf.squeeze(chunk, [1]) for chunk in per_step]

        if with_dropout:
            inputs = [tf.nn.dropout(chunk, config.keep_prob)
                      for chunk in inputs]

        from tensorflow.models.rnn import rnn
        step_outputs, states = rnn.rnn(network,
                                       inputs,
                                       initial_state=self._initial_state)

        # Stack the step outputs into rows of size `width`.
        stacked_outputs = tf.reshape(tf.concat(1, step_outputs), [-1, width])

        proj_w = tf.get_variable("softmax_W", [width, vocab])
        proj_b = tf.get_variable("softmax_b", [vocab])
        logits = tf.nn.xw_plus_b(stacked_outputs, proj_w, proj_b)
        # The softmax is exposed for sampling only.
        self._softmax_out = tf.nn.softmax(logits)

        target_ids = tf.reshape(self._targets, [-1])
        ones = tf.ones([batch * steps])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [target_ids], [ones], vocab)

        loss_sum = tf.reduce_sum(loss)
        batch_as_float = tf.constant(batch, dtype=tf.float32)
        cost = tf.div(loss_sum, batch_as_float)
        self._cost = cost
        self._final_state = states[-1]

        if not is_training:
            # No optimisation ops are needed outside training.
            return

        self._lr = tf.Variable(0.0, trainable=False)
        variables = tf.trainable_variables()
        gradients = tf.gradients(cost, variables)
        clipped, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm)
        # Author's note: plain gradient descent did well with clipping and the
        # learning-rate schedule; FTRL is the optimizer actually used.
        # NOTE(review): `self.lr` is read here rather than `self._lr` --
        # presumably a property on the class; confirm.
        optimizer = tf.train.FtrlOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(clipped, variables))
コード例 #41
0
    def __init__(self, is_training, image_tensor, config, global_step_tensor):
        """Build an LSTM language model conditioned on AlexNet image features.

        Args:
            is_training: truthy to add dropout (when ``config.keep_prob < 1``),
                the Adam training op and the summaries; otherwise
                ``_train_op`` is a no-op.
            image_tensor: image input; it is passed through local response
                normalization before being fed to AlexNet.
            config: object read for ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``num_layers`` and
                ``image_features_size``, plus ``keep_prob`` and
                ``max_grad_norm`` when training.
            global_step_tensor: handed to ``apply_gradients`` as ``global_step``.
        """
        n_batch = config.batch_size
        n_steps = config.num_steps
        self.batch_size = n_batch
        self.num_steps = n_steps
        hidden = config.hidden_size
        n_vocab = config.vocab_size

        # Image branch: normalise, then run through a non-trainable AlexNet.
        normalized_image = tf.nn.local_response_normalization(image_tensor)
        self.alexnet = alexnet.AlexNet({'data': normalized_image},
                                       trainable=False)
        self.image_input = normalized_image

        self._input_data = tf.placeholder(tf.int32, [n_batch, n_steps])
        self._targets = tf.placeholder(tf.int32, [n_batch, n_steps])

        # Text branch: peephole LSTM with cell clipping, optional dropout.
        layer = rnn_cell.LSTMCell(hidden, hidden, use_peepholes=True,
                                  cell_clip=2.)
        use_dropout = is_training and config.keep_prob < 1
        if use_dropout:
            layer = rnn_cell.DropoutWrapper(
                layer, output_keep_prob=config.keep_prob)
        stacked = rnn_cell.MultiRNNCell([layer] * config.num_layers)

        self._initial_state = stacked.zero_state(n_batch, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [n_vocab, hidden])
            embedded = tf.nn.embedding_lookup(embedding, self._input_data)

        if use_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        # One [batch, hidden] tensor per time step.
        step_inputs = []
        for piece in tf.split(1, n_steps, embedded):
            step_inputs.append(tf.squeeze(piece, [1]))
        rnn_outputs, states = rnn.rnn(
            stacked, step_inputs, initial_state=self._initial_state)

        # Project AlexNet's fc8 output to the configured feature size.
        fc8 = self.alexnet.layers['fc8']
        print(fc8.get_shape())
        with tf.name_scope('image_features'):
            feature_w = tf.get_variable(
                'Weights', [1000, config.image_features_size])
            feature_b = tf.get_variable(
                'Biases', [config.image_features_size])
            image_features = tf.nn.sigmoid(
                tf.matmul(fc8, feature_w) + feature_b)

        # Append the image features to every RNN step output.
        joined_size = hidden + config.image_features_size
        joined_outputs = []
        for step_output in rnn_outputs:
            joined_outputs.append(tf.concat(1, [step_output, image_features]))

        flat_output = tf.reshape(tf.concat(1, joined_outputs),
                                 [-1, joined_size])
        self.outputs = flat_output

        softmax_w = tf.get_variable("softmax_w", [joined_size, n_vocab])
        softmax_b = tf.get_variable("softmax_b", [n_vocab])
        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)

        unit_weights = tf.ones([n_batch * n_steps])
        flat_targets = tf.reshape(self._targets, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [unit_weights], n_vocab)
        self.logits = logits

        # Summed loss scaled by 1 / batch_size.
        summed_loss = tf.reduce_sum(loss)
        cost = summed_loss * (1.0 / n_batch)
        self._cost = cost
        self._final_state = states[-1]
        self.probs = tf.nn.softmax(logits)

        if is_training:
            self._lr = tf.Variable(0.0, trainable=False)
            trainable = tf.trainable_variables()
            clipped_grads, _ = tf.clip_by_global_norm(
                tf.gradients(cost, trainable), config.max_grad_norm)
            # NOTE(review): `self.lr` is read here rather than `self._lr` --
            # presumably a property on the class; confirm.
            adam = tf.train.AdamOptimizer(self.lr)
            self._train_op = adam.apply_gradients(
                zip(clipped_grads, trainable), global_step=global_step_tensor)

            # The summary tagged 'perplexity' records the cost tensor itself.
            tf.scalar_summary('perplexity', cost)
            tf.histogram_summary('loss', loss)
            tf.histogram_summary('probs', self.probs)
        else:
            self._train_op = tf.no_op()
コード例 #42
0
ファイル: ptb_word_lm.py プロジェクト: gemoku/word_lstm
    def __init__(self, is_training, config):
        """Build the unrolled multi-layer LSTM language-model graph.

        Args:
            is_training: if true, dropout is applied (when
                ``config.keep_prob < 1``) and the gradient-clipping and
                SGD update ops are added; otherwise the method returns
                right after the cost is defined.
            config: object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # from tensorflow.models.rnn import rnn
        # inputs = [tf.squeeze(input_, [1])
        #           for input_ in tf.split(1, num_steps, inputs)]
        # outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        outputs = []
        states = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                # Variables are created on the first step and shared afterwards.
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
                states.append(state)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        logits = tf.nn.xw_plus_b(output,
                                 tf.get_variable("softmax_w", [size, vocab_size]),
                                 tf.get_variable("softmax_b", [vocab_size]))

        # Use the local tensor: this method never assigns ``self.logits``, so
        # reading it here would depend on an attribute that does not exist yet.
        self.probs = tf.nn.softmax(logits)

        loss = seq2seq.sequence_loss_by_example([logits],
                                                [tf.reshape(self._targets, [-1])],
                                                [tf.ones([batch_size * num_steps])],
                                                vocab_size)
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # NOTE(review): reads self.lr although only self._lr is assigned in
        # this method - presumably a property defined on the class; confirm.
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #43
0
    def __init__(self, args, infer=False):
        """Build an RNN language-model graph, one- or bi-directional.

        Args:
            args: hyper-parameter namespace. Reads ``model``, ``activation``,
                ``batch_size``, ``seq_length``, ``rnn_size``, ``vocab_size``,
                ``num_layers``, ``dropout``, ``bidirectional`` and
                ``grad_clip``. When ``infer`` is true, its ``batch_size`` and
                ``seq_length`` are overwritten with 1.
            infer: when true (one-directional case only), ``loop`` is passed
                to ``rnn_decoder`` as ``loop_function`` so that the embedding
                of the previous step's argmax symbol is available as input.

        Raises:
            Exception: if ``args.model`` or ``args.activation`` is not one of
                the supported names.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = jzRNNCell
        elif args.model == 'gru':
            cell_fn = jzGRUCell
        elif args.model == 'lstm':
            cell_fn = jzLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        if args.activation == 'tanh':
            cell_af = tf.tanh
        elif args.activation == 'sigmoid':
            cell_af = tf.sigmoid
        elif args.activation == 'relu':
            cell_af = tf.nn.relu
        else:
            raise Exception("activation function not supported: {}".format(args.activation))

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        with tf.variable_scope('rnnlm'):
            # The bidirectional model concatenates forward and backward
            # outputs, so its softmax input is twice as wide.
            if not args.bidirectional:
                softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            else:
                softmax_w = tf.get_variable("softmax_w", [args.rnn_size*2, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.nn.dropout(tf.squeeze(input_, [1]), args.dropout) for input_ in inputs]

        if not args.bidirectional:
            # One-directional RNN.
            cell = cell_fn(args.rnn_size, activation=cell_af)
            self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
            self.initial_state = cell.zero_state(args.batch_size, tf.float32)

            def loop(prev, _):
                # Project the previous output to the vocabulary, pick the most
                # likely symbol and return its embedding.
                prev = tf.matmul(prev, softmax_w) + softmax_b
                prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
                return tf.nn.embedding_lookup(embedding, prev_symbol)

            outputs, last_state = seq2seq.rnn_decoder(
                inputs, self.initial_state, cell,
                loop_function=loop if infer else None, scope='rnnlm')
            output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        else:
            # Bi-directional RNN.
            lstm_fw = cell_fn(args.rnn_size, activation=cell_af)
            lstm_bw = cell_fn(args.rnn_size, activation=cell_af)
            self.lstm_fw = lstm_fw = rnn_cell.MultiRNNCell([lstm_fw]*args.num_layers)
            self.lstm_bw = lstm_bw = rnn_cell.MultiRNNCell([lstm_bw]*args.num_layers)
            self.initial_state_fw = lstm_fw.zero_state(args.batch_size, tf.float32)
            self.initial_state_bw = lstm_bw.zero_state(args.batch_size, tf.float32)
            # NOTE(review): sequence_length receives the scalar batch size;
            # the rnn API normally takes per-example lengths - confirm intent.
            outputs, state_fw, state_bw = rnn.bidirectional_rnn(
                lstm_fw, lstm_bw, inputs,
                initial_state_fw=self.initial_state_fw,
                initial_state_bw=self.initial_state_bw,
                sequence_length=args.batch_size)
            # Keep both final states so that ``final_state`` below is defined
            # in this branch as well (it used to raise NameError).
            last_state = (state_fw, state_bw)
            output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size*2])

        self.logits = tf.matmul(tf.nn.dropout(output, args.dropout), softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #44
0
ファイル: ptb_word_lm.py プロジェクト: ttamada/nmt_tensorflow
    def __init__(self, is_training, config):
        """Construct the unrolled multi-layer LSTM language-model graph.

        Args:
            is_training: if true, dropout is applied (when
                ``config.keep_prob < 1``) and the clipped-gradient SGD update
                op is created; otherwise the method returns once the cost
                and final state are defined.
            config: object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers`` and ``max_grad_norm``.
        """
        batch_size = config.batch_size
        self.batch_size = batch_size
        num_steps = config.num_steps
        self.num_steps = num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # A forget-gate bias of 1 can give slightly better results, but the
        # model's hyperparameters would then have to differ from those
        # reported in the paper.
        base_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)
        apply_dropout = is_training and config.keep_prob < 1
        if apply_dropout:
            base_cell = rnn_cell.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([base_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, hidden_size])
            embedded = tf.nn.embedding_lookup(embedding, self._input_data)

        if apply_dropout:
            embedded = tf.nn.dropout(embedded, config.keep_prob)

        # Hand-unrolled recurrence, kept explicit for tutorial purposes; the
        # rnn() / state_saving_rnn() helpers from rnn.py are the general
        # alternative.
        step_outputs = []
        step_states = []
        current_state = self._initial_state
        with tf.variable_scope("RNN"):
            for t in range(num_steps):
                # The first step creates the cell variables; later steps
                # share them.
                if t > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, current_state = cell(embedded[:, t, :],
                                                  current_state)
                step_outputs.append(step_output)
                step_states.append(current_state)

        flat_output = tf.reshape(tf.concat(1, step_outputs),
                                 [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.nn.xw_plus_b(flat_output, softmax_w, softmax_b)

        flat_targets = tf.reshape(self._targets, [-1])
        unit_weights = tf.ones([batch_size * num_steps])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [unit_weights], vocab_size)
        cost = tf.reduce_sum(loss) / batch_size
        self._cost = cost
        self._final_state = step_states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
        # NOTE(review): reads self.lr although only self._lr is assigned in
        # this method - presumably a property defined on the class; confirm.
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
コード例 #45
0
ファイル: lm_models.py プロジェクト: chagge/attentive_lm
    def __init__(self,
                 is_training,
                 learning_rate=1.0,
                 optimizer="sgd",
                 max_grad_norm=5,
                 num_layers=2,
                 use_lstm=True,
                 num_steps=35,
                 num_steps_valid=120,
                 proj_size=650,
                 hidden_size=650,
                 hidden_proj=650,
                 num_samples=512,
                 init_scale=0.1,
                 dropout_rate=0.0,
                 lr_decay=0.8,
                 batch_size=20,
                 attentive=False,
                 projection_attention_f=None,
                 output_form=lm_ops.OUTPUT_CONCAT,
                 vocab_size=10000):
        """Build training and validation graphs of an (attentive) RNN LM.

        Args:
            is_training: when false, the method returns after the costs are
                defined, before gradient, optimizer and saver ops are created.
            learning_rate: initial value of the ``learning_rate`` variable;
                also passed to ``optimization_ops.get_optimizer``.
            optimizer: optimizer identifier passed to
                ``optimization_ops.get_optimizer``.
            max_grad_norm: clip norm given to ``tf.clip_by_global_norm``.
            num_layers: passed to ``cells.build_lm_multicell_rnn``.
            use_lstm: passed to ``cells.build_lm_multicell_rnn``.
            num_steps: number of per-step training placeholders.
            num_steps_valid: number of per-step validation placeholders.
            proj_size: embedding width; also passed to the cell builder.
            hidden_size: passed to the cell builder; width of the output
                projection input when ``hidden_proj`` is not positive.
            hidden_proj: if positive, passed to the cell builder as
                ``hidden_projection`` and used as the output projection
                input width.
            num_samples: sampled softmax is used when
                ``0 < num_samples < vocab_size``.
            init_scale: half-width of the uniform range used for the
                embedding and for the "RNN" variable scope initializer.
            dropout_rate: passed to the cell builder as ``dropout``.
            lr_decay: factor applied by ``learning_rate_decay_op``.
            batch_size: batch size of the training zero state, increment of
                ``samples_seen`` and divisor of both costs.
            attentive: if true, ``lm_ops.apply_attentive_lm`` is used instead
                of ``lm_ops.apply_lm``; requires ``projection_attention_f``.
            projection_attention_f: passed to ``apply_attentive_lm``.
            output_form: passed to ``apply_attentive_lm``.
            vocab_size: number of output classes.
        """
        with tf.device("/gpu:0"):

            if attentive:
                assert projection_attention_f is not None

            self.batch_size = batch_size
            self.num_steps = num_steps
            self.num_steps_valid = num_steps_valid

            # One placeholder triple (input, target, mask) per time step.
            self._input_data_train = []
            self._targets_train = []
            self.mask_train = []

            for i in xrange(num_steps):  # Last bucket is the biggest one.
                self._input_data_train.append(
                    tf.placeholder(tf.int32, shape=[None], name="input_train{0}".format(i)))
                self._targets_train.append(
                    tf.placeholder(tf.int32, shape=[None], name="target_train{0}".format(i)))
                self.mask_train.append(
                    tf.placeholder(tf.float32, shape=[None], name="mask_train{0}".format(i)))

            self._input_data_valid = []
            self._targets_valid = []
            self.mask_valid = []

            for i in xrange(num_steps_valid):  # Last bucket is the biggest one.
                self._input_data_valid.append(
                    tf.placeholder(tf.int32, shape=[None], name="input_valid{0}".format(i)))
                self._targets_valid.append(
                    tf.placeholder(tf.int32, shape=[None], name="target_valid{0}".format(i)))
                self.mask_valid.append(
                    tf.placeholder(tf.float32, shape=[None], name="mask_valid{0}".format(i)))

            hidden_projection = None
            if hidden_proj > 0:
                hidden_projection = hidden_proj

            self.cell = cells.build_lm_multicell_rnn(num_layers, hidden_size, proj_size, use_lstm=use_lstm,
                                                     hidden_projection=hidden_projection, dropout=dropout_rate)

            self.dropout_feed = tf.placeholder(tf.float32, name="dropout_rate")

            self._initial_state_train = self.cell.zero_state(batch_size, tf.float32)
            self._initial_state_valid = self.cell.zero_state(1, tf.float32)

            # learning rate ops
            self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
            self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * lr_decay)

            # epoch ops
            self.epoch = tf.Variable(0, trainable=False)
            self.epoch_update_op = self.epoch.assign(self.epoch + 1)

            # samples seen ops
            self.samples_seen = tf.Variable(0, trainable=False)
            self.samples_seen_update_op = self.samples_seen.assign(self.samples_seen + batch_size)
            self.samples_seen_reset_op = self.samples_seen.assign(0)

            # global step variable - controlled by the model
            self.global_step = tf.Variable(0.0, trainable=False)

            # average loss ops
            self.current_ppx = tf.Variable(1.0, trainable=False)
            self.current_loss = tf.Variable(0.0, trainable=False)

            # early-stopping bookkeeping
            self.best_eval_ppx = tf.Variable(numpy.inf, trainable=False)
            self.estop_counter = tf.Variable(0, trainable=False)
            self.estop_counter_update_op = self.estop_counter.assign(self.estop_counter + 1)
            self.estop_counter_reset_op = self.estop_counter.assign(0)

            # Symmetric range around zero, matching the embedding below; the
            # previous minval == maxval range was degenerate.
            initializer = tf.random_uniform_initializer(minval=-init_scale, maxval=init_scale, seed=_SEED)

            out_proj = hidden_size
            if hidden_proj > 0:
                out_proj = hidden_proj

            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [out_proj, vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [vocab_size])
            self.output_projection = (w, b)

            # None selects the standard softmax in sequence_loss_by_example.
            loss_function = None

            # Sampled softmax only makes sense if we sample less than vocabulary size.
            sampled_softmax = 0 < num_samples < vocab_size
            if sampled_softmax:

                def sampled_loss(logits, labels):
                    with tf.device("/cpu:0"):
                        labels = tf.reshape(labels, [-1, 1])
                        losses = tf.nn.sampled_softmax_loss(w_t, b, logits, labels, num_samples, vocab_size)
                        return losses

                loss_function = sampled_loss

            with tf.device("/cpu:0"):
                # Inputs arrive as a list of per-step id tensors; each one is
                # embedded separately before being fed to the rnn cell.
                embedding = tf.Variable(
                    tf.random_uniform(
                        [vocab_size, proj_size],
                        minval=-init_scale, maxval=init_scale
                    ),
                    name="embedding"
                )

                inputs_train = [tf.nn.embedding_lookup(embedding, i) for i in self._input_data_train]
                inputs_valid = [tf.nn.embedding_lookup(embedding, i) for i in self._input_data_valid]

            with tf.variable_scope("RNN", initializer=initializer):

                # Sequence lengths are obtained by summing the per-step masks.
                if attentive:
                    outputs_train, state_train, _ = lm_ops.apply_attentive_lm(
                        self.cell, inputs_train, sequence_length=array_ops.squeeze(math_ops.add_n(self.mask_train)),
                        projection_attention_f=projection_attention_f, output_form=output_form,
                        dropout=self.dropout_feed, initializer=initializer, dtype=tf.float32
                    )

                    outputs_valid, state_valid, _ = lm_ops.apply_attentive_lm(
                        self.cell, inputs_valid, sequence_length=array_ops.squeeze(math_ops.add_n(self.mask_valid)),
                        projection_attention_f=projection_attention_f, output_form=output_form,
                        dropout=self.dropout_feed, initializer=initializer, dtype=tf.float32
                    )

                else:
                    outputs_train, state_train = lm_ops.apply_lm(
                        self.cell, inputs_train, sequence_length=math_ops.add_n(self.mask_train),
                        dropout=self.dropout_feed, dtype=tf.float32
                    )

                    outputs_valid, state_valid = lm_ops.apply_lm(
                        self.cell, inputs_valid, sequence_length=math_ops.add_n(self.mask_valid),
                        dropout=self.dropout_feed, dtype=tf.float32
                    )

                if not sampled_softmax:
                    logits_train = [tf.nn.xw_plus_b(o, self.output_projection[0], self.output_projection[1])
                                    for o in outputs_train]
                    logits_valid = [tf.nn.xw_plus_b(o, self.output_projection[0], self.output_projection[1])
                                    for o in outputs_valid]
                else:
                    # The sampled loss applies the projection itself, so the
                    # raw RNN outputs are passed on unprojected.
                    logits_train = outputs_train
                    logits_valid = outputs_valid

            # loss_function must be forwarded: without it the un-projected
            # outputs of the sampled-softmax case would be treated as logits.
            # NOTE(review): with sampled softmax the validation loss is a
            # sampled estimate as well - confirm that this is acceptable.
            loss_train = seq2seq.sequence_loss_by_example(
                logits_train, self._targets_train, self.mask_train, average_across_timesteps=True,
                softmax_loss_function=loss_function
            )

            loss_valid = seq2seq.sequence_loss_by_example(
                logits_valid, self._targets_valid, self.mask_valid, average_across_timesteps=True,
                softmax_loss_function=loss_function
            )

            self._cost_train = cost = tf.reduce_sum(loss_train) / float(batch_size)
            self._final_state_train = state_train

            self._cost_valid = tf.reduce_sum(loss_valid) / float(batch_size)
            self._final_state_valid = state_valid

            if not is_training:
                return

            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                              max_grad_norm)

            # NOTE(review): the optimizer receives the Python float, not the
            # self.learning_rate variable, so learning_rate_decay_op may not
            # influence training - verify against get_optimizer.
            opt = optimization_ops.get_optimizer(optimizer, learning_rate)
            self._train_op = opt.apply_gradients(zip(grads, tvars), global_step=self.global_step)
            self._valid_op = tf.no_op()

            self.saver = tf.train.Saver(tf.all_variables())
            self.saver_best = tf.train.Saver(tf.all_variables())
コード例 #46
0
ファイル: nmt_models.py プロジェクト: yeab/tsf_nmt
def model_with_buckets(encoder_inputs, decoder_inputs, targets, weights,
                       buckets, seq2seq_f, softmax_loss_function=None,
                       per_example_loss=False, name=None):
    """Create a sequence-to-sequence model with support for bucketing.

    The seq2seq argument is a function that defines a sequence-to-sequence model,
    e.g., seq2seq = lambda x, y: basic_rnn_seq2seq(x, y, rnn_cell.GRUCell(24))

    Args:
      encoder_inputs: A list of Tensors to feed the encoder; first seq2seq input.
      decoder_inputs: A list of Tensors to feed the decoder; second seq2seq input.
      targets: A list of 1D batch-sized int32 Tensors (desired output sequence).
      weights: List of 1D batch-sized float-Tensors to weight the targets.
      buckets: A list of pairs of (input size, output size) for each bucket.
      seq2seq_f: A sequence-to-sequence model function; it takes 2 input that
        agree with encoder_inputs and decoder_inputs, and returns a pair
        consisting of outputs and states (as, e.g., basic_rnn_seq2seq).
      softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
        to be used instead of the standard softmax (the default if this is None).
      per_example_loss: Boolean. If set, the returned loss will be a batch-sized
        tensor of losses for each sequence in the batch. If unset, it will be
        a scalar with the averaged loss from all examples.
      name: Optional name for this operation, defaults to "model_with_buckets".

    Returns:
      A tuple of the form (outputs, losses), where:
        outputs: The outputs for each bucket. Its j'th element consists of a list
          of 2D Tensors of shape [batch_size x num_decoder_symbols] (jth outputs).
        losses: List of scalar Tensors, representing losses for each bucket, or,
          if per_example_loss is set, a list of 1D batch-sized float Tensors.

    Raises:
      ValueError: If length of encoder_inputs, targets, or weights is smaller
        than the largest (last) bucket.
    """
    # The last bucket is the largest, so it bounds the required list lengths.
    max_encoder_size, max_decoder_size = buckets[-1][0], buckets[-1][1]
    if len(encoder_inputs) < max_encoder_size:
        raise ValueError("Length of encoder_inputs (%d) must be at least that of "
                         "last bucket (%d)." % (len(encoder_inputs), max_encoder_size))
    if len(targets) < max_decoder_size:
        raise ValueError("Length of targets (%d) must be at least that of "
                         "last bucket (%d)." % (len(targets), max_decoder_size))
    if len(weights) < max_decoder_size:
        raise ValueError("Length of weights (%d) must be at least that of "
                         "last bucket (%d)." % (len(weights), max_decoder_size))

    all_inputs = encoder_inputs + decoder_inputs + targets + weights
    losses = []
    outputs = []
    with ops.op_scope(all_inputs, name, "model_with_buckets"):
        # Per-sequence losses or a single averaged scalar, as requested.
        if per_example_loss:
            loss_fn = seq2seq.sequence_loss_by_example
        else:
            loss_fn = seq2seq.sequence_loss

        for j, bucket in enumerate(buckets):
            encoder_size, decoder_size = bucket[0], bucket[1]
            # Variables are created for the first bucket and reused by the rest.
            with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                               reuse=True if j > 0 else None):
                bucket_outputs, _ = seq2seq_f(encoder_inputs[:encoder_size],
                                              decoder_inputs[:decoder_size])
                outputs.append(bucket_outputs)

                losses.append(loss_fn(
                    bucket_outputs, targets[:decoder_size], weights[:decoder_size],
                    average_across_timesteps=True,
                    softmax_loss_function=softmax_loss_function))

    return outputs, losses
コード例 #47
0
    def __init__(self,
                 is_training,
                 learning_rate=1.0,
                 optimizer="sgd",
                 max_grad_norm=5,
                 num_layers=2,
                 use_lstm=True,
                 num_steps=35,
                 num_steps_valid=120,
                 proj_size=650,
                 hidden_size=650,
                 hidden_proj=650,
                 num_samples=512,
                 init_scale=0.1,
                 dropout_rate=0.0,
                 lr_decay=0.8,
                 batch_size=20,
                 attentive=False,
                 projection_attention_f=None,
                 output_form=lm_ops.OUTPUT_CONCAT,
                 vocab_size=10000):

        with tf.device("/gpu:0"):

            if attentive:
                assert projection_attention_f is not None

            self.batch_size = batch_size = batch_size
            self.num_steps = num_steps
            self.num_steps_valid = num_steps_valid
            vocab_size = vocab_size

            self._input_data_train = []
            self._targets_train = []
            self.mask_train = []

            for i in xrange(num_steps):  # Last bucket is the biggest one.
                self.input_data_train.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="input_train{0}".format(i)))
                self.targets_train.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="target_train{0}".format(i)))
                self.mask_train.append(
                    tf.placeholder(tf.float32,
                                   shape=[None],
                                   name="mask_train{0}".format(i)))

            self._input_data_valid = []
            self._targets_valid = []
            self.mask_valid = []

            for i in xrange(
                    num_steps_valid):  # Last bucket is the biggest one.
                self.input_data_valid.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="input_valid{0}".format(i)))
                self.targets_valid.append(
                    tf.placeholder(tf.int32,
                                   shape=[None],
                                   name="target_valid{0}".format(i)))
                self.mask_valid.append(
                    tf.placeholder(tf.float32,
                                   shape=[None],
                                   name="mask_valid{0}".format(i)))

            hidden_projection = None
            if hidden_proj > 0:
                hidden_projection = hidden_proj

            self.cell = cells.build_lm_multicell_rnn(
                num_layers,
                hidden_size,
                proj_size,
                use_lstm=use_lstm,
                hidden_projection=hidden_projection,
                dropout=dropout_rate)

            self.dropout_feed = tf.placeholder(tf.float32, name="dropout_rate")

            self._initial_state_train = self.cell.zero_state(
                batch_size, tf.float32)
            self._initial_state_valid = self.cell.zero_state(1, tf.float32)

            # learning rate ops
            self.learning_rate = tf.Variable(float(learning_rate),
                                             trainable=False)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * lr_decay)

            # epoch ops
            self.epoch = tf.Variable(0, trainable=False)
            self.epoch_update_op = self.epoch.assign(self.epoch + 1)

            # samples seen ops
            self.samples_seen = tf.Variable(0, trainable=False)
            self.samples_seen_update_op = self.samples_seen.assign(
                self.samples_seen + batch_size)
            self.samples_seen_reset_op = self.samples_seen.assign(0)

            # global step variable - controled by the model
            self.global_step = tf.Variable(0.0, trainable=False)

            # average loss ops
            self.current_ppx = tf.Variable(1.0, trainable=False)
            self.current_loss = tf.Variable(0.0, trainable=False)
            # self.current_loss_update_op = None

            self.best_eval_ppx = tf.Variable(numpy.inf, trainable=False)
            self.estop_counter = tf.Variable(0, trainable=False)
            self.estop_counter_update_op = self.estop_counter.assign(
                self.estop_counter + 1)
            self.estop_counter_reset_op = self.estop_counter.assign(0)

            initializer = tf.random_uniform_initializer(minval=init_scale,
                                                        maxval=init_scale,
                                                        seed=_SEED)

            out_proj = hidden_size
            if hidden_proj > 0:
                out_proj = hidden_proj

            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [out_proj, vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [vocab_size])
            self.output_projection = (w, b)

            sampled_softmax = False

            # Sampled softmax only makes sense if we sample less than vocabulary size.
            if 0 < num_samples < vocab_size:
                sampled_softmax = True

                def sampled_loss(logits, labels):
                    with tf.device("/cpu:0"):
                        labels = tf.reshape(labels, [-1, 1])
                        losses = tf.nn.sampled_softmax_loss(
                            w_t, b, logits, labels, num_samples, vocab_size)
                        return losses

                loss_function = sampled_loss

            with tf.device("/cpu:0"):
                # input come as one big tensor so we have to split it into a list of tensors to run the rnn cell
                embedding = tf.Variable(tf.random_uniform(
                    [vocab_size, proj_size],
                    minval=-init_scale,
                    maxval=init_scale),
                                        name="embedding")
                # embedding = tf.get_variable("embedding", [vocab_size, proj_size])

                inputs_train = [
                    tf.nn.embedding_lookup(embedding, i)
                    for i in self.input_data_train
                ]
                inputs_valid = [
                    tf.nn.embedding_lookup(embedding, i)
                    for i in self.input_data_valid
                ]

            with tf.variable_scope("RNN", initializer=initializer):

                if attentive:
                    outputs_train, state_train, _ = lm_ops.apply_attentive_lm(
                        self.cell,
                        inputs_train,
                        sequence_length=array_ops.squeeze(
                            math_ops.add_n(self.mask_train)),
                        projection_attention_f=projection_attention_f,
                        output_form=output_form,
                        dropout=self.dropout_feed,
                        initializer=initializer,
                        dtype=tf.float32)

                    outputs_valid, state_valid, _ = lm_ops.apply_attentive_lm(
                        self.cell,
                        inputs_valid,
                        sequence_length=array_ops.squeeze(
                            math_ops.add_n(self.mask_valid)),
                        projection_attention_f=projection_attention_f,
                        output_form=output_form,
                        dropout=self.dropout_feed,
                        initializer=initializer,
                        dtype=tf.float32)

                else:
                    outputs_train, state_train = lm_ops.apply_lm(
                        self.cell,
                        inputs_train,
                        sequence_length=math_ops.add_n(self.mask_train),
                        dropout=self.dropout_feed,
                        dtype=tf.float32)

                    outputs_valid, state_valid = lm_ops.apply_lm(
                        self.cell,
                        inputs_valid,
                        sequence_length=math_ops.add_n(self.mask_valid),
                        dropout=self.dropout_feed,
                        dtype=tf.float32)

                if sampled_softmax is False:
                    logits_train = [
                        tf.nn.xw_plus_b(o, self.output_projection[0],
                                        self.output_projection[1])
                        for o in outputs_train
                    ]
                    logits_valid = [
                        tf.nn.xw_plus_b(o, self.output_projection[0],
                                        self.output_projection[1])
                        for o in outputs_valid
                    ]
                else:
                    logits_train = outputs_train
                    logits_valid = outputs_valid

            loss_train = seq2seq.sequence_loss_by_example(
                logits_train,
                self.targets_train,
                self.mask_train,
                average_across_timesteps=True)

            loss_valid = seq2seq.sequence_loss_by_example(
                logits_valid,
                self.targets_valid,
                self.mask_valid,
                average_across_timesteps=True)

            self._cost_train = cost = tf.reduce_sum(loss_train) / float(
                batch_size)
            self._final_state_train = state_train

            self._cost_valid = tf.reduce_sum(loss_valid) / float(batch_size)
            self._final_state_valid = state_valid

            if not is_training:
                return

            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                              max_grad_norm)

            opt = optimization_ops.get_optimizer(optimizer, learning_rate)
            self._train_op = opt.apply_gradients(zip(grads, tvars),
                                                 global_step=self.global_step)
            self._valid_op = tf.no_op()

            self.saver = tf.train.Saver(tf.all_variables())
            self.saver_best = tf.train.Saver(tf.all_variables())