Example #1
    def __init__(self, rnn_size, num_layers, batch_size, seq_length, vocab_size,
                 grad_clip, infer=False):
        """
        Constructor for an RNN using LSTMs.
        @param rnn_size: The size of the RNN
        @param num_layers: The number of layers for the RNN to have
        @param batch_size: The batch size to train with
        @param seq_length: The length of the sequences to use in training
        @param vocab_size: The size of the vocab
        @param grad_clip: The point at which to clip the gradient in the gradient descent
        @param infer: Whether the model is built for inference (sampling); if True, batch_size and seq_length are forced to 1
        """
        #TODO: During training, (and when sampling), the input to the RNN should be
        #      the list of ingredients that goes with that recipe text.
        if infer:
            batch_size = 1
            seq_length = 1

        cell_fn = rnn_cell.GRUCell  #BasicLSTMCell
        cell = cell_fn(rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * num_layers)

        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope("rnnlm"):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [vocab_size, rnn_size])
                inputs = tf.split(1, seq_length,
                                  tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(inp, [1]) for inp in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        loop_func = loop if infer else None
        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                  loop_function=loop_func, scope="rnnlm")
        output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b

        self.probs = tf.nn.softmax(self.logits)

        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [tf.reshape(self.targets, [-1])],
                                                [tf.ones([batch_size * seq_length])],
                                                vocab_size)
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
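Nearly every example on this page passes the same greedy `loop` function to `seq2seq.rnn_decoder` for sampling. A minimal runnable NumPy sketch of that feedback contract (toy arrays; in the real graph the RNN cell runs between steps):

import numpy as np

vocab_size, rnn_size = 5, 4
rng = np.random.default_rng(0)
softmax_w = rng.standard_normal((rnn_size, vocab_size))
softmax_b = np.zeros(vocab_size)
embedding = rng.standard_normal((vocab_size, rnn_size))

def loop(prev):
    logits = prev @ softmax_w + softmax_b  # project the cell output to vocab scores
    prev_symbol = logits.argmax(axis=1)    # greedy pick: no sampling, no gradient
    return embedding[prev_symbol]          # embed the winner as the next input

x = rng.standard_normal((1, rnn_size))    # stand-in for one cell output
for _ in range(3):
    x = loop(x)                           # the real graph runs the RNN cell between calls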
Example #2
    def build_graph(self, test):
        """
        Builds an LSTM graph in TensorFlow.
        """
        if test:
            self.batch_size = 1
            self.seq_len = 1

        lstm_cell = rnn_cell.BasicLSTMCell(self.cell_size)
        self.cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)

        self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len])
        self.targets = tf.placeholder(tf.int32,
                                      [self.batch_size, self.seq_len])
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('lstm_vars'):
            self.ws = tf.get_variable('ws', [self.cell_size, self.vocab_size])
            self.bs = tf.get_variable('bs', [self.vocab_size])

            with tf.device('/cpu:0'):
                self.embeddings = tf.get_variable(
                    'embeddings', [self.vocab_size, self.cell_size])

                input_embeddings = tf.nn.embedding_lookup(
                    self.embeddings, self.inputs)

                inputs_split = tf.split(1, self.seq_len, input_embeddings)
                inputs_split = [
                    tf.squeeze(input_, [1]) for input_ in inputs_split
                ]

        def loop(prev, _):
            prev = tf.matmul(prev, self.ws) + self.bs
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embeddings, prev_symbol)

        lstm_outputs_split, self.final_state = seq2seq.rnn_decoder(
            inputs_split,
            self.initial_state,
            self.cell,
            loop_function=loop if test else None,
            scope='lstm_vars')
        lstm_outputs = tf.reshape(tf.concat(1, lstm_outputs_split),
                                  [-1, self.cell_size])

        logits = tf.matmul(lstm_outputs, self.ws) + self.bs
        self.probs = tf.nn.softmax(logits)

        total_loss = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([self.batch_size * self.seq_len])], self.vocab_size)
        self.loss = tf.reduce_sum(total_loss) / self.batch_size / self.seq_len

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=c.L_RATE,
                                                name='optimizer')
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step,
                                                name='train_op')
Example #3
    def build_decoder_rnn(self, first_step):
        with tf.variable_scope("rnnlm"):
            if first_step:
                rnn_input = tf.matmul(self.fc7, self.encode_img_W) + self.encode_img_b
            else:
                self.decoder_prev_word = tf.placeholder(tf.int32, [None])
                rnn_input = tf.nn.embedding_lookup(self.Wemb, self.decoder_prev_word)

            self.batch_size = tf.shape(rnn_input)[0]

            tf.get_variable_scope().reuse_variables()

            self.decoder_cell = rnn_cell.MultiRNNCell([self.basic_cell] * self.opt.num_layers, state_is_tuple=False)
            state_size = self.decoder_cell.state_size
            if not first_step:
                self.decoder_initial_state = initial_state = tf.placeholder(tf.float32, 
                    [None, state_size])
            else:
                initial_state = self.decoder_cell.zero_state(
                    self.batch_size, tf.float32)

            outputs, state = seq2seq.rnn_decoder([rnn_input], initial_state, self.decoder_cell)
            #outputs, state = tf.nn.rnn(self.decoder_cell, [rnn_input], initial_state)
            logits = tf.matmul(outputs[0], self.embed_word_W) + self.embed_word_b
            decoder_probs = tf.reshape(tf.nn.softmax(logits), [self.batch_size, self.vocab_size + 1])
            decoder_state = state
        return [decoder_probs, decoder_state]
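The method above builds a one-step decoder so a caller can thread the RNN state through repeated session calls. A runnable NumPy sketch of that driving pattern, with a toy linear recurrence standing in for the MultiRNNCell (all names here are illustrative, not from the source):

import numpy as np

rng = np.random.default_rng(0)
state_size, vocab = 4, 6
W_state = 0.1 * rng.standard_normal((state_size, state_size))
W_in = 0.1 * rng.standard_normal((state_size, state_size))
W_out = rng.standard_normal((state_size, vocab))
embed = rng.standard_normal((vocab, state_size))

def decode_step(prev_word, state):
    # one decoder step: consume the previous word, return probs and new state
    state = np.tanh(state @ W_state + embed[prev_word] @ W_in)
    probs = np.exp(state @ W_out)
    return probs / probs.sum(axis=1, keepdims=True), state

state = np.zeros((1, state_size))          # analogue of decoder_initial_state
word = np.array([0])                       # analogue of decoder_prev_word
for _ in range(5):
    probs, state = decode_step(word, state)
    word = probs.argmax(axis=1)            # greedy pick fed into the next step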
Example #4
    def build_generator(self):
        with tf.variable_scope("rnnlm"):
            image_emb = tf.matmul(self.fc7, self.encode_img_W) + self.encode_img_b

            rnn_inputs = tf.split(1, self.seq_length + 1, tf.zeros([self.batch_size, self.seq_length + 1, self.input_encoding_size]))
            rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]
            rnn_inputs = [image_emb] + rnn_inputs

            initial_state = self.cell.zero_state(self.batch_size, tf.float32)

            # Always pick the word with largest probability as the input of next time step
            def loop(prev, i):
                if i == 1:
                    return rnn_inputs[1]
                prev = tf.matmul(prev, self.embed_word_W) + self.embed_word_b
                prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
                return tf.nn.embedding_lookup(self.Wemb, prev_symbol)

            tf.get_variable_scope().reuse_variables()
            outputs, last_state = seq2seq.rnn_decoder(rnn_inputs, initial_state, self.cell, loop_function=loop)
            #outputs, last_state = tf.nn.rnn(self.cell, rnn_inputs, initial_state)
            self.g_output = output = tf.reshape(tf.concat(1, outputs[1:]), [-1, self.rnn_size]) # outputs[1:], because we don't calculate loss on time 0.
            self.g_logits = logits = tf.matmul(output, self.embed_word_W) + self.embed_word_b
            self.g_probs = probs = tf.reshape(tf.nn.softmax(logits), [self.batch_size, self.seq_length + 1, self.vocab_size + 1])

        self.generator = tf.argmax(probs, 2)
Example #5
    def _create_encoder(self, args):
        # Create LSTM portion of network
        lstm = rnn_cell.LSTMCell(args.encoder_size,
                                 state_is_tuple=True,
                                 initializer=initializers.xavier_initializer())
        self.full_lstm = rnn_cell.MultiRNNCell([lstm] *
                                               args.num_encoder_layers,
                                               state_is_tuple=True)
        self.lstm_state = self.full_lstm.zero_state(args.batch_size,
                                                    tf.float32)

        # Forward pass
        encoder_input = tf.concat(1, [self.states_encode, self.actions_encode])
        output, self.final_state = seq2seq.rnn_decoder([encoder_input],
                                                       self.lstm_state,
                                                       self.full_lstm)
        output = tf.reshape(tf.concat(1, output), [-1, args.encoder_size])

        # Fully connected layer to latent variable distribution parameters
        W = tf.get_variable("latent_w", [args.encoder_size, 2 * args.z_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("latent_b", [2 * args.z_dim])
        logits = tf.nn.xw_plus_b(output, W, b)

        # Separate into mean and logstd
        self.z_mean, self.z_logstd = tf.split(1, 2, logits)
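The encoder's final projection is split into (z_mean, z_logstd). A runnable NumPy sketch of the reparameterized sampling this sets up, assuming the usual z = mean + exp(logstd) * eps construction:

import numpy as np

z_dim = 3
rng = np.random.default_rng(0)
logits = rng.standard_normal((2, 2 * z_dim))     # stand-in for the FC layer output
z_mean, z_logstd = np.split(logits, 2, axis=1)   # same split as tf.split(1, 2, logits)
eps = rng.standard_normal(z_mean.shape)
z = z_mean + np.exp(z_logstd) * eps              # reparameterized sample, differentiable in mean/logstd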
Example #6
    def _create_lstm_policy(self, args):
        # Create LSTM portion of network
        lstm = rnn_cell.LSTMCell(args.policy_size,
                                 state_is_tuple=True,
                                 initializer=initializers.xavier_initializer())
        self.full_lstm = rnn_cell.MultiRNNCell([lstm] * args.num_policy_layers,
                                               state_is_tuple=True)
        self.lstm_state = self.full_lstm.zero_state(args.batch_size,
                                                    tf.float32)

        # Forward pass
        policy_input = self.states
        output, self.final_state = seq2seq.rnn_decoder([policy_input],
                                                       self.lstm_state,
                                                       self.full_lstm)
        output = tf.reshape(tf.concat(1, output), [-1, args.policy_size])

        # Fully connected layer to latent variable distribution parameters
        W = tf.get_variable("lstm_w", [args.policy_size, args.action_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("lstm_b", [args.action_dim])
        self.a_mean = tf.nn.xw_plus_b(output, W, b)

        # Initialize logstd
        self.a_logstd = tf.Variable(np.zeros(args.action_dim),
                                    name="a_logstd",
                                    dtype=tf.float32)
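The policy head parameterizes a diagonal Gaussian over actions via a_mean and a_logstd. As a hedged aside (not part of the source), this is how such a policy's log-density is typically scored, in runnable NumPy:

import numpy as np

def gaussian_logp(a, mean, logstd):
    # log-density of a diagonal Gaussian with std = exp(logstd)
    var = np.exp(2.0 * logstd)
    return -0.5 * np.sum((a - mean) ** 2 / var + 2.0 * logstd + np.log(2.0 * np.pi), axis=-1)

a_mean = np.zeros(2)
a_logstd = np.zeros(2)                                # std = 1
print(gaussian_logp(np.zeros(2), a_mean, a_logstd))   # approx -1.8379 = -log(2*pi)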
Example #7
    def __init__(self, args, infer=False):
        self.args = args
        training = not infer
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)
        if training and args.dropout > 0:
            cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=1.0-args.dropout)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                self.embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.nn.embedding_lookup(self.embedding, self.input_data)
                if training and args.dropout > 0:
                    # args.dropout is a drop rate; tf.nn.dropout takes a keep
                    # probability, matching the DropoutWrapper above
                    inputs = tf.nn.dropout(inputs, 1.0 - args.dropout)
                inputs = tf.split(1, args.seq_length, inputs)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        if not infer:
            self.lr = tf.Variable(0.0, trainable=False)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                    args.grad_clip)
            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #8
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.rnncell == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.rnncell == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.rnncell == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("rnncell type not supported: {}".format(args.rnncell))

        cell = cell_fn(args.rnn_size)
        self.cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)
        self.attn_length = 5
        self.attn_size = 32
        self.attention_states = tf.placeholder(tf.float32, [args.batch_size, self.attn_length, self.attn_size])
        with tf.variable_scope('rnnlm'):
            softmax_w = build_weight([args.rnn_size, args.vocab_size],name='soft_w')
            softmax_b = build_weight([args.vocab_size],name='soft_b')
            word_embedding = build_weight([args.vocab_size, args.embedding_size],name='word_embedding')
            inputs_list = tf.split(1, args.seq_length, tf.nn.embedding_lookup(word_embedding, self.input_data))
            inputs_list = [tf.squeeze(input_, [1]) for input_ in inputs_list]
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(word_embedding, prev_symbol)

        if not args.attention:
            outputs, last_state = seq2seq.rnn_decoder(inputs_list, self.initial_state, self.cell, loop_function=loop if infer else None, scope='rnnlm')
        else:
            outputs, last_state = attention_decoder(inputs_list, self.initial_state, self.attention_states, self.cell, loop_function=loop if infer else None, scope='rnnlm')

        self.final_state = last_state
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        # average loss per word per timestep
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        self.var_trainable_op = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, self.var_trainable_op),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, self.var_trainable_op))
        self.initial_op = tf.global_variables_initializer()
        self.logfile = args.log_dir + str(datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S') + '.txt').replace(' ', '').replace('/', '')
        self.var_op = tf.global_variables()
        self.saver = tf.train.Saver(self.var_op, max_to_keep=4, keep_checkpoint_every_n_hours=1)
Example #9
def basic_rnn_seq2seq_with_loop_function(
        encoder_inputs, decoder_inputs, cell, dtype=dtypes.float32, loop_function=None, scope=None):
    """Basic RNN sequence-to-sequence model. Edited for a loopback function. Don't know why this isn't in the
    current library
    """
    with variable_scope.variable_scope(scope or "basic_rnn_seq2seq_with_loop_function"):
        _, enc_state = rnn.rnn(cell, encoder_inputs, dtype=dtype)
        return rnn_decoder(decoder_inputs, enc_state, cell, loop_function=loop_function)
Example #10
    def build_model(self):
        with tf.name_scope("batch_size"):
            self.batch_size = tf.shape(self.images)[0]
        with tf.variable_scope("rnnlm"):
            image_emb = tf.matmul(self.fc7, self.encode_img_W) + self.encode_img_b

            # Replicate self.seq_per_img times for each image embedding
            image_emb = tf.reshape(tf.tile(tf.expand_dims(image_emb, 1), [1, self.seq_per_img, 1]), [self.batch_size * self.seq_per_img, self.input_encoding_size])

            rnn_inputs = tf.split(1, self.seq_length + 1, tf.nn.embedding_lookup(self.Wemb, self.labels[:,:self.seq_length + 1]))
            rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]
            rnn_inputs = [image_emb] + rnn_inputs

            initial_state = self.cell.zero_state(self.batch_size * self.seq_per_img, tf.float32)

            outputs, last_state = seq2seq.rnn_decoder(rnn_inputs, initial_state, self.cell, loop_function=None)
            #outputs, last_state = tf.nn.rnn(self.cell, rnn_inputs, initial_state)

            self.logits = [tf.matmul(output, self.embed_word_W) + self.embed_word_b for output in outputs[1:]]
        with tf.variable_scope("loss"):
            loss = seq2seq.sequence_loss_by_example(self.logits,
                    [tf.squeeze(label, [1]) for label in tf.split(1, self.seq_length + 1, self.labels[:, 1:])], # self.labels[:,1:] is the target
                    [tf.squeeze(mask, [1]) for mask in tf.split(1, self.seq_length + 1, self.masks[:, 1:])])
            self.cost = tf.reduce_mean(loss)
        
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.cnn_lr = tf.Variable(0.0, trainable=False)

        # Collect the rnn variables, and create the optimizer of rnn
        tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
        optimizer = tf.train.AdamOptimizer(self.lr, beta1=0.8)
        grads = optimizer.compute_gradients(self.cost, tvars)
        grads_clipped = [(tf.clip_by_value(g, -self.opt.grad_clip, self.opt.grad_clip), v) for g, v in grads if g is not None]
        #grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
        #        self.opt.grad_clip)
        self.train_op = optimizer.apply_gradients(grads_clipped)

        # Collect the cnn variables, and create the optimizer of cnn
        cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg16')
        cnn_optimizer = tf.train.AdamOptimizer(self.cnn_lr, beta1=0.8)     
        cnn_grads = cnn_optimizer.compute_gradients(self.cost, cnn_tvars)
        cnn_grads_clipped = [(tf.clip_by_value(g, -self.opt.grad_clip, self.opt.grad_clip), v) for g, v in cnn_grads if g is not None]
        #cnn_grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, cnn_tvars),
        #        self.opt.grad_clip)
        self.cnn_train_op = cnn_optimizer.apply_gradients(cnn_grads_clipped)

        tf.scalar_summary('training loss', self.cost)
        tf.scalar_summary('learning rate', self.lr)
        tf.scalar_summary('cnn learning rate', self.cnn_lr)
        #for i,j in cnn_grads:
            #if not i is None and j.name.startswith('vgg16_1'): 
                #tf.histogram_summary(j.name+'_v', j)
                #tf.histogram_summary(j.name+'_d', i)
        #for i,j in grads:
            #tf.histogram_summary(j.name+'_v', j)
            #tf.histogram_summary(j.name+'_d', i)
        self.summaries = tf.merge_all_summaries()
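This example clips each gradient element to [-grad_clip, grad_clip] with tf.clip_by_value, while the commented-out alternative (and Examples #1 and #7) rescales the whole gradient by its global norm. A runnable NumPy contrast of the two semantics:

import numpy as np

grads = [np.array([3.0, -0.5]), np.array([-4.0, 1.0])]
clip = 1.0

by_value = [np.clip(g, -clip, clip) for g in grads]   # element-wise cap, direction changes
norm = np.sqrt(sum((g ** 2).sum() for g in grads))
scale = min(1.0, clip / norm)
by_norm = [g * scale for g in grads]                  # whole gradient rescaled, direction kept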
Example #11
 def decoder(cell, dec_outputs, states, scope):
     outputs = []
     with variable_scope.variable_scope(scope):
         for i in range(len(states)):
             if i > 0:
                 variable_scope.get_variable_scope().reuse_variables()
             outs, _ = seq2seq.rnn_decoder(dec_outputs, states[i], cell)
             outputs.extend(outs)
     return outputs
Example #12
 def discriminate_wv(self, input_data_wv):
     with tf.variable_scope('DISC', reuse=self.has_init_seq2seq) as scope:
         self.has_init_seq2seq = True
         output_wv, states_wv = seq2seq.rnn_decoder(input_data_wv,
                                                    self.initial_state,
                                                    self.cell,
                                                    scope=scope)
         predicted_classes_wv = tf.matmul(output_wv[-1], self.fc_layer)
     return predicted_classes_wv
Example #13
    def generate(self):
        inputs = tf.split(1, self.args.seq_length, tf.nn.embedding_lookup(self.embedding, self.input_data))
        inputs = map(lambda i: tf.nn.l2_normalize(i, 1), [tf.squeeze(input_, [1]) for input_ in inputs])

        def loop(prev, i):
            return prev

        with tf.variable_scope('GEN', reuse=self.has_init_seq2seq) as scope:
            self.has_init_seq2seq = True
            if self.args.num_layers == 1:
                outputs, last_state = seq2seq.rnn_decoder(inputs, [self.initial_state1], self.cell, loop_function=loop, scope=scope)
            elif self.args.num_layers == 2:
                outputs, last_state = seq2seq.rnn_decoder(inputs, [self.initial_state1, self.initial_state2], self.cell, loop_function=loop, scope=scope)
            else:
                raise Exception('Unsupported number of layers. Use 1 or 2 layers for now.')
            outputs = map(lambda o: tf.nn.l2_normalize(o, 1), outputs)
        self.outputs = outputs
        return outputs
Example #14
  def build_network(self):
    with tf.variable_scope('encoder'):
      z_mean_w = tf.Variable(self.initializer([self._enc_cell.state_size, self.n_latent]))
      z_mean_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32))
      z_logvar_w = tf.Variable(self.initializer([self._enc_cell.state_size, self.n_latent]))
      z_logvar_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32))

      _, enc_state = rnn.rnn(self._enc_cell, self.inputs, dtype=tf.float32)
      self.z_mean = tf.add(tf.matmul(enc_state, z_mean_w), z_mean_b)
      self.z_log_var = tf.add(tf.matmul(enc_state, z_logvar_w), z_logvar_b)
      eps = tf.random_normal((self.batch_size, self.n_latent), 0, 1, dtype=tf.float32)
      self.z = tf.add(self.z_mean, tf.mul(tf.sqrt(tf.exp(self.z_log_var)), eps))
      
    with tf.variable_scope('decoder') as scope:
      dec_in_w = tf.Variable(self.initializer([self.n_latent, self._dec_cell.state_size],
                                              dtype=tf.float32))
      dec_in_b = tf.Variable(tf.zeros([self._dec_cell.state_size], dtype=tf.float32))
      dec_out_w = tf.Variable(self.initializer([self.n_hidden, self.elem_num], dtype=tf.float32))
      dec_out_b = tf.Variable(tf.zeros([self.elem_num], dtype=tf.float32))

      initial_dec_state = self.transfer_func(tf.add(tf.matmul(self.z, dec_in_w), dec_in_b))
      dec_out, _ = seq2seq.rnn_decoder(self.inputs, initial_dec_state, self._dec_cell)
      if self.reverse:
        dec_out = dec_out[::-1]
      dec_output = tf.transpose(tf.pack(dec_out), [1, 0, 2])
      batch_dec_out_w = tf.tile(tf.expand_dims(dec_out_w, 0), [self.batch_size, 1, 1])
      self.output = tf.nn.sigmoid(tf.batch_matmul(dec_output, batch_dec_out_w) + dec_out_b)

      scope.reuse_variables()
      dec_gen_input = [0.5 * tf.ones([self.batch_size, self.elem_num],
                                dtype=tf.float32) for _ in range(self.step_num)]
      self.z_gen = tf.placeholder(tf.float32, [self.batch_size, self.n_latent])
      dec_gen_state = self.transfer_func(
        tf.add(tf.matmul(self.z_gen, dec_in_w), dec_in_b))
      dec_gen_out, _ = seq2seq.rnn_decoder(
        dec_gen_input, dec_gen_state, self._dec_cell)
      if self.reverse:
        dec_gen_out = dec_gen_out[::-1]
      dec_gen_output = tf.transpose(tf.pack(dec_gen_out), [1, 0, 2]) 
      self.gen_output = tf.nn.sigmoid(tf.batch_matmul(dec_gen_output, batch_dec_out_w) + dec_out_b)
    
    self.inp = tf.transpose(tf.pack(self.inputs), [1, 0, 2])
    self.train_loss = self.get_loss()
    self.train = tf.train.AdamOptimizer(self.learning_rate).minimize(self.train_loss)
Example #15
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size, state_is_tuple=True)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.empirical_entropy = self.cost / np.log(2)
        tf.summary.scalar('Empirical_Entropy', self.empirical_entropy)
        
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_summaries = tf.summary.merge_all()
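The empirical_entropy line converts the per-symbol cross-entropy from nats (TF's losses use natural logarithms) to bits by dividing by log 2. A quick runnable check of that conversion:

import numpy as np

vocab_size = 256
nats = np.log(vocab_size)   # cross-entropy of a uniform model, in nats
bits = nats / np.log(2)     # the empirical_entropy conversion above
assert np.isclose(bits, np.log2(vocab_size))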
Example #16
    def __init__(self, args, infer=False):
        self.args = args
        if infer:  # When we sample, the batch size and sequence length are set to 1
            args.batch_size = 1
            args.seq_length = 1

        cell_fn = rnn_cell.BasicLSTMCell  #Define the internal cell structure
        cell = cell_fn(args.rnn_size, state_is_tuple=True)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                                 state_is_tuple=True)
        # Build the input and target placeholders, and start from a zero internal state
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                "softmax_w", [args.rnn_size, args.vocab_size])  #Final w
            softmax_b = tf.get_variable("softmax_b",
                                        [args.vocab_size])  #Final bias
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                inputs = tf.split(
                    1, args.seq_length,
                    tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #17
	def __init__(self, rnn_size, num_layers, batch_size, seq_length, vocabulary_size, gradient_clip, sample=False):

		lstm_cell = rnn_cell.BasicLSTMCell(num_units=rnn_size)

		# create the RNN cell, that is constructed from multiple lstm cells, by duplicating the lstm cell
		self.cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

		# Initial state is a matrix of zeros
		self.initial_state = self.cell.zero_state(batch_size, tf.float32)

		# Define the vectors that will hold Tensorflow state
		self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
		self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])

		# variable_scope is tensorflow best practice that allows us to recycle variables names with different scopes
		with tf.variable_scope(VARIABLE_SCOPE):
			softmax_w = tf.get_variable("softmax_w", [rnn_size, vocabulary_size])
			softmax_b = tf.get_variable("softmax_b", [vocabulary_size])
			with tf.device("/cpu:0"):
				embedding = tf.get_variable("embedding", [vocabulary_size, rnn_size])
				inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
				inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

		def loop_function(prev, _):
			prev = tf.matmul(prev, softmax_w) + softmax_b
			stop_gradient = tf.stop_gradient(tf.argmax(prev, 1))
			return tf.nn.embedding_lookup(embedding, stop_gradient)

		outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=loop_function if sample else None, scope=VARIABLE_SCOPE)
		output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])

		# Calculate the logits and probabilities for the tensor
		self.logits = tf.matmul(output, softmax_w) + softmax_b
		self.probabilities = tf.nn.softmax(self.logits)
		loss = seq2seq.sequence_loss_by_example([self.logits],
				[tf.reshape(self.targets, [-1])],
				[tf.ones([batch_size * seq_length])],
				vocabulary_size)
		self.cost = tf.reduce_sum(loss) / batch_size / seq_length
		self.final_state = last_state
		self.lr = tf.Variable(0.0, trainable=False)
		tvars = tf.trainable_variables()
		grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
				gradient_clip)
		optimizer = tf.train.AdamOptimizer(self.lr)
		self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #18
    def _create_lstm_policy(self, args):
        raise NotImplementedError
        # Create LSTM portion of network
        lstm = rnn_cell.LSTMCell(args.policy_size,
                                 state_is_tuple=True,
                                 initializer=initializers.xavier_initializer())
        self.policy_lstm = rnn_cell.MultiRNNCell([lstm] *
                                                 args.num_policy_layers,
                                                 state_is_tuple=True)
        self.policy_state = self.policy_lstm.zero_state(
            args.batch_size * args.sample_size, tf.float32)

        # Get samples from standard normal distribution, transform to match z-distribution
        samples = tf.random_normal(
            [args.sample_size, args.batch_size, args.z_dim], name="z_samples")
        self.z_samples = samples * tf.exp(self.z_logstd) + self.z_mean
        self.z_samples = tf.transpose(self.z_samples, perm=[1, 0, 2])

        # Construct policy input
        policy_input = tf.concat(2, [self.states, self.z_samples])
        policy_input = tf.reshape(
            policy_input,
            [args.batch_size * args.sample_size, args.state_dim + args.z_dim],
            name="policy_input")

        # Forward pass
        with tf.variable_scope("policy"):
            output, self.final_policy_state = seq2seq.rnn_decoder(
                [policy_input], self.policy_state, self.policy_lstm)
        output = tf.reshape(tf.concat(1, output), [-1, args.policy_size])

        # Fully connected layer to latent variable distribution parameters
        W = tf.get_variable("lstm_w", [args.policy_size, args.action_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("lstm_b", [args.action_dim])
        a_mean = tf.nn.xw_plus_b(output, W, b)
        self.a_mean = tf.reshape(
            a_mean, [args.batch_size, args.sample_size, args.action_dim],
            name="a_mean")

        # Initialize logstd
        self.a_logstd = tf.Variable(np.zeros(args.action_dim),
                                    name="a_logstd",
                                    dtype=tf.float32)
Example #19
def basic_decoder( batch_input_shape, cells, code,  keep_prob, **kwargs ):
    # Receive arguments
    batch_size, timestep, feature = batch_input_shape
    peek = kwargs['peek']

    assert len(cells) == 1, "One cell needed!"
    de_cell = cells[0]
    
    # Start building graph
    hidden_dim = de_cell.output_size 

    # Define code
    code_dropout = tf.nn.dropout(code, keep_prob)
   
    code_dim = int(code_dropout.get_shape()[1])

    # Decoder inputs
    rest_of_decoder_inputs = [ tf.placeholder(tf.float32, shape=[ batch_size, code_dim ]) for _ in range(timestep-1) ]

    decoder_inputs_dropout = [ code_dropout ] + \
            [ tf.nn.dropout(inp, keep_prob) for inp in rest_of_decoder_inputs ] 

    def loop(prev, i):
        if peek:
            return prev + code_dropout # Output as input
        else:
            return prev
   
    decoder_outputs, decoder_state = seq2seq.rnn_decoder( decoder_inputs_dropout, de_cell.zero_state(batch_size,tf.float32), de_cell, loop_function = loop )
   
    W_out = tf.get_variable("W_out", shape=[hidden_dim, feature],
                       initializer=tf.contrib.layers.xavier_initializer())
    b_out = tf.Variable( tf.zeros([ feature ] ) )

    unpacked_reconstruction = [ tf.matmul( tf.nn.dropout( out, keep_prob ), W_out ) for out in decoder_outputs ]

    #recX = tf.nn.relu( tf.transpose(tf.pack(unpacked_reconstruction), perm=[1, 0, 2]) )
    recX = tf.transpose(tf.pack(unpacked_reconstruction), perm=[1, 0, 2])

    return recX
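With peek enabled, the loop function re-adds the latent code to every fed-back output, so the decoder sees the code at every step rather than only at step 0. A plain-Python sketch of that switch (toy arrays, no TF):

import numpy as np

code = 0.5 * np.ones((2, 3))   # latent code, mirroring code_dropout above
prev = np.zeros((2, 3))        # a fed-back decoder output

def loop(prev, peek):
    return prev + code if peek else prev  # peek: mix the code back in each step

print(loop(prev, peek=True))
print(loop(prev, peek=False))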
Example #20
 def __init__(self, args):
     self.args = args
     self.dropout = tf.Variable(trainable=False,
                                dtype=tf.float32,
                                initial_value=0)
     cell = rnn_cell.LSTMCell(args.hidden, state_is_tuple=True)
     cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                  state_is_tuple=True)
     self.cell = tf.nn.rnn_cell.DropoutWrapper(
         cell, output_keep_prob=self.dropout)
     self.input_data = tf.placeholder(
         tf.float32, [args.batch_size, args.seq_length, args.seq_dim])
     self.output_data = tf.placeholder(tf.int32, [args.batch_size])
     self.initial_state = cell.zero_state(args.batch_size, tf.float32)
     with tf.variable_scope('rnn_audio'):
         rnn_weights = tf.get_variable("rnn_weights",
                                       [args.hidden, args.num_classes])
         rnn_bias = tf.get_variable("rnn_bias", [args.num_classes])
         with tf.device("/cpu:0"):
             inputs = tf.split(1, args.seq_length, self.input_data)
             inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
     outputs, last_state = seq2seq.rnn_decoder(inputs,
                                               self.initial_state,
                                               cell,
                                               scope='rnn_audio')
     output = outputs[-1]
     self.logits = tf.matmul(output, rnn_weights) + rnn_bias
     self.probabilities = tf.nn.softmax(self.logits)
     loss = seq2seq.sequence_loss_by_example([self.logits],
                                             [self.output_data],
                                             [tf.ones([args.batch_size])],
                                             args.num_classes)
     self.cost = tf.reduce_mean(loss)
     self.final_state = last_state
     self.lr = tf.Variable(0.0, trainable=False)
     train_vars = tf.trainable_variables()
     grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, train_vars),
                                       5)
     optimizer = tf.train.AdamOptimizer(self.lr)
     self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
Example #21
    def val_loss(self):
        # reuse vars 
        tf.get_variable_scope().reuse_variables()

        # unpack values for easier reference
        seq_length = self._opts.fake_sequence_length

        def loop_function(prev, i):
            return tf.matmul(prev, self.W_out) + self.b_out

        # build the decoder rnn
        outputs, states = seq2seq.rnn_decoder(self.decoder_inputs, 
                            self.enc_state, self.cell, loop_function) 

        # so the outputs are the scores
        # we could convert them to probability distributions
        # with a softmax, but for now just treat them as the 
        # direct predictions
        predictions = []
        for idx in range(seq_length):
            pred = loop_function(outputs[idx], idx)
            predictions.append(pred)

        # the targets are the same as the decoder_inputs
        # except shifted ahead in time 1 unit
        targets = [dec_input for dec_input in self.decoder_inputs[1:]]

        # compute the loss, which for now is squared error
        losses = []
        for idx in range(seq_length):
            diff = targets[idx] - predictions[idx]
            loss = tf.reduce_mean(tf.square(diff))
            losses.append(loss)

        # get and return cumulative loss
        loss = tf.add_n(losses)
        return loss
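The validation loss is the sum over timesteps of per-step mean squared error against the one-step-shifted decoder inputs. The same computation in runnable NumPy, with toy shapes assumed:

import numpy as np

rng = np.random.default_rng(0)
predictions = [rng.standard_normal((2, 3)) for _ in range(4)]  # seq_length=4, batch=2, dim=3
targets = [rng.standard_normal((2, 3)) for _ in range(4)]      # decoder inputs shifted one step
loss = sum(np.mean((t - p) ** 2) for t, p in zip(targets, predictions))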
Example #22
    def prepare_graph(self):
        # prepare the input placeholders
        with tf.name_scope("encoder/decoder/convNet"):
            # declare the placeholders for a batch of sequences
            inputs = []
            targets = []
            # inputs and targets are both images
            for i in xrange(self.seq_length):
                inputs += [tf.placeholder(dtype=tf.float32,
                                          shape=(None, self.image_shape[0],
                                                 self.image_shape[1]))]
                targets += [tf.placeholder(dtype=tf.float32,
                                           shape=(None, self.image_shape[0],
                                                  self.image_shape[1]))]
            # initialize the weights and biases for the encoder and decoder;
            # each convolution kernel shares the same weights
            self.W_conv = init_W(shape=[20, 20, 1, 32])
            self.b_conv = init_bias(shape=[32])
            encoder_conv = []
            encoder_max = []

            for input, target in zip(inputs, targets):
                encoder_conv += [tf.nn.relu(conv2d(input, self.W_conv) +
                                            self.b_conv)]
                encoder_max += [maxpooling_2x2(encoder_conv[-1])]

        with variable_scope.variable_scope("LSTM-CovolutionSeq2Seq"):
            cell = tf.nn.rnn_cell.BasicLSTMCell(self.lstm_hidden)
            _, enc_state = rnn.rnn(cell, encoder_conv, dtype=tf.float32)
            # put enc_states into a convolutional net

            decoders, state = rnn_decoder(encoder_max, enc_state, cell,
                                          feed_previous=True)

            for decoder in decoders:  # upsampling
                conv2d(decoder, tf.transpose(self.W_conv, perm=[2, 3, 0, 1]))
Example #23
File: model.py Project: Styrke/master-code
    def build(self):
        print('  Building model')
        self.embeddings = tf.Variable(
            tf.random_normal([self.alphabet_size, self.embedd_dims],
            stddev=0.1), name='embeddings')

        X_embedded = tf.gather(self.embeddings, self.Xs,    name='embed_X')
        t_embedded = tf.gather(self.embeddings, self.ts_go, name='embed_t')

        with tf.variable_scope('split_X_inputs'):
            X_list = tf.split(split_dim=1,
                              num_split=self.max_x_seq_len,
                              value=X_embedded)
            X_list = [tf.squeeze(X) for X in X_list]
            [X.set_shape([None, self.embedd_dims]) for X in X_list]

        with tf.variable_scope('split_t_inputs'):
            t_list = tf.split(split_dim=1,
                              num_split=self.max_t_seq_len,
                              value=t_embedded)
            t_list = [tf.squeeze(t) for t in t_list]
            [t.set_shape([None, self.embedd_dims]) for t in t_list]

        with tf.variable_scope('dense_out'):
            W_out = tf.get_variable('W_out', [self.rnn_units, self.alphabet_size])
            b_out = tf.get_variable('b_out', [self.alphabet_size])

        cell = rnn_cell.GRUCell(self.rnn_units)

        # encoder
        enc_outputs, enc_state = rnn.rnn(cell=cell,
                                         inputs=X_list,
                                         dtype=tf.float32,
                                         sequence_length=self.X_len,
                                         scope='rnn_encoder')

        tf.histogram_summary('final_encoder_state', enc_state)

        # The loop function provides inputs to the decoder:
        def decoder_loop_function(prev, i):
            def feedback_on():
                prev_1 = tf.matmul(prev, W_out) + b_out
                # feedback is on, so feed the decoder with the previous output
                return tf.gather(self.embeddings, tf.argmax(prev_1, 1))

            def feedback_off():
                # feedback is off, so just feed the decoder with t's
                return t_list[i]

            return tf.cond(self.feedback, feedback_on, feedback_off)

        # decoder
        dec_out, dec_state = (
                seq2seq.rnn_decoder(decoder_inputs=t_list,
                                    initial_state=enc_state,
                                    cell=cell,
                                    loop_function=decoder_loop_function) )

        self.out = [tf.matmul(d, W_out) + b_out for d in dec_out]

        # for debugging network (NOTE should write this outside of build)
        out_packed = tf.pack(self.out)
        out_packed = tf.transpose(out_packed, perm=[1, 0, 2])
        self.out_tensor = out_packed

        # add TensorBoard summaries for all variables
        tf.contrib.layers.summarize_variables()
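The tf.cond in decoder_loop_function switches between feeding back the model's own prediction and teacher forcing with the ground-truth t_list[i]. A plain-Python sketch of that choice (toy arrays, not the TF graph):

import numpy as np

embeddings = np.eye(4)                         # toy embedding table
t_list = [np.zeros((1, 4)) for _ in range(3)]  # ground-truth inputs per step

def next_decoder_input(prev_logits, i, feedback):
    if feedback:
        return embeddings[prev_logits.argmax(axis=1)]  # feed back own prediction
    return t_list[i]                                   # teacher forcing

prev_logits = np.array([[0.1, 2.0, -1.0, 0.3]])
print(next_decoder_input(prev_logits, 1, feedback=True))   # row 1 of embeddings
print(next_decoder_input(prev_logits, 1, feedback=False))  # t_list[1]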
Example #24
    def build(self):
        params = self.params
        N, L, Q, F = params.batch_size, params.max_sent_size, params.max_ques_size, params.max_fact_count
        V, d, A = params.glove_size, params.hidden_size, self.words.vocab_size

        # initialize self
        # placeholders
        input = tf.placeholder(tf.float32, shape=[N, L, V], name='x')  # [num_batch, sentence_len, glove_dim]
        question = tf.placeholder(tf.float32, shape=[N, Q, V], name='q')  # [num_batch, sentence_len, glove_dim]
        answer = tf.placeholder(tf.int64, shape=[N], name='y')  # [num_batch] - one word answer
        input_mask = tf.placeholder(tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
        is_training = tf.placeholder(tf.bool)

        # Prepare parameters
        gru = rnn_cell.GRUCell(d)

        # Input module
        with tf.variable_scope('input') as scope:
            input_list = self.make_decoder_batch_input(input)
            input_states, _ = seq2seq.rnn_decoder(input_list, gru.zero_state(N, tf.float32), gru)

            # Question module
            scope.reuse_variables()

            ques_list = self.make_decoder_batch_input(question)
            questions, _ = seq2seq.rnn_decoder(ques_list, gru.zero_state(N, tf.float32), gru)
            question_vec = questions[-1]  # use final state

        # Masking: to extract fact vectors at end of sentence. (details in paper)
        input_states = tf.transpose(tf.pack(input_states), [1, 0, 2])  # [N, L, D]
        facts = []
        for n in range(N):
            filtered = tf.boolean_mask(input_states[n, :, :], input_mask[n, :])  # [?, D]
            padding = tf.zeros(tf.pack([F - tf.shape(filtered)[0], d]))
            facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

        facked = tf.pack(facts)  # packing for transpose... I hate TF so much
        facts = tf.unpack(tf.transpose(facked, [1, 0, 2]), num=F)  # F x [N, D]

        # Episodic Memory
        with tf.variable_scope('episodic') as scope:
            episode = EpisodeModule(d, question_vec, facts)

            memory = tf.identity(question_vec)
            for t in range(params.memory_step):
                memory = gru(episode.new(memory), memory)[0]
                scope.reuse_variables()

        # Regularizations
        if params.batch_norm:
            memory = batch_norm(memory, is_training=is_training)
        memory = dropout(memory, params.keep_prob, is_training)

        with tf.name_scope('Answer'):
            # Answer module : feed-forward version (for it is one word answer)
            w_a = weight('w_a', [d, A])
            logits = tf.matmul(memory, w_a)  # [N, A]

        with tf.name_scope('Loss'):
            # Cross-Entropy loss
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, answer)
            loss = tf.reduce_mean(cross_entropy)
            total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

        with tf.variable_scope('Accuracy'):
            # Accuracy
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # Training
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # placeholders
        self.x = input
        self.q = question
        self.y = answer
        self.mask = input_mask
        self.is_training = is_training

        # tensors
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
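The masking block extracts hidden states at end-of-sentence positions and zero-pads each example up to F facts. A runnable NumPy analogue of that per-example operation:

import numpy as np

L, D, F = 6, 4, 3
states = np.random.default_rng(0).standard_normal((L, D))
mask = np.array([0, 1, 0, 0, 1, 0], dtype=bool)   # True at sentence-end positions
filtered = states[mask]                           # like tf.boolean_mask -> [?, D]
padding = np.zeros((F - filtered.shape[0], D))
facts = np.concatenate([filtered, padding])       # zero-padded to [F, D]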
Example #25
File: model.py Project: chubbymaggie/clgen
    def _init_tensorflow(self, infer: bool = False):
        """
        Deferred importing of tensorflow and initializing model for training
        or sampling.

        This is necessary for two reasons: first, the tensorflow graph is
        different for training and inference, so must be reset when switching
        between modes. Second, importing tensorflow takes a long time, so
        we only want to do it if we actually need to.

        Arguments:
            infer (bool): If True, initialize model for inference. If False,
                initialize model for training.

        Returns:
            module: imported TensorFlow module
        """
        import tensorflow as tf
        from tensorflow.python.ops import rnn_cell
        from tensorflow.python.ops import seq2seq

        # Use self.tensorflow_state to mark whether or not model is configured
        # for training or inference.
        try:
            if self.tensorflow_state == infer:
                return tf
        except AttributeError:
            pass

        self.cell_fn = {
            "lstm": rnn_cell.BasicLSTMCell,
            "gru": rnn_cell.GRUCell,
            "rnn": rnn_cell.BasicRNNCell
        }.get(self.model_type, None)
        if self.cell_fn is None:
            raise clgen.UserError("Unrecognized model type")

        # reset the graph when switching between training and inference
        tf.reset_default_graph()

        # corpus info:
        batch_size = 1 if infer else self.corpus.batch_size
        seq_length = 1 if infer else self.corpus.seq_length
        vocab_size = self.corpus.vocab_size

        fs.mkdir(self.cache.path)

        cell = self.cell_fn(self.rnn_size, state_is_tuple=True)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers,
                                                 state_is_tuple=True)
        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)

        scope_name = 'rnnlm'
        with tf.variable_scope(scope_name):
            softmax_w = tf.get_variable("softmax_w",
                                        [self.rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])

            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [vocab_size, self.rnn_size])
                inputs = tf.split(
                    1, seq_length,
                    tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope=scope_name)
        output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * seq_length])], vocab_size)
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state
        self.learning_rate = tf.Variable(0.0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # set model status
        self.tensorflow_state = infer

        return tf
Example #26
# outputs, finstate = rnn.rnn(neurons, inputs, init_state)

####################

# hand made seq2seq

# outputs_le, finstate = rnn.rnn(neurons, inputs, init_state)
# inp_state = array_ops.slice(finstate, [0, 0], [batch_size, input_size])
# le_state = array_ops.slice(finstate, [0, input_size], [batch_size, le_size])
# finstate = array_ops.concat(1, [le_state, inp_state])
# outputs, finstate = rnn.rnn(neurons_out, outputs_le, finstate, scope="out")

####################

outputs, finstate = ss.rnn_decoder(inputs, state, neurons)

loss = tf.add_n([ tf.nn.l2_loss(target - output) for output, target in zip(outputs, targets) ]) / bptt_steps / batch_size / net_size

###

test_inputs  = [ tf.placeholder(tf.float32, shape=(1, input_size), name="TestInput{}".format(idx)) for idx in xrange(bptt_steps) ]
test_state = tf.placeholder(tf.float32, shape=(1, state_size), name="TestState")

variable_scope.get_variable_scope().reuse_variables()
test_outputs, test_finstate = ss.rnn_decoder(test_inputs, test_state, neurons)

###


lrate_var = tf.Variable(0.0, trainable=False)
Example #27
    def __init__(self, args, infer=False): # infer is set to true during sampling.
        self.args = args
        if infer:
            # Worry about one character at a time during sampling; no batching or BPTT.
            args.batch_size = 1
            args.seq_length = 1

        # Set cell_fn to the type of network cell we're creating -- RNN, GRU or LSTM.
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Call tensorflow library tensorflow-master/tensorflow/python/ops/rnn_cell
        # to create a layer of rnn_size cells of the specified basic type (RNN/GRU/LSTM).
        cell = cell_fn(args.rnn_size, state_is_tuple=True)

        # Use the same rnn_cell library to create a stack of these cells
        # of num_layers layers. Pass in a python list of these cells.
        # (The [cell] * arg.num_layers syntax literally duplicates cell multiple times in
        # a list. The syntax is such that [5, 6] * 3 would return [5, 6, 5, 6, 5, 6].)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

        # Create two TF placeholder nodes of 32-bit ints (NOT floats!),
        # each of shape batch_size x seq_length. This shape matches the batches
        # (listed in x_batches and y_batches) constructed in create_batches in utils.py.
        # input_data will receive input batches, and targets will be what it compares against
        # to calculate loss.
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        # Using the zero_state function in the RNNCell base class in the rnn_cell library,
        # create a tensor of zeros such that we can swap it in for the network state at any time
        # to zero out the network's state.
        # State dimensions are: cell_fn state size (2 for LSTM) x rnn_size x num_layers.
        # So an LSTM network with 100 cells per layer and 3 layers would have a state size of 600,
        # and initial_state would have a dimension of none x 600.
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        # Scope our new variables to the scope identifier string "rnnlm".
        with tf.variable_scope('rnnlm'):
            # Create new variable softmax_w and softmax_b for output.
            # softmax_w is a weights matrix from the top layer of the model (of size rnn_size)
            # to the vocabulary output (of size vocab_size).
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            # softmax_b is a bias vector of the output characters (of size vocab_size).
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            # [TODO: Why specify CPU? Same as the TF translation tutorial, but don't know why.]
            with tf.device("/cpu:0"):
                # Create new variable named 'embedding' to connect the character input to the base layer
                # of the RNN. Its role is the conceptual inverse of softmax_w.
                # It contains the trainable weights from the one-hot input vector to the lowest layer of RNN.
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                # Create an embedding tensor with tf.nn.embedding_lookup(embedding, self.input_data).
                # This tensor has dimensions batch_size x seq_length x rnn_size.
                # tf.split splits that embedding lookup tensor into seq_length tensors (along dimension 1).
                # Thus inputs is a list of seq_length different tensors,
                # each of dimension batch_size x 1 x rnn_size.
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                # Iterate through these resulting tensors and eliminate that degenerate second dimension of 1,
                # i.e. squeeze each from batch_size x 1 x rnn_size down to batch_size x rnn_size.
                # Thus we now have a list of seq_length tensors, each with dimension batch_size x rnn_size.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # NOTE: this loop function is passed to seq2seq.rnn_decoder only during sampling,
        # never during training. Even then, since seq_length == 1 during inference,
        # rnn_decoder applies the loop function only to time steps after the first,
        # so in practice this function body is never executed.
        # prev is a 2D Tensor of shape [batch_size x cell.output_size].
        # returns a 2D Tensor of shape [batch_size x cell.input_size].
        def loop(prev, _):
            # prev is initially the top cell state.
            # Convert the top cell state into character logits.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            # Pull the character with the greatest logit (no sampling, just argmaxing).
            # [TODO: Why argmax here when actual sampling is done probabilistically?
            # Doesn't this cause outputs not to match inputs during sequence generation?]
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            # Re-embed that symbol as the next step's input, and return that.
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # Set up a seq2seq decoder from the seq2seq.py library.
        # This constructs the outputs and states nodes of the network.
        # Outputs is a list (of len seq_length, same as inputs) of tensors of shape [batch_size x rnn_size].
        # These are the raw output values of the top layer of the network at each time step.
        # They have NOT been fed through the decoder projection; they are still in network space,
        # not character space.
        # State is a tensor of shape [batch_size x cell.state_size].
        # This is also the step where all of the trainable parameters for the LSTM (weights and biases) are defined.
        outputs, self.final_state = seq2seq.rnn_decoder(inputs,
                self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        # tf.concat concatenates the output tensors along the rnn_size dimension,
        # to make a single tensor of shape [batch_size x (seq_length * rnn_size)].
        # This gives the following 2D outputs matrix:
        #   [(rnn output: batch 0, seq 0) (rnn output: batch 0, seq 1) ... (rnn output: batch 0, seq seq_len-1)]
        #   [(rnn output: batch 1, seq 0) (rnn output: batch 1, seq 1) ... (rnn output: batch 1, seq seq_len-1)]
        #   ...
        #   [(rnn output: batch batch_size-1, seq 0) (rnn output: batch batch_size-1, seq 1) ... (rnn output: batch batch_size-1, seq seq_len-1)]
        # tf.reshape then reshapes it to a tensor of shape [(batch_size * seq_length) x rnn_size].
        # Output will now be the following matrix:
        #   [rnn output: batch 0, seq 0]
        #   [rnn output: batch 0, seq 1]
        #   ...
        #   [rnn output: batch 0, seq seq_len-1]
        #   [rnn output: batch 1, seq 0]
        #   [rnn output: batch 1, seq 1]
        #   ...
        #   [rnn output: batch 1, seq seq_len-1]
        #   ...
        #   ...
        #   [rnn output: batch batch_size-1, seq seq_len-1]
        # Note the following comment in rnn_cell.py:
        #   Note: in many cases it may be more efficient to not use this wrapper,
        #   but instead concatenate the whole sequence of your outputs in time,
        #   do the projection on this batch-concatenated sequence, then split it
        #   if needed or directly feed into a softmax.
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        # Obtain logits node by applying output weights and biases to the output tensor.
        # Logits is a tensor of shape [(batch_size * seq_length) x vocab_size].
        # Recall that output is a 2D tensor of shape [(batch_size * seq_length) x rnn_size],
        # and softmax_w is a 2D tensor of shape [rnn_size x vocab_size].
        # The matrix product is therefore a new 2D tensor of [(batch_size * seq_length) x vocab_size].
        # In other words, that multiplication converts a loooong list of rnn_size vectors
        # to a loooong list of vocab_size vectors.
        # Then add softmax_b (a single vocab-sized vector) to every row of that list.
        # That gives you the logits!
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        # Convert logits to probabilities. Probs isn't used during training! That node is never calculated.
        # Like logits, probs is a tensor of shape [(batch_size * seq_length) x vocab_size].
        # During sampling, this means it is of shape [1 x vocab_size].
        self.probs = tf.nn.softmax(self.logits)
        # seq2seq.sequence_loss_by_example returns 1D float Tensor containing the log-perplexity
        # for each sequence. (Size is batch_size * seq_length.)
        # Targets are reshaped from a [batch_size x seq_length] tensor to a 1D tensor, of the following layout:
        #   target character (batch 0, seq 0)
        #   target character (batch 0, seq 1)
        #   ...
        #   target character (batch 0, seq seq_len-1)
        #   target character (batch 1, seq 0)
        #   ...
        # These targets are compared to the logits to generate loss.
        # Logits: instead of a list of character indices, it's a list of character index probability vectors.
        # seq2seq.sequence_loss_by_example will do the work of generating losses by comparing the one-hot vectors
        # implicitly represented by the target characters against the probability distributions in logits.
        # It returns a 1D float tensor (a vector) where item i is the log-perplexity of
        # the comparison of the ith logit distribution to the ith one-hot target vector.
        loss = seq2seq.sequence_loss_by_example([self.logits], # logits: 1-item list of 2D Tensors of shape [batch_size x vocab_size]
                [tf.reshape(self.targets, [-1])], # targets: 1-item list of 1D batch-sized int32 Tensors of the same length as logits
                [tf.ones([args.batch_size * args.seq_length])], # weights: 1-item list of 1D batch-sized float-Tensors of the same length as logits
                args.vocab_size) # num_decoder_symbols: integer, number of decoder symbols (output classes)
        # Cost is the arithmetic mean of the values of the loss tensor
        # (the sum divided by the total number of elements).
        # It is a single-element floating point tensor. This is what the optimizer seeks to minimize.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Create a summary for our cost.
        tf.scalar_summary("cost", self.cost)
        # Create a node to track the learning rate as it decays through the epochs.
        self.lr = tf.Variable(args.learning_rate, trainable=False)
        self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
        self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables() # tvars is a python list of all trainable TF Variable objects.

        # tf.gradients returns a list of tensors of length len(tvars) where each tensor is sum(dy/dx).
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr) # Use ADAM optimizer with the current learning rate.
        # Zip creates a list of tuples, where each tuple is (variable tensor, gradient tensor).
        # Training op nudges the variables along the gradient, with the given learning rate, using the ADAM optimizer.
        # This is the op that a training session should be instructed to perform.
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.summary_op = tf.merge_all_summaries()
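
The shape bookkeeping that the comments above walk through (embedding lookup, split, squeeze, then concat and reshape after the decoder) can be verified with a few lines of numpy; the batch-major row ordering of the final [-1, rnn_size] reshape is exactly the layout the comment block illustrates:

import numpy as np

batch_size, seq_length, rnn_size = 2, 3, 4
x = np.arange(batch_size * seq_length * rnn_size).reshape(batch_size, seq_length, rnn_size)

# tf.split(1, seq_length, ...) followed by tf.squeeze(..., [1]):
inputs = [x[:, t, :] for t in range(seq_length)]  # seq_length arrays of [batch_size, rnn_size]

# tf.concat(1, outputs) followed by tf.reshape(..., [-1, rnn_size]):
concat = np.concatenate(inputs, 1)                # [batch_size, seq_length * rnn_size]
flat = concat.reshape(-1, rnn_size)               # [(batch_size * seq_length), rnn_size]

# Row i corresponds to batch i // seq_length, time step i % seq_length.
assert np.array_equal(flat, x.reshape(-1, rnn_size))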
예제 #28
File: g2p.py Project: datavizweb/g2p-tf
    def __init__(self, args):
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        ##
        ## input data will be of dimension
        ## shape = (batch_size, seq_length, invocab_size)
        ##
        self.input_data = tf.placeholder(tf.float32, 
                                         [args.batch_size, 
                                          args.seq_length, 
                                          args.char_size])

        ##
        ## target data will be of dimension
        ## shape = (batch_size, seq_length)
        ## NOTE : out dim not specified here
        ##
        self.targets = tf.placeholder(tf.int32, 
                                      [args.batch_size, 
                                       args.seq_length])

        ##
        ## initial state is of size batch_size * state_size
        ## this is equivalent to tf.zeros([batch_size, state_size])
        ##
        self.initial_state = cell.zero_state(args.batch_size, 
                                             tf.float32)

        ##
        ## input and final softmax layer outputs
        ## here we specify the output dimension
        ##
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", 
                                        [args.rnn_size, 
                                         args.phvocab_size])
            softmax_b = tf.get_variable("softmax_b", 
                                        [args.phvocab_size])

            ##
            ## unrolling of the input to sequence length
            ## and removing the 1 dim
            ##
            inputs = tf.split(1, args.seq_length, self.input_data)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        ##
        ## simple rnn decoder ("simple" meaning without attention).
        ## last_state is the final state from the rnn after the specified
        ## sequence length; it serves as the thought vector.
        ##
        outputs, last_state = seq2seq.rnn_decoder(inputs, 
                                                  self.initial_state, 
                                                  cell, 
                                                  scope='rnnlm')

        ##
        ## outputs is a list of length sequence_length.
        ## Each list element has dimension batch_size * rnn_size,
        ## i.e. for each unrolled input there is one output state,
        ## each of dimension rnn_size.
        ##
        outconcat = tf.concat(1, outputs)
        output = tf.reshape(outconcat, [-1, args.rnn_size])

        ##
        ## final logit layer
        ## NOTE : x * W (where x is batch * rnn_size)
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        ##
        ## cost function
        ## 
        reshaped_target = tf.reshape(self.targets, [-1])
        seq_weight = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [reshaped_target], [seq_weight], args.phvocab_size)

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state

        ##
        ## Optimizer
        ## Adam optimizer and gradient clipping
        ##
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, 
                                                       tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)

        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
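
For reference, with a one-element logits list as used here, seq2seq.sequence_loss_by_example reduces to a per-position softmax cross-entropy. A minimal numpy sketch of that reduction (an illustration assuming uniform weights, not the library code):

import numpy as np

def per_position_crossent(logits, targets):
    # logits: [N, vocab], targets: [N] ints; returns the [N] cross-entropies,
    # i.e. -log softmax(logits)[target] at each position.
    shifted = logits - logits.max(axis=1, keepdims=True)  # numerical stability
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(targets)), targets]

logits = np.array([[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]])  # batch_size 1, seq_length 2
targets = np.array([0, 1])
loss = per_position_crossent(logits, targets)
cost = loss.sum() / 1 / 2  # reduce_sum(loss) / batch_size / seq_length, as above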
예제 #29
    def build_graph(self, test):
        """
        Builds an LSTM graph in TensorFlow.
        """
        if test:
            self.batch_size = 1
            self.seq_len = 1

        ##
        # LSTM Cells
        ##

        lstm_cell = rnn_cell.BasicLSTMCell(self.cell_size)
        self.cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)

        ##
        # Data
        ##

        # inputs and targets are 2D tensors of shape [batch_size, seq_len]
        self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len])
        self.targets = tf.placeholder(tf.int32,
                                      [self.batch_size, self.seq_len])
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        ##
        # Variables
        ##
        with tf.variable_scope('lstm_vars'):
            self.ws = tf.get_variable('ws', [self.cell_size, self.vocab_size])
            self.bs = tf.get_variable('bs', [self.vocab_size])  # TODO: initializer?
            with tf.device('/cpu:0'):  # put on CPU to parallelize for faster training
                self.embeddings = tf.get_variable(
                    'embeddings', [self.vocab_size, self.cell_size])

                # get embeddings for all input words
                input_embeddings = tf.nn.embedding_lookup(
                    self.embeddings, self.inputs)
                # The split turns this tensor into a seq_len-long list of 3D tensors of
                # shape [batch_size, 1, rnn_size]. The squeeze removes the size-1
                # dimension (axis 1) from each tensor.
                inputs_split = tf.split(1, self.seq_len, input_embeddings)
                inputs_split = [
                    tf.squeeze(input_, [1]) for input_ in inputs_split
                ]

                # inputs_split looks like this:
                # [
                #   tensor_<0>([
                #       [batchElt<0>_wordEmbedding<0>],
                #       ...,
                #       [batchElt<batch_size - 1>_wordEmbedding<0>]
                #   ]),
                #   ...,
                #   tensor_<seq_len - 1>([
                #       [batchElt<0>_wordEmbedding<seq_len - 1>],
                #       ...,
                #       [batchElt<batch_size - 1>_wordEmbedding<seq_len - 1>]
                #   ])
                # ]

        def loop(prev, _):
            prev = tf.matmul(prev, self.ws) + self.bs
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embeddings, prev_symbol)

        lstm_outputs_split, self.final_state = seq2seq.rnn_decoder(
            inputs_split,
            self.initial_state,
            self.cell,
            loop_function=loop if test else None,
            scope='lstm_vars')
        lstm_outputs = tf.reshape(tf.concat(1, lstm_outputs_split),
                                  [-1, self.cell_size])

        # lstm_outputs_split looks like this:
        # [
        #   tensor_<0>([
        #       [batchElt<0>_outputEmbedding<0>],
        #       ...,
        #       [batchElt<batch_size - 1>_outputEmbedding<0>]
        #   ]),
        #   ...,
        #   tensor_<seq_len - 1>([
        #       [batchElt<0>_outputEmbedding<seq_len - 1>],
        #       ...,
        #       [batchElt<batch_size - 1>_outputEmbedding<seq_len - 1>]
        #   ])
        # ]

        # lstm_outputs looks like this:
        # tensor([
        #     [batchElt<0>_outputEmbedding<0>],
        #     ...,
        #     [batchElt<0>_outputEmbedding<seq_len - 1>],
        #     [batchElt<1>_outputEmbedding<0>],
        #     ...,
        #     [batchElt<1>_outputEmbedding<seq_len - 1>],
        #     ...
        #     [batchElt<batch_size - 1>_outputEmbedding<0>],
        #     ...,
        #     [batchElt<batch_size - 1>_outputEmbedding<seq_len - 1>]
        # ])

        logits = tf.matmul(lstm_outputs, self.ws) + self.bs
        self.probs = tf.nn.softmax(logits)

        ##
        # Train
        ##

        total_loss = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([self.batch_size * self.seq_len])], self.vocab_size)
        self.loss = tf.reduce_sum(total_loss) / self.batch_size / self.seq_len

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=c.L_RATE,
                                                name='optimizer')
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step,
                                                name='train_op')
예제 #30
    def forward(self):
        # unpack values for easier reference
        seq_length = self._opts.fake_sequence_length
        batch_size = self._opts.batch_size
        input_dim = self._opts.fake_input_dim
        num_hidden = self._opts.num_hidden

        # define the placeholders / symbolic inputs to the graph
        encoder_inputs = []
        decoder_inputs = []
        for idx in range(seq_length):
            encoder_inputs.append(tf.placeholder(tf.float32,
                shape=(batch_size, input_dim),
                name='encoder_inputs_{}'.format(idx)))

            decoder_inputs.append(tf.placeholder(tf.float32,
                shape=(batch_size, input_dim),
                name='decoder_inputs_{}'.format(idx)))

        # We do this one extra time for the decoder because it has
        # a <START> token prepended to the front.
        decoder_inputs.append(tf.placeholder(tf.float32,
            shape=(batch_size, input_dim),
            name='decoder_inputs_{}'.format(seq_length)))

        # create the encoder rnn
        self.cell = rnn.rnn_cell.BasicLSTMCell(num_hidden)
        _, self.enc_state = rnn.rnn(self.cell, encoder_inputs, dtype=tf.float32)

        # define a custom function to convert each decoder output 
        # at each timestep of dimension num_hidden into 
        # the same dimension as the output (which in this case
        # is the same as the input) so it can be used as a prediction
        # or as the input to the next time step of the decoding
        self.W_out = tf.Variable(tf.random_uniform([num_hidden, input_dim], -1, 1), name="sm_w")
        self.b_out = tf.Variable(tf.zeros([input_dim]), name="sm_b")
        def loop_function(prev, i):
            return tf.matmul(prev, self.W_out) + self.b_out

        # build the decoder rnn
        outputs, states = seq2seq.rnn_decoder(decoder_inputs, self.enc_state, self.cell) 

        # so the outputs are the scores
        # we could convert them to probability distributions
        # with a softmax, but for now just treat them as the 
        # direct predictions
        predictions = []
        for idx in range(seq_length):
            pred = loop_function(outputs[idx], idx)
            predictions.append(pred)


        # set the encoder_inputs and decoder_inputs to be members 
        # of the object because they are required for each train step
        # in contrast, the predictions are only used for defining
        # the graph, so we just return them once
        self.encoder_inputs = encoder_inputs
        self.decoder_inputs = decoder_inputs

        return predictions
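
Since encoder_inputs and decoder_inputs are kept as members for use at each train step, a single step's feed would pair each per-timestep placeholder with one time slice of a batch. A hypothetical sketch (enc_batch, dec_batch, and the hyperparameter values are assumed stand-ins, with dec_batch carrying the extra <START> step):

import numpy as np

seq_length, batch_size, input_dim = 5, 8, 16  # assumed values
enc_batch = np.zeros((batch_size, seq_length, input_dim), dtype=np.float32)
dec_batch = np.zeros((batch_size, seq_length + 1, input_dim), dtype=np.float32)

feed = {}
for t in range(seq_length):
    feed['encoder_inputs_{}'.format(t)] = enc_batch[:, t, :]
for t in range(seq_length + 1):
    feed['decoder_inputs_{}'.format(t)] = dec_batch[:, t, :]  # includes <START> step
# In the real graph the keys would be the placeholders themselves (or their
# names), passed to session.run(..., feed_dict=feed) together with a train op.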
예제 #31
File: rnn.py Project: Audakel/byu_cs
with tf.variable_scope("COMPUTATION", reuse=None):
    # create a GRUCell
    cell = GRUCell(state_dim)

    #   use it to create a MultiRNNCell
    cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    #   use it to create an initial_state
    #     note that initial_state will be a *list* of tensors!
    initial_state = cell.zero_state(batch_size, tf.float32)

    softmax_w = tf.get_variable("softmax_w", [state_dim, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])

    # call seq2seq.rnn_decoder
    outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell)
    output = tf.reshape(tf.concat(1, outputs), [-1, state_dim])

    # transform the list of state outputs to a list of logits
    # using a linear transformation.
    logits = tf.matmul(output, softmax_w) + softmax_b
    probs = tf.nn.softmax(logits)
    # call seq2seq.sequence_loss
    loss = seq2seq.sequence_loss([logits],
                                 [tf.reshape(targets, [-1])],
                                 [tf.ones([batch_size * sequence_length])],
                                 vocab_size)
    cost = tf.reduce_sum(loss) / batch_size / sequence_length
    final_state = last_state
    lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
예제 #32
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            self.batch_size = 1
            self.seq_length = 1
        else:
            self.batch_size = args.batch_size
            self.seq_length = args.seq_length

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        elif args.model == 'dropgru' or args.model == 'droprnn':
            pass
        else:
            raise Exception("model type not supported: {}".format(args.model))

        if args.model.startswith('drop'):
            cells = []
            
            dt1 = DropoutBasicRNNCell
            dt2 = DropoutGRUCell
            if args.model != 'dropgru':
                print("additional layers will be basic RNN")
                dt2 = DropoutBasicRNNCell
            
            for ii in range(args.num_layers):
                if False and args.learn_input_embedding:
                    # context-dependent embedding learned as a small RNN before the large GRUs
                    args.learn_input_embedding = False
                    if ii == 0:
                        nc = dt1(args.vocab_size, input_size=args.vocab_size, probofdrop_st=args.dropout, probofdrop_in=0.0)
                    elif ii == 1:
                        nc = dt2(args.rnn_size,   input_size=args.vocab_size, probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                    else:
                        nc = dt2(args.rnn_size,   input_size=args.rnn_size,   probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                else:
                    # embedding is fixed, context-independent; like word vectors
                    firstdroprate = 0.0
                    if args.learn_input_embedding:
                        firstdroprate = args.dropout
                    if ii == 0:
                        nc = dt2(args.rnn_size, input_size=args.vocab_size, probofdrop_st=args.dropout, probofdrop_in=firstdroprate)
                    else:
                        nc = dt2(args.rnn_size, input_size=args.rnn_size,   probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                cells.append(nc)
            
            self.cell = rnn_cell.MultiRNNCell(cells)
            self.cellusesdropout = True
        else:
            print("building basic non-dropout model")
            c1 = cell_fn(args.rnn_size)
            self.cell = rnn_cell.MultiRNNCell([c1] * args.num_layers)
            self.cellusesdropout = False

        self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="x_input_data")
        self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="y_targets")
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        if args.learn_input_embedding:
            self.embedding = tf.get_variable("embedding", [args.vocab_size, args.vocab_size])
        else:
            self.embedding = tf.placeholder(tf.float32, [args.vocab_size, args.vocab_size], name="embedding")

        if self.cellusesdropout:
            self._dropMaskOutput = tf.placeholder(dtype=tf.float32, shape=[self.batch_size*self.seq_length, args.rnn_size], name="dropout_output_mask")
            self._latest_mask_output = None

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("top_softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("top_softmax_b", [args.vocab_size])
            inputs = tf.split(1, self.seq_length, tf.nn.embedding_lookup(self.embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            if self.cellusesdropout:
                assert(prev.get_shape() == self._dropMaskOutput.get_shape())
                prev = tf.matmul(tf.mul(prev, self._dropMaskOutput), softmax_w) + softmax_b
            else:
                prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        self.temperature = tf.placeholder(tf.float32, 1, name="temperature")

        # if loop_function is not None, it is used to generate the next input
        # otherwise, if it is None, the next input will be from the "inputs" sequence
        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [self.batch_size*self.seq_length, args.rnn_size])

        if self.cellusesdropout:
            assert(output.get_shape() == self._dropMaskOutput.get_shape())
            self.logits = tf.matmul(tf.mul(output, self._dropMaskOutput), softmax_w) + softmax_b
        else:
            self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.probswithtemp = tf.nn.softmax(self.logits / self.temperature)

        # 1.44... term converts cost from units of "nats" to units of "bits"
        self.cost = seq2seq.sequence_loss([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([self.batch_size * self.seq_length])]) * 1.44269504088896340736
        self.pred_entropy = tf.reduce_sum(tf.mul(self.probs, tf.log(self.probs + 1e-12)), 1) * (-1.44269504088896340736)

        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False, name="learningrate")
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        zipgradvars = zip(grads, tvars)
        self.train_op = optimizer.apply_gradients(zipgradvars)

        # for tensorboard
        tb_cost = tf.scalar_summary('cost_train', self.cost)
        tb_predent = tf.scalar_summary('prediction_entropy_train', tf.reduce_mean(self.pred_entropy))
        mergethese = [tb_cost, tb_predent]
        for grad,var in zipgradvars:
            mergethese.append(tf.histogram_summary(var.name+'_value', var))
            mergethese.append(tf.histogram_summary(var.name+'_grad', grad))
        self.tbsummary = tf.merge_summary(mergethese)
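
The 1.44269504088896340736 factor above is 1/ln(2), i.e. log2(e): sequence_loss returns cross-entropy in nats, and log2(x) = ln(x) / ln(2) converts it to bits. A quick check:

import math

assert abs(1.0 / math.log(2) - 1.44269504088896340736) < 1e-12
# e.g. a uniform distribution over 256 symbols has entropy ln(256) nats,
# which converts to exactly 8 bits:
assert abs(math.log(256) / math.log(2) - 8.0) < 1e-12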
예제 #33
    def build_network(self):
        with tf.variable_scope('encoder'):
            z_mean_w = tf.Variable(
                self.initializer([self._enc_cell.state_size, self.n_latent]))
            z_mean_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32))
            z_logvar_w = tf.Variable(
                self.initializer([self._enc_cell.state_size, self.n_latent]))
            z_logvar_b = tf.Variable(
                tf.zeros([self.n_latent], dtype=tf.float32))

            _, enc_state = rnn.rnn(self._enc_cell,
                                   self.inputs,
                                   dtype=tf.float32)
            self.z_mean = tf.add(tf.matmul(enc_state, z_mean_w), z_mean_b)
            self.z_log_var = tf.add(tf.matmul(enc_state, z_logvar_w),
                                    z_logvar_b)
            eps = tf.random_normal((self.batch_size, self.n_latent),
                                   0,
                                   1,
                                   dtype=tf.float32)
            self.z = tf.add(self.z_mean,
                            tf.mul(tf.sqrt(tf.exp(self.z_log_var)), eps))

        with tf.variable_scope('decoder') as scope:
            dec_in_w = tf.Variable(
                self.initializer([self.n_latent, self._dec_cell.state_size],
                                 dtype=tf.float32))
            dec_in_b = tf.Variable(
                tf.zeros([self._dec_cell.state_size], dtype=tf.float32))
            dec_out_w = tf.Variable(
                self.initializer([self.n_hidden, self.elem_num],
                                 dtype=tf.float32))
            dec_out_b = tf.Variable(tf.zeros([self.elem_num],
                                             dtype=tf.float32))

            initial_dec_state = self.transfer_func(
                tf.add(tf.matmul(self.z, dec_in_w), dec_in_b))
            dec_out, _ = seq2seq.rnn_decoder(self.inputs, initial_dec_state,
                                             self._dec_cell)
            if self.reverse:
                dec_out = dec_out[::-1]
            dec_output = tf.transpose(tf.pack(dec_out), [1, 0, 2])
            batch_dec_out_w = tf.tile(tf.expand_dims(dec_out_w, 0),
                                      [self.batch_size, 1, 1])
            self.output = tf.nn.sigmoid(
                tf.batch_matmul(dec_output, batch_dec_out_w) + dec_out_b)

            scope.reuse_variables()
            dec_gen_input = [
                0.5 *
                tf.ones([self.batch_size, self.elem_num], dtype=tf.float32)
                for _ in range(self.step_num)
            ]
            self.z_gen = tf.placeholder(tf.float32,
                                        [self.batch_size, self.n_latent])
            dec_gen_state = self.transfer_func(
                tf.add(tf.matmul(self.z_gen, dec_in_w), dec_in_b))
            dec_gen_out, _ = seq2seq.rnn_decoder(dec_gen_input, dec_gen_state,
                                                 self._dec_cell)
            if self.reverse:
                dec_gen_out = dec_gen_out[::-1]
            dec_gen_output = tf.transpose(tf.pack(dec_gen_out), [1, 0, 2])
            self.gen_output = tf.nn.sigmoid(
                tf.batch_matmul(dec_gen_output, batch_dec_out_w) + dec_out_b)

        self.inp = tf.transpose(tf.pack(self.inputs), [1, 0, 2])
        self.train_loss = self.get_loss()
        self.train = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.train_loss)
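
The encoder above uses the standard VAE reparameterization trick: z = mean + sigma * eps with eps ~ N(0, 1), where sigma = sqrt(exp(log_var)) = exp(log_var / 2). A numpy sketch with made-up values:

import numpy as np

z_mean = np.zeros(3)
z_log_var = np.log(np.array([1.0, 4.0, 9.0]))  # variances 1, 4, 9
eps = np.random.normal(size=3)                 # tf.random_normal in the graph above
z = z_mean + np.sqrt(np.exp(z_log_var)) * eps  # standard deviations 1, 2, 3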
예제 #34
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.float32, [args.batch_size, args.seq_length], name="input")
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name="targets")
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        inputs_data = tf.split(1, args.seq_length, self.input_data)

        args.vocab_size = 1

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        #    with tf.device("/cpu:0"):
        #        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        #        inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
        #        #inputs = tf.split(1, args.seq_length, self.input_data)
        #        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        #def loop(prev, _):
        #    prev = tf.matmul(prev, softmax_w) + softmax_b
        #    prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        #    return tf.nn.embedding_lookup(embedding, prev_symbol)

        #outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        outputs, last_state = seq2seq.rnn_decoder(inputs_data, self.initial_state, cell)
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        #loss = seq2seq.sequence_loss_by_example([self.logits],
        #        [tf.reshape(self.targets, [-1])],
        #        [tf.ones([args.batch_size * args.seq_length])],
        #        args.vocab_size)
        self.reg_cost = tf.reduce_sum(1e-1 * (tf.nn.l2_loss(softmax_w)))
        target = tf.cast(self.targets, tf.float32)
        self.target_vector = tf.reshape(target, [-1])
        loss = tf.pow(self.logits / self.target_vector, 2)

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length  + self.reg_cost
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
예제 #35
    def __init__(self,
                 n_input,
                 model,
                 rnn_size,
                 rnn_num_layers,
                 n_outputs,
                 batch_size,
                 input_seq_length,
                 grad_clip,
                 infer=True):

        if infer:
            batch_size = 1
            input_seq_length = 1

        if model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(model))

        cell = cell_fn(rnn_size, state_is_tuple=True)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * rnn_num_layers,
                                                 state_is_tuple=True)

        self.n_input = n_input
        self.input_data = tf.placeholder(
            tf.int32, [batch_size, input_seq_length, n_input])
        self.targets = tf.placeholder(
            tf.int32, [batch_size, input_seq_length, n_outputs])
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope('rnn_model'):
            output_w = tf.get_variable("output_w", [rnn_size, n_outputs])
            output_b = tf.get_variable("output_b", [n_outputs])

            inputs = tf.split(1, input_seq_length, self.input_data)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, output_w) + output_b
            return prev

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope='rnn_model')

        #The following gives (batch_size * input_seq_length, rnn_size) shape tensor
        output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
        self.logits = tf.matmul(
            output,
            output_w) + output_b  # (batch_size * input_seq_length, n_outputs)
        self.output = tf.sigmoid(self.logits)
        self.loss = tf.reduce_sum((tf.reshape(self.targets, [-1, n_outputs]) - self.output)**2) / \
               (batch_size * input_seq_length * n_outputs)

        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
예제 #36
def decoder_rnn(conv_encoder,
                rnn_encoder,
                decoder_inputs,
                decoder_hidden,
                weigth_generation,
                n_steps,
                bias_generation,
                batch_size,
                keep_prob,
                encoder_states,
                defendant,
                embedding,
                sample_rate,
                lstm_layer=1,
                is_train=True):

    with tf.name_scope('decoder_rnn') as scope:

        lstm_cell = rnn_cell.BasicLSTMCell(decoder_hidden,
                                           forget_bias=1.0,
                                           state_is_tuple=True)

        if lstm_layer > 1:
            lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * lstm_layer)

        batch_decoder_inputs = tf.nn.embedding_lookup(embedding,
                                                      decoder_inputs)
        batch_decoder_inputs = tf.transpose(batch_decoder_inputs, [1, 0, 2])
        batch_decoder_inputs = tf.unpack(batch_decoder_inputs)
        batch_decoder_inputs = [
            tf.concat(1, [batch_decoder_inputs[i], conv_encoder])
            for i in range(len(batch_decoder_inputs))
        ]

        if is_train:

            def func(prev, i):

                #words prob
                words_prob = tf.nn.bias_add(tf.matmul(prev, weigth_generation),
                                            bias_generation)

                sample = tf.argmax(words_prob, 1)
                prev_word = tf.nn.embedding_lookup(embedding, sample)
                prev_outputs = tf.concat(1, [prev_word, conv_encoder])

                # select from prev_outputs and ground truth
                prob = tf.random_uniform(minval=0,
                                         maxval=1,
                                         shape=(batch_size, ))
                mask = tf.cast(tf.greater(sample_rate, prob), tf.float32)
                mask = tf.expand_dims(mask, 1)
                mask = tf.tile(mask,
                               [1, prev_outputs.get_shape().as_list()[-1]])

                next_input = mask * prev_outputs + (
                    1 - mask) * batch_decoder_inputs[i]

                return next_input

            outputs, state = seq2seq.rnn_decoder(
                decoder_inputs=batch_decoder_inputs,
                initial_state=encoder_states,
                cell=lstm_cell,
                loop_function=func,
                scope='rnn_decoder')

        else:

            def func(prev, i):

                #words prob
                words_prob = tf.nn.bias_add(tf.matmul(prev, weigth_generation),
                                            bias_generation)

                sample = tf.argmax(words_prob, 1)
                prev_word = tf.nn.embedding_lookup(embedding, sample)
                prev_outputs = tf.concat(1, [prev_word, conv_encoder])

                return prev_outputs

            outputs, state = seq2seq.rnn_decoder(
                decoder_inputs=batch_decoder_inputs,
                initial_state=encoder_states,
                cell=lstm_cell,
                loop_function=func,
                scope='rnn_decoder')

        outputs = tf.nn.dropout(outputs, keep_prob)
        outputs = tf.unpack(outputs)

        res = [0 for i in range(n_steps)]
        for i in range(len(outputs)):

            #words prob
            res[i] = tf.nn.bias_add(tf.matmul(outputs[i], weigth_generation),
                                    bias_generation)

        return res, state
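
The training-branch loop function above implements scheduled sampling: independently for each batch element, with probability sample_rate the decoder is fed its own previous prediction instead of the ground-truth input. A numpy sketch of the masked mix (shapes are illustrative assumptions):

import numpy as np

batch_size, dim = 4, 3
sample_rate = 0.25
prev_outputs = np.ones((batch_size, dim))   # model's previous step, embedded
gold_inputs = np.zeros((batch_size, dim))   # ground-truth input for this step

prob = np.random.uniform(size=(batch_size, 1))
mask = (sample_rate > prob).astype(np.float32)  # 1 with probability sample_rate
next_input = mask * prev_outputs + (1 - mask) * gold_inputs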
예제 #37
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])  #(3, 2)

        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])     #(3, 2)

        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False, dtype=tf.int32)

        self.inc_batch_pointer_op = tf.assign(self.batch_pointer, self.batch_pointer + 1)

        self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)

        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)

        tf.summary.scalar("time_batch", self.batch_time)

        def variable_summaries(var):
            """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                #with tf.name_scope('stddev'):
                #   stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                #tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                #tf.summary.histogram('histogram', var)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])    #(4, 7)
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])  #7
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])  #(7,4)
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
예제 #38
File: model.py Project: yzhuang/notebooks
    def __init__(self, args, infer=False):
        """
        Args:
            infer: whether the model is used for training or inference.
            If doing inference, we need to do two things:
              1. Feed in one word at a time.
              2. Give a loop function to the rnn decoder, in order to
                 feed the previous step output into the next step.
                 Inside the loop function, we prevent gradient updates.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        # Can also experiment with using rnn_cell.GRUCell here.
        cell_constructor = rnn_cell.BasicLSTMCell

        cell = cell_constructor(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # for training, targets is input_data shifted by one word.
        # see example in text_loader_tests
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        # Init hidden state to all zeroes.
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            # Dimensions should be:
            # Output                   *         w            +      b
            # [batch_size, rnn_size] * [rnn_size, vocab_size] + [vocab_size]
            softmax_w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable('softmax_b', args.vocab_size)

            # Word embedding.
            # Always place word embedding lookup on the CPU, and save GPU for
            # running the forward and backward pass of the LSTM.
            # Experience from running char-rnn on my GTX 1070 + 6820HK:
            # CPU utilization was about 16% during training, and GPU utilization was about 90%.
            with tf.device("/cpu:0"):
                # We learn this during training, hence this matrix is also a variable.
                # Each row is the word vector for one word.
                # TODO: consider visualizing this embedding using TSNE after training.
                embedding = tf.get_variable('embedding', [args.vocab_size, args.rnn_size])

                # Dimensions: [batch_size, seq_length, word_vector_length==rnn_size]
                embedding_lookup = tf.nn.embedding_lookup(embedding, self.input_data)
                # Split into a list of records, each with dimension: [batch_size, 1, word_vector_length]
                # This is to match tensorflow's LSTM impl: it expects a list of inputs, each is a time step.
                inputs = tf.split(1, args.seq_length, embedding_lookup)
                # Note that tensorflow wants a 2D matrix for each time step, not 3D. So remove dimension 1.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # While doing inference, we predict a word at each time step, then we feed the prediction
        # back into the LSTM decoder for the next timestep. This is done by giving this loop
        # function to the rnn decoder.
        # Second arg is the step number. We don't use it here.
        def loop(prev, _):
            # Dimensions:
            #            prev           *          w             +      b
            # [batch_size==1, rnn_size] * [rnn_size, vocab_size] + [vocab_size]
            prev = tf.matmul(prev, softmax_w) + softmax_b
            symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, symbol)

        # last_state has dimension [batch_size, cell.state_size==rnn_size]
        # outputs is a list of records, one for each timestep of dimension [batch_size, rnn_size]
        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')

        # note that outputs is a list and cannot be multiplied with w.
        # we first reshape outputs to make it [batch_size * seq_length, rnn_size],
        # so we can multiply it with w.
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state

        # This allows variable learning rate during the training.
        # I.e. we can decrease this over time.
        # Notice the 'trainable=False' flag: we don't want to backprop into lr!
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
예제 #39
    def __init__(self, args, infer=False):
        """
        数据预处理完成以后,接下来就是建立seq2seq模型了。建立模型主要分为三步:
        确定好编码器和解码器中cell的结构,即采用什么循环单元,多少个神经元以及多少个循环层;
        将输入数据转化成tensorflow的seq2seq.rnn_decoder需要的格式,并得到最终的输出以及最后一个隐含状态;
        将输出数据经过softmax层得到概率分布,并且得到误差函数,确定梯度下降优化器;

        由于tensorflow提供的rnncell共有三种,分别是RNN、GRU、LSTM,因此这里我们也提供三种选择,并且每一种都可以使用多层结构,
        即MultiRNNCell
        :param args: 
        :param infer: 
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.rnncell == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.rnncell == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.rnncell == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("rnncell type not supported: {}".format(
                args.rnncell))

        cell = cell_fn(args.rnn_size)
        self.cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)
        with tf.variable_scope('rnnlm'):
            softmax_w = build_weight([args.rnn_size, args.vocab_size],
                                     name='soft_w')
            softmax_b = build_weight([args.vocab_size], name='soft_b')
            word_embedding = build_weight(
                [args.vocab_size, args.embedding_size], name='word_embedding')
            inputs_list = tf.split(
                1, args.seq_length,
                tf.nn.embedding_lookup(word_embedding, self.input_data))
            inputs_list = [tf.squeeze(input_, [1]) for input_ in inputs_list]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(word_embedding, prev_symbol)

        # Functions used to build the seq2seq model: rnn_decoder and attention_decoder
        if not args.attention:
            outputs, last_state = seq2seq.rnn_decoder(
                inputs_list,
                self.initial_state,
                self.cell,
                loop_function=loop if infer else None,
                scope='rnnlm')
            # rnn_decoder takes four main arguments.
            # decoder_inputs is the input data, which must be a list whose tensors have
            # shape [batch_size, input_size]; in other words, the length of the list is
            # seq_length. Our raw input has shape [args.batch_size, args.seq_length],
            # which seems to be missing an input_size dimension; that dimension is the
            # word embedding size (the word2vec size). We therefore perform the word
            # embedding ourselves, and the embedding matrix is a learnable parameter.

            # initial_state is the cell's initial state, of shape [batch_size, cell.state_size];
            # since the rnn_cell module provides a state-initialization function,
            # we can call it directly.

            # cell is the encoder/decoder cell we want to build, as discussed above.
            # The last argument is loop_function: during generation, the decoder's output
            # at the previous time step must be fed back as the input at the next step,
            # and we have to write this loop_function ourselves.

            # outputs has the same dimensions as decoder_inputs, i.e. the output at
            # every time step; last_state has shape [batch_size, cell.state_size],
            # the state of all cells at the final time step. outputs is used next to
            # define the objective, while last_state serves as the next-step state
            # for the sampling/generation function.

        else:
            self.attn_length = 5
            self.attn_size = 32
            self.attention_states = build_weight(
                [args.batch_size, self.attn_length, self.attn_size])
            outputs, last_state = seq2seq.attention_decoder(
                inputs_list,
                self.initial_state,
                self.attention_states,
                self.cell,
                loop_function=loop if infer else None,
                scope='rnnlm')

        self.final_state = last_state
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)

        # TensorFlow provides sequence_loss_by_example, which computes the weighted
        # cross-entropy of every word in the sequence and returns the log-perplexity
        # of each sequence. To use it, we first pass the outputs through a forward
        # layer, and we also need a softmax probability distribution.

        # average loss for each word of each timestep
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        self.var_trainable_op = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, self.var_trainable_op), args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)

        # train_op is the op that must be run during training
        self.train_op = optimizer.apply_gradients(
            zip(grads, self.var_trainable_op))
        self.initial_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=5,
                                    keep_checkpoint_every_n_hours=1)
        # Name the log file by the current timestamp, stripping characters
        # that are awkward in file names.
        self.logfile = args.log_dir + (
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.txt'
        ).replace(' ', '').replace('/', '')
        self.var_op = tf.global_variables()
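
To make the shape bookkeeping in the comments above concrete, here is a minimal standalone sketch of the same rnn_decoder call, written against the legacy pre-1.0 TensorFlow API these examples use; all sizes are hypothetical.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell, seq2seq

batch_size, seq_length, rnn_size, vocab_size = 32, 20, 128, 1000

cell = rnn_cell.GRUCell(rnn_size)
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
embedding = tf.get_variable("embedding", [vocab_size, rnn_size])

# Embed, then split along time into a list of seq_length tensors of shape
# [batch_size, rnn_size]: exactly the list format rnn_decoder expects.
inputs = tf.split(1, seq_length,
                  tf.nn.embedding_lookup(embedding, input_data))
inputs = [tf.squeeze(inp, [1]) for inp in inputs]

initial_state = cell.zero_state(batch_size, tf.float32)
outputs, last_state = seq2seq.rnn_decoder(inputs, initial_state, cell)

# len(outputs) == seq_length, each element [batch_size, rnn_size];
# last_state has shape [batch_size, cell.state_size].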
Example #40
File: model.py Project: Styrke/master-code
    def build(self):
        print('  Building model')
        self.embeddings = tf.Variable(tf.random_normal(
            [self.alphabet_size, self.embedd_dims], stddev=0.1),
                                      name='embeddings')

        X_embedded = tf.gather(self.embeddings, self.Xs, name='embed_X')
        t_embedded = tf.gather(self.embeddings, self.ts_go, name='embed_t')

        with tf.variable_scope('split_X_inputs'):
            X_list = tf.split(split_dim=1,
                              num_split=self.max_x_seq_len,
                              value=X_embedded)
            X_list = [tf.squeeze(X) for X in X_list]
            for X in X_list:  # restore the static shape lost by squeeze
                X.set_shape([None, self.embedd_dims])

        with tf.variable_scope('split_t_inputs'):
            t_list = tf.split(split_dim=1,
                              num_split=self.max_t_seq_len,
                              value=t_embedded)
            t_list = [tf.squeeze(t) for t in t_list]
            for t in t_list:  # restore the static shape lost by squeeze
                t.set_shape([None, self.embedd_dims])

        with tf.variable_scope('dense_out'):
            W_out = tf.get_variable('W_out',
                                    [self.rnn_units, self.alphabet_size])
            b_out = tf.get_variable('b_out', [self.alphabet_size])

        cell = rnn_cell.GRUCell(self.rnn_units)
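        # This single cell definition is reused below by both the encoder
        # (rnn.rnn) and the decoder (seq2seq.rnn_decoder); each call creates
        # its own variables under its own scope.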

        # encoder
        enc_outputs, enc_state = rnn.rnn(cell=cell,
                                         inputs=X_list,
                                         dtype=tf.float32,
                                         sequence_length=self.X_len,
                                         scope='rnn_encoder')

        tf.histogram_summary('final_encoder_state', enc_state)

        # The loop function provides inputs to the decoder:
        def decoder_loop_function(prev, i):
            def feedback_on():
                prev_1 = tf.matmul(prev, W_out) + b_out
                # feedback is on, so feed the decoder with the previous output
                return tf.gather(self.embeddings, tf.argmax(prev_1, 1))

            def feedback_off():
                # feedback is off, so just feed the decoder with t's
                return t_list[i]

            return tf.cond(self.feedback, feedback_on, feedback_off)

        # decoder
        dec_out, dec_state = (seq2seq.rnn_decoder(
            decoder_inputs=t_list,
            initial_state=enc_state,
            cell=cell,
            loop_function=decoder_loop_function))

        self.out = [tf.matmul(d, W_out) + b_out for d in dec_out]

        # for debugging network (NOTE should write this outside of build)
        out_packed = tf.pack(self.out)
        out_packed = tf.transpose(out_packed, perm=[1, 0, 2])
        self.out_tensor = out_packed

        # add TensorBoard summaries for all variables
        tf.contrib.layers.summarize_variables()
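
The tf.cond switch in decoder_loop_function lets one graph serve both teacher forcing and free-running decoding. A hypothetical usage sketch; sess, train_op, and the batch arrays are not part of this example, and other feeds (e.g. model.X_len) are elided:

# Training step: feedback off, so the decoder consumes the gold inputs t_list.
sess.run(train_op, feed_dict={model.Xs: x_batch,
                              model.ts_go: t_go_batch,
                              model.feedback: False})

# Decoding: feedback on, so each step consumes the argmax of the previous
# step's logits instead of the gold input.
out = sess.run(model.out_tensor, feed_dict={model.Xs: x_batch,
                                            model.ts_go: t_go_batch,
                                            model.feedback: True})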
Example #41
File: model.py Project: tnq177/char-rnn
    def __init__(self, config):
        """Init model from provided configuration
		
		Args:
		    config (dict): Model's configuration
		    	Should have:
				rnn_size: 	size of RNN hidden state
				num_layers: number of RNN layers
				rnn_type:	lstm, rnn, or gru
				batch_size:	batch size
				seq_length: sequence length
				grad_clip: 	Clip gradient value by this value
				vocab_size: size of vocabulary
				infer:		True/False, if True, use the predicted output
							to feed back to RNN insted of gold target output.
				is_train:	True if is training
		"""

        logger.info("Create model with options: \n{}".format(pprint.pformat(config)))
        self.rnn_size = config["rnn_size"]
        self.num_layers = config["num_layers"]
        self.rnn_type = config["rnn_type"]
        self.batch_size = config["batch_size"]
        self.seq_length = config["seq_length"]
        self.grad_clip = config["grad_clip"]
        self.vocab_size = config["vocab_size"]
        self.infer = config["infer"]
        self.is_train = config["is_train"]
        self.reuse = config["reuse"]

        if self.infer:
            self.batch_size = 1
            self.seq_length = 1

        if self.rnn_type == "rnn":
            cell_fn = rnn_cell.BasicRNNCell
        elif self.rnn_type == "gru":
            cell_fn = rnn_cell.GRUCell
        elif self.rnn_type == "lstm":
            cell_fn = rnn_cell.LSTMCell
        else:
            msg = "Rnn type should be either rnn, gru or lstm"
            logger.error(msg)
            sys.exit(msg)

        # Define the cell
        cell = cell_fn(self.rnn_size)
        # Create multiple layers RNN
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers)

        self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])
        self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])
        self.initial_state = cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope(MODEL_SCOPE, reuse=self.reuse):
            softmax_w = tf.get_variable("softmax_w", [self.rnn_size, self.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [self.vocab_size])

            # Model params stored in DEVICE_SCOPE (here using GPU)
            with tf.device(DEVICE_SCOPE):
                embeddings = tf.get_variable("embeddings", [self.vocab_size, self.rnn_size])

                # Split it into list of step input, i.e. along dimension 1
                inputs = tf.split(1, self.seq_length, tf.nn.embedding_lookup(embeddings, self.input_data))
                """
				tf.split works like numply.split, inputs is now a list of step
				inputs (to rnn). Each step input has shape (batch_size, 1, rnn_size).
				We don't need that dimension 1, remove it by squeezing.
				"""
                inputs = [tf.squeeze(_input, [1]) for _input in inputs]

            """
			Instead of writing the neuralnet manually, use seq2seq.rnn_decoder.
			In test time, the predicted output is fed back to RNN instead of 
			gold target output like in training time.
			"""

            def loop(prev, _):
                prev = tf.matmul(prev, softmax_w) + softmax_b
                # stop_gradient keeps the argmax feedback path out of backpropagation
                prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
                return tf.nn.embedding_lookup(embeddings, prev_symbol)

            outputs, last_state = seq2seq.rnn_decoder(
                inputs, self.initial_state, cell, loop_function=loop if self.infer else None, scope=MODEL_SCOPE
            )
            # Concat each sequence of the batch
            output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])  # now (batch_size x seq_length) x rnn_size
            self.logits = tf.matmul(output, softmax_w) + softmax_b
            self.probs = tf.nn.softmax(self.logits)
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [tf.reshape(self.targets, [-1])], [tf.ones([self.batch_size * self.seq_length])]
            )
            self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)
            self.final_state = last_state

            if not self.is_train:
                return

            self.lr = tf.Variable(0.0, trainable=False)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.grad_clip)

            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
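
The tf.ones weights above count every position equally. If the batches were padded, the same call could mask padding through the weights instead; a hedged variant of the loss above, where pad_id is a hypothetical padding token id:

            # Hedged variant: zero out the loss at padded positions.
            # pad_id is a hypothetical padding token id, not part of this example.
            flat_targets = tf.reshape(self.targets, [-1])
            weights = tf.to_float(tf.not_equal(flat_targets, pad_id))
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [flat_targets], [weights])
            # Normalize by the number of real tokens, not batch_size * seq_length.
            self.cost = tf.reduce_sum(loss) / tf.maximum(
                tf.reduce_sum(weights), 1.0)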
Example #42
####################

# hand-made seq2seq

# outputs_le, finstate = rnn.rnn(neurons, inputs, init_state)
# inp_state = array_ops.slice(finstate, [0, 0], [batch_size, input_size])
# le_state = array_ops.slice(finstate, [0, input_size], [batch_size, le_size])
# finstate = array_ops.concat(1, [le_state, inp_state])
# outputs, finstate = rnn.rnn(neurons_out, outputs_le, finstate, scope="out")

####################

# official seq2seq (perfect regression)

_, enc_state = rnn.rnn(neurons, inputs, initial_state=state)
outputs, finstate = ss.rnn_decoder(targets, enc_state, neurons)

loss = tf.add_n([
    tf.nn.l2_loss(target - output)
    for output, target in zip(outputs, targets)
]) / bptt_steps / batch_size / net_size

lr = tf.Variable(0.0, trainable=False)

tvars = tf.trainable_variables()
grads_raw = tf.gradients(loss, tvars)
grads, _ = tf.clip_by_global_norm(grads_raw, 5.0)


# optimizer = tf.train.GradientDescentOptimizer(lr)
# optimizer = tf.train.AdagradOptimizer(lr)
optimizer = tf.train.AdamOptimizer(lr)
# optimizer = tf.train.RMSPropOptimizer(lr)
# optimizer = tf.train.AdadeltaOptimizer(lr)
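
The fragment stops after choosing an optimizer; the step it omits, mirroring the train_op construction in the examples above:

# Apply the clipped gradients: the step the fragment stops short of.
train_op = optimizer.apply_gradients(zip(grads, tvars))

# lr is a non-trainable Variable, so it is typically set from the training
# loop with an assign, e.g. (hypothetical schedule):
#   sess.run(tf.assign(lr, base_lr * decay ** epoch))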