Esempio n. 1
0
    def __init__(self, isTraining, **attribs):
        """Set up the encoder's hyper-parameters and its LSTM cell.

        Args:
            isTraining: True when the network is built for training; in that
                case the LSTM cell is wrapped with a dropout wrapper.
            **attribs: Keyword attributes configuring the encoder:
                hidden_size: Hidden size of the LSTM cell used for encoding
                num_layers: Number of hidden layers used
                bi_dir: Boolean telling whether the encoder is bidirectional
                num_encoder_symbols: Vocabulary size of the input symbols
                embedding_size: Embedding size used for the input symbols
                out_prob: Output keep probability (1 - dropout probability);
                    it is only read, and hence only required, when
                    isTraining is true

        Raises:
            KeyError: If one of the required attributes is missing.
        """
        self.isTraining = isTraining
        if isTraining:
            # The keep probability only matters when dropout is applied,
            # which happens exclusively in training mode.
            self.out_prob = attribs['out_prob']
        self.hidden_size = attribs['hidden_size']
        self.num_layers = attribs['num_layers']
        self.bi_dir = attribs['bi_dir']

        # Plain LSTM cell first; dropout is layered on top when training.
        lstm = rnn_cell.BasicLSTMCell(self.hidden_size, state_is_tuple=True)
        self.cell = lstm
        if isTraining:
            self.cell = rnn_cell.DropoutWrapper(
                lstm, output_keep_prob=self.out_prob)

        self.vocab_size = attribs['num_encoder_symbols']
        self.emb_size = attribs['embedding_size']
Esempio n. 2
0
    def __init__(self, isTraining, enc_attribs):
        """Initializer for encoder class.

        Every key of `enc_attribs` is copied onto the instance as an
        attribute, then the (possibly stacked) LSTM cell is built and stored
        in `self.cell`.

        Args:
            isTraining: Whether the network is in training mode or not. This
                decides whether each LSTM layer is wrapped with dropout.
            enc_attribs: A dictionary of attributes used by encoder like:
                hidden_size: Hidden size of LSTM cell used for encoding
                num_layers: Number of hidden layers used
                vocab_size: Vocabulary size of input symbols
                emb_size: Embedding size used to feed in input symbols
                out_prob(Optional): (1 - Dropout probability); only read
                    when isTraining is true
        """
        self.isTraining = isTraining
        # Update the parameters
        self.__dict__.update(enc_attribs)

        def make_layer_cell():
            """Build one LSTM layer, dropout-wrapped during training."""
            layer = rnn_cell.BasicLSTMCell(self.hidden_size,
                                           state_is_tuple=True)
            if self.isTraining:
                layer = rnn_cell.DropoutWrapper(
                    layer, output_keep_prob=self.out_prob)
            return layer

        if self.num_layers > 1:
            # Each layer needs its own cell object: `[cell] * n` repeats one
            # instance, which newer TensorFlow 1.x releases either reject or
            # treat as a single set of weights shared by all layers.
            self.cell = rnn_cell.MultiRNNCell(
                [make_layer_cell() for _ in range(self.num_layers)],
                state_is_tuple=True)
        else:
            self.cell = make_layer_cell()
Esempio n. 3
0
    def __init__(self, args, data, infer=False):
        """Build the graph of a stacked-LSTM language model.

        Creates the input/target placeholders, the recurrent network, the
        softmax projection, the loss, and an Adam training op with
        global-norm gradient clipping. Scalar summaries for the loss and
        learning rate and histogram summaries for the gradients are
        registered and merged into `self.merged_op`.

        Args:
            args: Configuration object; this method reads `batch_size`,
                `seq_length`, `state_size`, `num_layers` and `grad_clip`.
                NOTE: when `infer` is true, `batch_size` and `seq_length`
                are overwritten with 1 on this object in place.
            data: Object exposing `vocab_size`.
            infer: When true, build the graph for a batch of one sequence
                of length one.
        """
        if infer:
            args.batch_size = 1
            args.seq_length = 1
        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])

        with tf.name_scope('model'):
            # One distinct cell object per layer: `[cell] * n` repeats a
            # single instance, which newer TensorFlow 1.x releases either
            # reject or treat as one set of weights shared by all layers.
            self.cell = rnn_cell.MultiRNNCell(
                [rnn_cell.BasicLSTMCell(args.state_size)
                 for _ in range(args.num_layers)])
            self.initial_state = self.cell.zero_state(args.batch_size,
                                                      tf.float32)
            with tf.variable_scope('rnnlm'):
                w = tf.get_variable('softmax_w',
                                    [args.state_size, data.vocab_size])
                b = tf.get_variable('softmax_b', [data.vocab_size])
                with tf.device("/cpu:0"):
                    embedding = tf.get_variable(
                        'embedding', [data.vocab_size, args.state_size])
                    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope('loss'):
            # Flatten all positions into rows so a single matmul projects
            # every RNN output onto the vocabulary.
            output = tf.reshape(outputs, [-1, args.state_size])

            self.logits = tf.matmul(output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            targets = tf.reshape(self.target_data, [-1])
            # Every target position gets a weight of 1.
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.cost = tf.reduce_sum(loss) / args.batch_size
            tf.summary.scalar('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)

            optimizer = tf.train.AdamOptimizer(self.lr)
            tvars = tf.trainable_variables()
            grads = tf.gradients(self.cost, tvars)
            for g in grads:
                # tf.gradients yields None for variables the cost does not
                # depend on; those have nothing to summarize.
                if g is not None:
                    tf.summary.histogram(g.name, g)
            grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)

            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()
Esempio n. 4
0
    def set_cell_config(self):
        """Create the LSTM cell used by decoder.

        Reads `self.hidden_size`, `self.isTraining`, `self.out_prob` (only
        when training), `self.num_layers` and `self.vocab_size`.

        Returns:
            The (possibly stacked) LSTM cell wrapped in an
            OutputProjectionWrapper that projects the cell output to
            `self.vocab_size`.
        """
        def make_layer_cell():
            """Build one LSTM layer, dropout-wrapped during training."""
            # Use the BasicLSTMCell - https://arxiv.org/pdf/1409.2329.pdf
            layer = rnn_cell.BasicLSTMCell(self.hidden_size,
                                           state_is_tuple=True)
            if self.isTraining:
                # During training we use a dropout wrapper
                layer = rnn_cell.DropoutWrapper(
                    layer, output_keep_prob=self.out_prob)
            return layer

        if self.num_layers > 1:
            # If RNN is stacked then we use MultiRNNCell class. Each layer
            # gets its own cell object: `[cell] * n` repeats one instance,
            # which newer TensorFlow 1.x releases either reject or treat as
            # a single set of weights shared by all layers.
            cell = rnn_cell.MultiRNNCell(
                [make_layer_cell() for _ in range(self.num_layers)],
                state_is_tuple=True)
        else:
            cell = make_layer_cell()

        # Use the OutputProjectionWrapper to project cell output to output
        # vocab size. This projection is fine for a small vocabulary output
        # but would be bad for large vocabulary output spaces.
        cell = rnn_cell.OutputProjectionWrapper(cell, self.vocab_size)
        return cell
Esempio n. 5
0
    def recurrent_network_model(self, x):
        """Run a single-layer LSTM over `x` and project its last output.

        Args:
            x: Float input tensor fed to `tf.nn.dynamic_rnn` with its default
                batch-major layout, i.e. [batch, time, features].

        Returns:
            The output of the last time step multiplied by the output weights
            plus biases; one row of `self.n_nodes_output` values per batch
            element.
        """
        # Output layer parameters, drawn from a normal distribution with
        # mean 0 and standard deviation 0.1.
        layer = {
            'weights':
            tf.Variable(
                tf.random_normal([self.rnn_size, self.n_nodes_output], 0,
                                 0.1)),
            'biases':
            tf.Variable(tf.random_normal([self.n_nodes_output], 0, 0.1))
        }

        lstm_cell = core_rnn_cell.BasicLSTMCell(self.rnn_size)
        outputs, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)

        # dynamic_rnn returns one batch-major tensor [batch, time, rnn_size],
        # not a list of per-step tensors, so `outputs[-1]` would pick the
        # last batch element. Slice the last time step for every element.
        last_output = outputs[:, -1, :]
        output = tf.matmul(last_output, layer['weights']) + layer['biases']
        return output
Esempio n. 6
0
    def __init__(self, configs, data, infer=False):
        """Build the graph of a stacked-LSTM language model.

        Creates the input/target/learning-rate placeholders, the recurrent
        network, the softmax projection, the loss, and an Adam training op
        with global-norm gradient clipping.

        Args:
            configs: Configuration object; this method reads `batch_size`,
                `seq_length`, `state_size`, `num_layers` and `grad_clip`.
                NOTE: when `infer` is true, `batch_size` and `seq_length`
                are overwritten with 1 on this object in place.
            data: Object exposing `vocab_size`.
            infer: When true, build the graph for a batch of one sequence
                of length one.
        """
        if infer:
            configs.batch_size = 1
            configs.seq_length = 1
        self.input_data = tf.placeholder(
            tf.int32, [configs.batch_size, configs.seq_length])
        self.target_data = tf.placeholder(
            tf.int32, [configs.batch_size, configs.seq_length])
        self.lr = tf.placeholder(tf.float32, [])

        # Cell definition. One distinct cell object per layer: `[cell] * n`
        # repeats a single instance, which newer TensorFlow 1.x releases
        # either reject or treat as one set of weights shared by all layers.
        self.cell = rnn.MultiRNNCell(
            [rnn.BasicLSTMCell(configs.state_size)
             for _ in range(configs.num_layers)])
        self.initial_state = self.cell.zero_state(configs.batch_size,
                                                  tf.float32)

        # Softmax projection parameters
        w = tf.get_variable('softmax_w',
                            [configs.state_size, data.vocab_size])
        b = tf.get_variable('softmax_b', [data.vocab_size])

        # Embedding lookup for the input symbols
        embedding = tf.get_variable('embedding',
                                    [data.vocab_size, configs.state_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # Run the RNN, then flatten all positions into rows so a single
        # matmul projects every output onto the vocabulary.
        output, last_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state)
        output_new = tf.reshape(output, [-1, configs.state_size])

        # Logit computation
        self.logits = tf.matmul(output_new, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state

        # Loss against the targets; every position gets a weight of 1.
        target = tf.reshape(self.target_data, [-1])
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [target],
            [tf.ones_like(target, dtype=tf.float32)])
        self.cost = tf.reduce_sum(loss) / configs.batch_size

        # Optimizer with global-norm gradient clipping
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(grads, configs.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))