Example 1
    def lsrc_sequence_graph_old(self, config, inputs):
        """
        Build the recurrence graph of the LSRC model. 
        It returns the output and the last hidden layer
        """

        outputs = inputs
        last_state = []

        for layer in range(self.num_layers):
            inputs, last_local_state_ = legacy_seq2seq.rnn_decoder(
                outputs,
                self.initial_state[layer].Local,
                self.cell[layer].Local,
                loop_function=None)
            outputs, last_global_state_ = legacy_seq2seq.rnn_decoder(
                inputs,
                self.initial_state[layer].Global,
                self.cell[layer].Global,
                loop_function=None)
            last_lsrc_states_ = LSRCTuple(last_local_state_,
                                          last_global_state_)
            last_state.append(last_lsrc_states_)

        output = tf.reshape(tf.concat(outputs, 1),
                            [-1, self.global_state_size])

        return output, last_state
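
For orientation, every example on this page uses the same basic call pattern. A minimal, self-contained sketch is shown below (assuming TensorFlow 1.x, where tf.contrib.rnn and tf.contrib.legacy_seq2seq are available; sizes and names are illustrative only):

    import tensorflow as tf
    from tensorflow.contrib import legacy_seq2seq, rnn

    batch_size, seq_length, hidden_size = 32, 20, 128

    # rnn_decoder expects a Python list of seq_length tensors,
    # each of shape [batch_size, input_dim].
    decoder_inputs = [
        tf.placeholder(tf.float32, [batch_size, hidden_size])
        for _ in range(seq_length)
    ]

    cell = rnn.BasicLSTMCell(hidden_size)
    initial_state = cell.zero_state(batch_size, tf.float32)

    # Returns (outputs, final_state): outputs is a list of seq_length
    # tensors of shape [batch_size, hidden_size]. With loop_function=None
    # the provided inputs are fed at every step; a loop_function re-feeds
    # (a transformation of) the previous output instead.
    outputs, final_state = legacy_seq2seq.rnn_decoder(
        decoder_inputs, initial_state, cell, loop_function=None)
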
Example 2
    def generate(self):
        inputs = tf.split(axis=1,
                          num_or_size_splits=self.args.seq_length,
                          value=tf.nn.embedding_lookup(self.embedding,
                                                       self.input_data))
        inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1)
                  for input_ in inputs]

        def loop(prev, i):
            return prev

        with tf.variable_scope('GEN', reuse=self.has_init_seq2seq) as scope:
            self.has_init_seq2seq = True
            if self.args.num_layers == 1:
                outputs, last_state = legacy_seq2seq.rnn_decoder(
                    inputs, [self.initial_state1],
                    self.cell,
                    loop_function=loop,
                    scope=scope)
            elif self.args.num_layers == 2:
                outputs, last_state = legacy_seq2seq.rnn_decoder(
                    inputs, [self.initial_state1, self.initial_state2],
                    self.cell,
                    loop_function=loop,
                    scope=scope)
            else:
                raise Exception(
                    'Unsupported number of layers. Use 1 or 2 layers for now..'
                )
            outputs = [tf.nn.l2_normalize(o, 1) for o in outputs]
        self.outputs = outputs
        return outputs
Example 3
    def __init__(self, args, embedding):
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='STAND_input')
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='STAND_targets')
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        self.embedding = embedding
        with tf.variable_scope('STAND'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            inputs = tf.split(axis=1, num_or_size_splits=args.seq_length, value=tf.nn.embedding_lookup(self.embedding, self.input_data))
            inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1) for input_ in inputs]

        def loop(prev, i):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.l2_normalize(tf.nn.embedding_lookup(embedding, prev_symbol), 1)

        o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=None, scope='STAND')
        with tf.variable_scope('STAND', reuse=True) as scope:
            sf_o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop, scope=scope)
        output = tf.reshape(tf.concat(axis=1, values=o), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        sf_output = tf.reshape(tf.concat(axis=1, values=sf_o), [-1, args.rnn_size])
        self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
        self.self_feed_probs = tf.nn.softmax(self_feed_logits)

        loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                args.grad_clip)
        for g, v in zip(grads, tvars):
            print(v.name)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 4
    def createGraph(self):

        self.input = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='inputs')
        self.targs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='targets')
        onehot = tf.one_hot(self.input, self.vocab_size, name='input_oh')

        inputs = tf.split(onehot, self.seq_len, 1)
        inputs = [tf.squeeze(i, [1]) for i in inputs]
        targets = tf.split(self.targs, self.seq_len, 1)

        with tf.variable_scope("posRNN"):

            cells = [GRUCell(self.num_hidden) for _ in range(self.num_layers)]

            stacked = MultiRNNCell(cells, state_is_tuple=True)
            self.zero_state = stacked.zero_state(self.batch_size, tf.float32)

            outputs, self.last_state = seq2seq.rnn_decoder(
                inputs, self.zero_state, stacked)

            w = tf.get_variable(
                "w", [self.num_hidden, self.vocab_size],
                tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.02))
            b = tf.get_variable("b", [self.vocab_size],
                                initializer=tf.constant_initializer(0.0))
            logits = [tf.matmul(o, w) + b for o in outputs]

            const_weights = [
                tf.ones([self.batch_size]) for _ in range(self.seq_len)
            ]
            self.loss = seq2seq.sequence_loss(logits, targets, const_weights)

            self.opt = tf.train.AdamOptimizer(0.001,
                                              beta1=0.5).minimize(self.loss)

        with tf.variable_scope("posRNN", reuse=True):

            batch_size = 1
            self.s_inputs = tf.placeholder(tf.int32, [batch_size],
                                           name='s_inputs')
            s_onehot = tf.one_hot(self.s_inputs,
                                  self.vocab_size,
                                  name='s_input_oh')

            self.s_zero_state = stacked.zero_state(batch_size, tf.float32)
            s_outputs, self.s_last_state = seq2seq.rnn_decoder(
                [s_onehot], self.s_zero_state, stacked)
            s_outputs = tf.reshape(s_outputs, [1, self.num_hidden])
            self.s_probs = tf.nn.softmax(tf.matmul(s_outputs, w) + b)
Example 5
    def add_rnn_decoder_layer(self):
        # https://stackoverflow.com/questions/36994067/no-feed-previous-argument-for-tensorflow-basic-rnn-seq2seq-function
        weights = tf.Variable(
            tf.random_normal(
                [self.hidden_size, self.output_size],
                # mean=0.5,
                stddev=0.5,
                dtype=tf.float64))
        biases = tf.Variable(
            tf.random_normal(
                [self.output_size],
                # mean=0.5,
                stddev=0.5,
                dtype=tf.float64))

        def inference_loop_function(prev, _):
            return tf.matmul(prev, weights) + biases

        loop_function = inference_loop_function if self.feed_previous else None
        layers = rnn.MultiRNNCell([
            self.lstm_cell(self.hidden_size) for i in range(self.layer_depth)
        ],
                                  state_is_tuple=True)
        outputs, self.cell_states = legacy_seq2seq.rnn_decoder(
            tf.unstack(self.ys, axis=1),
            self.final_enc_states,
            layers,
            loop_function=loop_function)

        self.cell_outputs = tf.stack(outputs, axis=1)
Example 6
    def Generate(self):
        with tf.variable_scope('gen') as scope:
            inputs = tf.split(
                tf.nn.embedding_lookup(self.emb_matrix, self.input_data),
                self.args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
            outputs, _ = legacy_seq2seq.rnn_decoder(inputs,
                                                    self.initial_state,
                                                    self.cell_fn,
                                                    scope='rnn_dec')

            output_ = tf.reshape(tf.concat(outputs, 1),
                                 [-1, self.args.rnn_size])
            logits = tf.matmul(tf.cast(output_, tf.float32),
                               self.weight) + self.bias

            probs = tf.nn.softmax(logits)
            pred = tf.multinomial(probs, 1)

            prediction = tf.reshape(
                pred, [self.args.batch_size, self.args.seq_length])

            fake_data = tf.concat([self.input_data, prediction], 1)
            tvars = tf.trainable_variables()
            Gtvars = [
                v for v in tf.all_variables() if v.name.startswith(scope.name)
            ]
            print(Gtvars)
Example 7
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])

        cells = []
        for _ in range(args.rnn_layers):
            cells.append(rnn.BasicLSTMCell(args.rnn_size))
        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        dense_layer_w = tf.get_variable("dense_layer_w", [args.rnn_size, args.vocab_size])
        dense_layer_b = tf.get_variable("dense_layer_b", [args.vocab_size])

        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(ip, [1]) for ip in inputs]

        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        outputs, self.final_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell)
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        logits = tf.matmul(output, dense_layer_w) + dense_layer_b
        self.probs = tf.nn.softmax(logits)
        self.predicted_output = tf.reshape(tf.argmax(self.probs, 1), [args.batch_size, args.seq_length])

        self.lr = tf.Variable(0.0, trainable=False)
        loss = sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tf.reshape(self.targets, [-1]))
        self.cost = tf.reduce_mean(loss)
        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)
Example 8
 def discriminate_wv(self, input_data_wv):
     with tf.variable_scope('DISC', reuse=self.has_init_seq2seq) as scope:
         self.has_init_seq2seq = True
         output_wv, states_wv = legacy_seq2seq.rnn_decoder(
             input_data_wv, self.initial_state, self.cell, scope=scope)
         predicted_classes_wv = tf.matmul(output_wv[-1], self.fc_layer)
     return predicted_classes_wv
Example 9
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        use_dropout = training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0)
        cell_fn = self.select_cell_fn(args.model)
        cells = self.create_cell_stack('hidden', cell_fn, args, use_dropout=use_dropout)
        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        softmax_w = self.create_var('rnnlm', 'softmax_w', [args.rnn_size, args.vocab_size])
        softmax_b = self.create_var('rnnlm', 'softmax_b', [args.vocab_size])

        embedding = self.create_var('rnnlm', 'embedding', [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout beta testing: double check which one should affect next line
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])


        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Example 10
def custom_rnn_seq2seq(encoder_inputs,
                       decoder_inputs,
                       enc_cell,
                       dec_cell,
                       dtype=dtypes.float32,
                       initial_state=None,
                       use_previous=False,
                       scope=None,
                       num_units=0):

    with variable_scope.variable_scope(scope or "custom_rnn_seq2seq"):
        _, enc_state = core_rnn.static_rnn(enc_cell,
                                           encoder_inputs,
                                           dtype=dtype,
                                           scope=scope,
                                           initial_state=initial_state)
        print(enc_state.get_shape())
        c = tf.tanh(
            tf.matmul(tf.get_variable("v", [dim_hidden, dim_hidden]),
                      enc_state))
        h_prime_init = tf.tanh(
            tf.matmul(tf.get_variable("v_prime", [dim_hidden, dim_hidden]), c))
        if not use_previous:
            return seq2seq.rnn_decoder(decoder_inputs,
                                       LSTMStateTuple(c, h_prime_init),
                                       dec_cell,
                                       scope=scope)
        return infer(LSTMStateTuple(c, h_prime_init), dec_cell, num_units)
Example 11
def advanced_rnn_decoder(decoder_inputs,
                         initial_state,
                         cell,
                         num_symbols,
                         output_projection=None,
                         feed_previous=False,
                         scope=None):
    with variable_scope.variable_scope(scope
                                       or "embedding_rnn_decoder") as scope:
        if output_projection is not None:
            dtype = scope.dtype
            proj_weights = ops.convert_to_tensor(output_projection[0],
                                                 dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with(
                [None, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1],
                                                dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        if feed_previous:
            loop_function = _extract_last_and_project(output_projection)
        else:
            loop_function = None

        return rnn_decoder(decoder_inputs,
                           initial_state,
                           cell,
                           loop_function=loop_function)
Example 12
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        additional_cell_args = {}
        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'gridlstm':
            cell_fn = grid_rnn.Grid2LSTMCell
            additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
        elif args.model == 'gridgru':
            cell_fn = grid_rnn.Grid2GRUCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size, **additional_cell_args)

        self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                  loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [tf.reshape(self.targets, [-1])],
                                                [tf.ones([args.batch_size * args.seq_length])],
                                                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))
Example 13
    def __init__(self,
                 vocabulary_size,
                 batch_size,
                 sequence_length,
                 hidden_layer_size,
                 cells_size,
                 gradient_clip=5.,
                 training=True):

        cells = [rnn.LSTMCell(hidden_layer_size) for _ in range(cells_size)]
        self.cell = rnn.MultiRNNCell(cells)

        self.input_data = tf.placeholder(tf.int32, [batch_size, sequence_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, sequence_length])
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
            softmax_layer = tf.get_variable("softmax_layer", [hidden_layer_size, vocabulary_size])
            softmax_bias = tf.get_variable("softmax_bias", [vocabulary_size])

        with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
            embedding = tf.get_variable("embedding", [vocabulary_size, hidden_layer_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.split(inputs, sequence_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(previous, _):
            previous = tf.matmul(previous, softmax_layer) + softmax_bias
            previous_symbol = tf.stop_gradient(tf.argmax(previous, 1))
            return tf.nn.embedding_lookup(embedding, previous_symbol)

        with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
            outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=loop if not training else None)
            output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_layer_size])

        self.logits = tf.matmul(output, softmax_layer) + softmax_bias
        self.probabilities = tf.nn.softmax(self.logits)

        loss = legacy_seq2seq.sequence_loss_by_example([self.logits], [tf.reshape(self.targets, [-1])], [tf.ones([batch_size * sequence_length])])

        with tf.name_scope("cost"):
            self.cost = tf.reduce_sum(loss) / batch_size / sequence_length
        self.final_state = last_state
        self.learning_rate = tf.Variable(0.0, trainable=False)
        trainable_vars = tf.trainable_variables()

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_vars), gradient_clip)

        with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(grads, trainable_vars))

        tf.summary.histogram("logits", self.logits)
        tf.summary.histogram("probabilitiess", self.probabilities)
        tf.summary.histogram("loss", loss)
        tf.summary.scalar("cost", self.cost)
        tf.summary.scalar("learning_rate", self.learning_rate)
Example 14
def sample(vocab_inv, vocab, model_dir, sample_length=30, prime=None):
    with tf.Session() as sess:
        cell = setup_cell()
        input_data = tf.placeholder(tf.int32, [1, 1])
        initial_state = cell.zero_state(1, tf.float32)

        main_scope = 'light'
        vocab_size = len(vocab)
        with tf.variable_scope(main_scope, reuse=tf.AUTO_REUSE):
            softmax_w = tf.get_variable("softmax_w", [RNN_SIZE, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [vocab_size, RNN_SIZE])
                inputs = tf.split(
                    tf.nn.embedding_lookup(embedding, input_data), 1, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs,
                                                         initial_state,
                                                         cell,
                                                         scope=main_scope)
        output = tf.reshape(tf.concat(outputs, 1), [-1, RNN_SIZE])
        logits = tf.matmul(output, softmax_w) + softmax_b
        probs = tf.nn.softmax(logits)
        final_state = last_state

        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('starting sampling')
            state = sess.run(initial_state)

            if not prime:
                prime = random.choice(list(vocab.keys()))
            print('prime is:' + prime)
            x = np.zeros((1, 1))
            x[0, 0] = vocab.get(prime, 0)
            feed = {input_data: x, initial_state: state}
            [state] = sess.run([final_state], feed)
            ret = prime

            word = prime.split()[-1]
            for n in range(sample_length):
                x = np.zeros((1, 1))
                x[0, 0] = vocab.get(word, 0)
                feed = {input_data: x, initial_state: state}
                [state_probs, state] = sess.run([probs, final_state], feed)
                p = state_probs[0]
                t = np.cumsum(p)
                s = np.sum(p)
                sample = int(np.searchsorted(t, np.random.rand(1) * s))
                pred = vocab_inv[sample]
                ret += ' ' + pred
                word = pred
            print('sampling finished')
            print('sampling result: ' + ret)
Example 15
    def __init__(self, args, training=True):
        self.args = args
        # When not training, we feed in one character at a time and predict the next
        if not training:
            args.batch_size = 1
            args.seq_length = 1
        # Assign the basic type of RNN unit
        if args.mtype == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.mtype == 'gru':
            cell_fn = rnn.GRUCell
        elif args.mtype == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.mtype == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell)
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                           [args.batch_size, args.seq_length])

        loss = sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=tf.reshape(self.targets, [-1]))

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)
Example 16
def model(cell_state_size, rnn_cells_depth, batch_size, batch_len,
          number_of_tokens, reuse):

    input_placeholder = tf.placeholder(tf.int32,
                                       shape=(None, batch_len),
                                       name="input")
    target_placeholder = tf.placeholder(tf.int32,
                                        shape=(None, batch_len),
                                        name="target")
    # make dictionary for letters (60, 128)

    with tf.variable_scope("rnn") as scope:
        if reuse:
            scope.reuse_variables()

        cell = tf.nn.rnn_cell.BasicLSTMCell(cell_state_size)
        #cell = tf.nn.rnn_cell.BasicRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.IntersectionRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.LSTMCell(cell_state_size)
        #cell = tf.contrib.rnn.TimeFreqLSTMCell(cell_state_size)
        rnn_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * rnn_cells_depth)

        W = tf.get_variable("W", shape=(128, number_of_tokens))
        b = tf.get_variable("b", shape=(number_of_tokens))

        embedding = tf.get_variable("embedding", [number_of_tokens, 128])
        # (60, 50, 128)
        rnn_input = tf.nn.embedding_lookup(embedding, input_placeholder)
        # 50 of (60, 1, 128)
        rnn_input = tf.split(rnn_input, batch_len, axis=1)
        rnn_input = [tf.squeeze(rni, [1]) for rni in rnn_input]

        # map input from id numbers to rnn states
        decoder_initial_state = rnn_cell.zero_state(batch_size, tf.float32)
        # outputs list of 50 - (60,128)
        outputs, last_state = seq2seq.rnn_decoder(rnn_input,
                                                  decoder_initial_state,
                                                  rnn_cell,
                                                  scope="rnn")
    # (60, -1)
    outputs = tf.concat(outputs, 1)
    # (-1, 128) ie a list of letters
    outputs = tf.reshape(outputs, [-1, 128])

    # (3000, number_of_tokens)
    logits = tf.matmul(outputs, W) + b
    #probs = tf.nn.softmax(logits, 1, name="probs")
    probs = tf.nn.softmax(logits, -1, name="probs")

    loss = seq2seq.sequence_loss_by_example(
        [logits], [tf.reshape(target_placeholder, [-1])],
        [tf.ones([batch_size * batch_len])], number_of_tokens)
    return ([
        loss, probs, decoder_initial_state, input_placeholder,
        target_placeholder, last_state, logits
    ])
Example 17
 def time_sequence_graph(self, inputs):
     """
     Apply the recurrence cell to an input sequence (each batch entry is a sequence of words).
     return: stacked cell outputs of the complete sequence in addition to the last hidden state 
     (and memory for LSTM/LSTMP) obtained after processing the last word (in each batch entry).
     """
     outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=None)
     output = tf.reshape(tf.concat(outputs, 1), [-1, self.recurrent_state_size])
         
     return output, last_state
Example 18
 def noembedding_rnn_decoder(self, decoder_inputs, init_state, cell):
     loop_function = self._extract_argmax(
         self.fake_embedding,
         self.decoder_output_projection) if self.feed_previous else None
     emb_inp = (tf.nn.embedding_lookup(self.fake_embedding, i)
                for i in decoder_inputs)
     return rnn_decoder(emb_inp,
                        init_state,
                        cell,
                        loop_function=loop_function)
Example 19
def decode_seq(decoder_inputs, decoder_init_state, hidden_size, vocab_size, initializer=tf.contrib.layers.xavier_initializer()):
  with tf.variable_scope("decoder"): # Decoder layer (train)
    cell_fw2 = tf.nn.rnn_cell.LSTMCell(hidden_size, initializer=initializer) # LSTM cell.   decoder num_neurons = hidden_size = intent dim
    decoder_output, _ = legacy_seq2seq.rnn_decoder(decoder_inputs=decoder_inputs, initial_state=decoder_init_state, cell=cell_fw2) # [batch_size, pad_len-1, hidden_size]
    decoder_output = tf.stack(decoder_output, axis=1) # [batch_size, pad_len-1, hidden_size]

  with tf.variable_scope("linear_projection"): # Projection layer
    W_proj =tf.get_variable("weights",[1,hidden_size, vocab_size], initializer=initializer) # hidden_size to vocab_size
    logits = tf.nn.conv1d(decoder_output, W_proj, 1, "VALID", name="logits") # project [batch_size, pad_length-1, vocab_size]

  return logits
Example 20
 def __init__(self,args, mode='TRAIN'):
     '''Create the model.
     
     Args:
         args: parsed arguments
         mode: TRAIN | EVAL | INFER
     '''
     # When sampling, batch size and seq length are set to 1
     if mode == 'INFER':
         args.batch_size = 1
         args.seq_length = 1
     cell = rnn.BasicLSTMCell(args.rnn_size,state_is_tuple = True)
     self.cell = cell = rnn.MultiRNNCell([cell]*args.num_layers, state_is_tuple = True)
     
     # Build the inputs and outputs placeholders
     self.input_data = tf.placeholder(tf.int32,[args.batch_size,args.seq_length])
     self.targets = tf.placeholder(tf.int32,[args.batch_size,args.seq_length])
     self.initial_state = cell.zero_state(args.batch_size,dtype = tf.float32)
     
     with tf.name_scope('rnn_cells'):
         # final w
         softmax_w = tf.get_variable('softmax_w',[args.rnn_size,args.vocab_size])
         # final bias
         softmax_b = tf.get_variable('softmax_b',[args.vocab_size])
         with tf.device('/cpu:0'):
             embedding = tf.get_variable('embedding',[args.vocab_size,args.rnn_size],
                                         dtype = tf.float32)
             inputs = tf.split(tf.nn.embedding_lookup(embedding,self.input_data),
                               args.seq_length,1)
             inputs = [tf.squeeze(input_,[1]) for input_ in inputs]
     
     def loop(prev, _):
         prev = tf.matmul(prev,softmax_w) + softmax_b
         prev_symbol = tf.stop_gradient(tf.argmax(prev,1))
         return tf.nn.embedding_lookup(embedding,prev_symbol)
     
     ## Using legacy_seq2seq#####################################
     outputs, last_state = legacy_seq2seq.rnn_decoder(
             inputs,self.initial_state,cell,loop_function=loop if mode == 'INFER' else None,
             scope = 'rnn_cells')
     output = tf.reshape(tf.concat(outputs,1),[-1,args.rnn_size])
     self.logits = tf.matmul(output,softmax_w) + softmax_b
     self.probs = tf.nn.softmax(self.logits)
     loss = legacy_seq2seq.sequence_loss_by_example(
             logits = [self.logits],targets = [tf.reshape(self.targets,[-1])],
             weights = [tf.ones([args.batch_size*args.seq_length])])
     self.cost = tf.reduce_mean(loss)/args.batch_size/args.seq_length
     self.final_state = last_state
     self.lr = tf.Variable(0.0, trainable = False)
     tvars = tf.trainable_variables()
     grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost,tvars),args.grad_clip)
     optimizer = tf.train.AdamOptimizer(self.lr)
     self.train_op = optimizer.apply_gradients(zip(grads,tvars))
Example 21
    def __init__(self, args, training=True):
        """Initialize RNN model"""
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        cell_fn = rnn.GRUCell
        cells = [cell_fn(args.rnn_size) for _ in range(args.num_layers)]

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                         loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])

        with tf.name_scope('loss'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

        self.final_state = last_state
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)

        with tf.name_scope('optimization'):
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example 22
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        def lstm_cell(lstm_size):
            return tf.contrib.rnn.BasicLSTMCell(lstm_size)

        cells = []
        for i in range(args.num_layers):
            cells.append(lstm_cell(args.lstm_size))

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.output_data = tf.placeholder(tf.int32,
                                          [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('lstm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.lstm_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.lstm_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.split(
            inputs, args.seq_length,
            1)  # split the input into seq_length subtensors along dimension 1
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell)
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.lstm_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                           [args.batch_size, args.seq_length])

        ## loss definition
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.output_data, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.eta = tf.Variable(0.0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(self.eta).minimize(self.cost)
Example 23
    def lsrc_global_sequence_graph(self, config, inputs):
        """
        Build the recurrence graph of the global state of the LSRC model. 
        It returns a list of the hidden outputs and the last hidden layer
        """
        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs,
                                                         self.global_state,
                                                         self.global_cell,
                                                         loop_function=None)
        output = tf.reshape(tf.concat(outputs, 1),
                            [-1, self.global_state_size])

        return output, last_state
Example 24
    def createGraph(self):
        # -------------------------------------------
        # Inputs

        self.in_ph = tf.placeholder(tf.int32,
                                    [self.batch_size, self.sequence_length],
                                    name='inputs')
        self.target_profile = tf.placeholder(
            tf.float32, [self.batch_size, self.profile_size], name="target")

        in_onehot = tf.one_hot(self.in_ph,
                               self.vocab_size,
                               name="input_onehot")
        inputs = tf.split(in_onehot, self.sequence_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # -------------------------------------------
        # Computation Graph

        with tf.variable_scope("profRNN"):
            cells = [
                rnn_cell.GRUCell(self.state_dim)
                for i in range(self.num_layers)
            ]
            # cells = [GORUCell( state_dim, str(i) ) for i in range(num_layers)]

            self.stacked_cells = rnn_cell.MultiRNNCell(cells,
                                                       state_is_tuple=True)
            self.initial_state = self.stacked_cells.zero_state(
                self.batch_size, tf.float32)
            # call seq2seq.rnn_decoder
            outputs, self.final_state = seq2seq.rnn_decoder(
                inputs, self.initial_state, self.stacked_cells)
            # transform the list of state outputs to a list of logits.
            # use a linear transformation.
            self.W = tf.get_variable("W", [self.state_dim, self.profile_size],
                                     tf.float32,
                                     tf.random_normal_initializer(stddev=0.02))
            self.b = tf.get_variable("b", [self.profile_size],
                                     initializer=tf.constant_initializer(0.0))
            self.logits = tf.nn.softmax(
                tf.matmul(outputs[-1], self.W) + self.b)
            # L1 loss between the target profile and the network output
            self.loss = tf.reduce_sum(tf.abs(self.target_profile -
                                             self.logits))
            self.loss_summary = tf.summary.scalar("loss", self.loss)
            # create a training op using the Adam optimizer
            self.optim = tf.train.AdamOptimizer(0.001,
                                                beta1=0.5).minimize(self.loss)
Example 25
    def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell,
                      feed_previous):
        def inference_loop_function(prev, _):
            prev = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
            return tf.to_float(
                tf.equal(
                    prev,
                    tf.reduce_max(prev, reduction_indices=[1],
                                  keep_dims=True)))

        loop_function = inference_loop_function if feed_previous else None

        with variable_scope.variable_scope('seq2seq'):
            _, final_enc_state = static_rnn(cell,
                                            encoder_inputs,
                                            dtype=dtypes.float32)
            return rnn_decoder(decoder_inputs,
                               final_enc_state,
                               cell,
                               loop_function=loop_function)
Example 26
    def model(self):
        """
        Core Network of the RAM
        :return: Sequence of hidden states of the RNN
        """
        self.location_list = []
        self.location_mean_list = []
        self.location_stddev_list = []
        self.glimpses_list = []
        # Create LSTM Cell
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hs_size,
                                                 activation=tf.nn.relu,
                                                 state_is_tuple=True)
        initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)

        # Initial location mean generated by initial hidden state of RNN
        initial_loc = self.hard_tanh(tf.matmul(initial_state[0], self.h_l_out))
        #initial_std = self.hard_sigmoid(tf.matmul(initial_state[0], self.h_l_std_out))
        initial_std = tf.nn.sigmoid(
            tf.matmul(initial_state[0], self.h_l_std_out))
        sample_loc = self.hard_tanh(initial_loc + tf.cond(
            self.training, lambda: tf.random_normal(initial_loc.get_shape(), 0,
                                                    initial_std), lambda: 0.))
        loc = sample_loc * self.pixel_scaling

        self.location_mean_list.append(tf.reduce_sum(initial_loc, 1))
        self.location_stddev_list.append(tf.reduce_sum(initial_std, 1))
        self.location_list.append(tf.reduce_sum(sample_loc, 1))
        self.eval_location_list.append(loc)

        # Compute initial glimpse
        initial_glimpse = self.Glimpse_Net(loc)

        inputs = [initial_glimpse]
        inputs.extend([0] * (self.glimpses - 1))
        outputs, _ = seq2seq.rnn_decoder(inputs,
                                         initial_state,
                                         lstm_cell,
                                         loop_function=self.get_next_input)

        return outputs
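
The loop_function used here, self.get_next_input, is not part of this snippet. In RAM-style models it typically maps the previous hidden output to a new glimpse location and returns the next glimpse. A hypothetical, simplified sketch using only names that already appear above (the full version would also sample the location and record it in the bookkeeping lists):

    def get_next_input(self, output, i):
        # Hypothetical sketch: map the previous hidden output to a new
        # location, then extract the glimpse at that location.
        loc_mean = self.hard_tanh(tf.matmul(output, self.h_l_out))
        loc = loc_mean * self.pixel_scaling
        return self.Glimpse_Net(loc)
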
Example 27
    def pre_processing(self, args, cell, training):
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                         loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        # output layer
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Example 28
    def compute_profile_from_within(self, x):
        with tf.variable_scope("qualRNN", reuse=True):
            # inputs
            self.s_inputs = x
            s_onehot = tf.one_hot(self.s_inputs,
                                  self.vocab_size,
                                  name="s_input_onehot")
            s_onehot = tf.split(s_onehot, self.sample_sequence_length, 1)
            s_onehot = [tf.squeeze(input_, [1]) for input_ in s_onehot]

            # initialize
            self.s_initial_state = self.stacked_cells.zero_state(
                self.sample_batch_size, tf.float32)

            # call seq2seq.rnn_decoder
            s_outputs, self.s_final_state = seq2seq.rnn_decoder(
                s_onehot, self.s_initial_state, self.stacked_cells)

            # transform the list of state outputs to a list of logits.
            # use a linear transformation.
            # s_outputs = tf.reshape(s_outputs, [1, self.state_dim])
            self.s_probs = tf.matmul(s_outputs[-1], self.W) + self.b
        return self.s_probs
Example 29
def rnn_decoder_custom_embedding(emb_inp,
                                 initial_state,
                                 cell,
                                 embedding,
                                 num_symbols,
                                 output_projection=None,
                                 feed_previous=False,
                                 update_embedding_for_previous=True,
                                 scope=None,
                                 is_fed_h=True):

    with variable_scope.variable_scope(scope
                                       or "embedding_rnn_decoder") as scope:
        if output_projection is not None:
            dtype = scope.dtype
            proj_weights = ops.convert_to_tensor(output_projection[0],
                                                 dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with(
                [None, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1],
                                                dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        # embedding = variable_scope.get_variable("embedding",
        #                                         [num_symbols, embedding_size])
        loop_function = _extract_argmax_and_embed(
            embedding,
            initial_state[0],
            output_projection,
            update_embedding_for_previous,
            is_fed_h=is_fed_h) if feed_previous else None

        return rnn_decoder(emb_inp,
                           initial_state,
                           cell,
                           loop_function=loop_function)
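
For comparison, the stock _extract_argmax_and_embed helper in legacy_seq2seq has roughly the following shape (a simplified sketch; the project-specific is_fed_h variant used above is not reproduced here):

    def _extract_argmax_and_embed(embedding, output_projection=None,
                                  update_embedding=True):
        def loop_function(prev, _):
            # Optionally project the previous decoder output to vocab logits.
            if output_projection is not None:
                prev = tf.nn.xw_plus_b(prev, output_projection[0],
                                       output_projection[1])
            prev_symbol = tf.argmax(prev, 1)
            emb_prev = tf.nn.embedding_lookup(embedding, prev_symbol)
            if not update_embedding:
                # Do not backpropagate through the fed-back embedding.
                emb_prev = tf.stop_gradient(emb_prev)
            return emb_prev
        return loop_function
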
Example 30
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0
                             or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(
                    cell,
                    input_keep_prob=args.input_keep_prob,
                    output_keep_prob=args.output_keep_prob)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout beta testing: double check which one should affect next line
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if not training else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Example 31
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        additional_cell_args = {}
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
            additional_cell_args.update({'state_is_tuple': False})
        elif args.model == 'gridlstm':
            cell_fn = grid_rnn.Grid2LSTMCell
            additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0,
                                         'state_is_tuple': False, 'output_is_tuple': False})
        elif args.model == 'gridgru':
            cell_fn = grid_rnn.Grid2GRUCell
            additional_cell_args.update({'state_is_tuple': False, 'output_is_tuple': False})
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size, **additional_cell_args)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers, state_is_tuple=False)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                                  num_or_size_splits=args.seq_length, axis=1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                  loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        # the 4th positional argument (vocab size) accepted by very old seq2seq
        # APIs is dropped here; current sequence_loss_by_example does not take it
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [tf.reshape(self.targets, [-1])],
                                                [tf.ones([args.batch_size * args.seq_length])])
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
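With infer=True, batch_size and seq_length are forced to 1, so text generation runs the graph one symbol at a time and carries final_state forward by hand; a rough, hypothetical sampling loop (sample, vocab, inv_vocab, the priming text, and the greedy argmax choice are all assumptions, not shown in the source):

    import numpy as np

    def sample(sess, model, vocab, inv_vocab, prime='the ', num=200):
        # warm the recurrent state up on the priming text
        state = sess.run(model.initial_state)
        for ch in prime[:-1]:
            x = np.array([[vocab[ch]]])
            state = sess.run(model.final_state,
                             {model.input_data: x, model.initial_state: state})
        # greedily extend the sequence from the last primed character
        text, ch = prime, prime[-1]
        for _ in range(num):
            x = np.array([[vocab[ch]]])
            probs, state = sess.run([model.probs, model.final_state],
                                    {model.input_data: x, model.initial_state: state})
            ch = inv_vocab[int(np.argmax(probs[0]))]
            text += ch
        return text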
Example No. 32
0
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(cell,
                                          input_keep_prob=args.input_keep_prob,
                                          output_keep_prob=args.output_keep_prob)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout note: output_keep_prob is applied to the embedded inputs on the
        # next line; double check whether input_keep_prob is the intended knob here
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if not training else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
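self.lr is a plain non-trainable Variable, so a training driver has to assign it explicitly, and with state_is_tuple=True the carried-over state is fed back component by component; a hypothetical outline, assuming LSTM cells and num_epochs/learning_rate/decay_rate arguments plus a batch iterator that are not part of this snippet:

    for e in range(args.num_epochs):
        # exponential learning-rate decay, pushed into the graph each epoch
        sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
        state = sess.run(model.initial_state)
        for x, y in batches:  # hypothetical (inputs, targets) iterator
            feed = {model.input_data: x, model.targets: y}
            # feed each layer's LSTMStateTuple from the previous step
            for i, (c, h) in enumerate(model.initial_state):
                feed[c], feed[h] = state[i].c, state[i].h
            _, train_loss, state = sess.run(
                [model.train_op, model.cost, model.final_state], feed)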
Example No. 33
0
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False, dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(self.batch_pointer, self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)

        def variable_summaries(var):
            """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                #with tf.name_scope('stddev'):
                #   stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                #tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                #tf.summary.histogram('histogram', var)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        # legacy_seq2seq.sequence_loss_by_example does not take a vocabulary-size
        # argument; the stray 4th positional arg from the pre-1.0 API is dropped
        loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
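The summaries registered above only reach TensorBoard once they are merged and written out; a brief, hypothetical hookup (the log directory and the x, y batch tensors are assumptions):

    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter('logs', sess.graph)  # hypothetical log directory

    feed = {model.input_data: x, model.targets: y}      # x, y: one training batch
    summary, _, step = sess.run(
        [merged, model.train_op, model.inc_batch_pointer_op], feed)
    writer.add_summary(summary, step)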