Example #1
0
    def _apply_droput_wrapper(self):
        """Assemble the dropout-wrapped multi-layer cell and its state tensors.

        One cell per layer is obtained from ``self.__new_cell()`` and wrapped
        in a ``DropoutWrapper`` driven by ``self.in_keep_prob`` and
        ``self.out_keep_prob``; the wrapped cells are stacked into
        ``self.multi_cell``.  ``self.initial_state`` is a zero state passed
        through ``rnn_placeholders``; ``self.zero_state`` is a second, plain
        zero state of the same cell.
        """
        self.multi_cell = MultiRNNCell([
            DropoutWrapper(self.__new_cell(),
                           input_keep_prob=self.in_keep_prob,
                           output_keep_prob=self.out_keep_prob)
            for _ in range(self.num_layers)
        ])

        state_dtype = tf.float32
        self.initial_state = rnn_placeholders(
            self.multi_cell.zero_state(self.batch_size, state_dtype))
        self.zero_state = self.multi_cell.zero_state(self.batch_size,
                                                     state_dtype)
Example #2
0
def Stack_LSTM(inputs, lengths, is_training=False):
    """Run ``inputs`` through a stack of zoneout LSTM cells.

    The number of layers, cell size, zoneout rate and residual switch are
    read from ``hp.Speaker_Embedding``.  A projection to
    ``Embedding_Size`` is requested only when it differs from the cell
    size.  When residual connections are enabled, every layer except the
    last one is wrapped in a ``ResidualWrapper``.

    Args:
        inputs: input tensor handed to ``tf.nn.dynamic_rnn``.
        lengths: per-example sequence lengths handed to ``dynamic_rnn``.
        is_training: forwarded to each ``ZoneoutLSTMCell``.

    Returns:
        The output tensor of ``tf.nn.dynamic_rnn`` (final state discarded).
    """
    lstm_hp = hp.Speaker_Embedding.LSTM
    layer_count = lstm_hp.Nums

    layers = []
    for layer_idx in range(layer_count):
        cell_size = lstm_hp.Cell_Size
        embedding_size = hp.Speaker_Embedding.Embedding_Size
        # Only project when the cell size and the embedding size differ.
        if cell_size == embedding_size:
            projection = None
        else:
            projection = embedding_size

        layer = ZoneoutLSTMCell(
            num_units=cell_size,
            num_proj=projection,
            activation=tf.tanh,
            is_training=is_training,
            cell_zoneout_rate=lstm_hp.Zoneout_Rate,
            output_zoneout_rate=lstm_hp.Zoneout_Rate,
            name='lstmcell_{}'.format(layer_idx))

        is_last_layer = layer_idx >= layer_count - 1
        if lstm_hp.Use_Residual and not is_last_layer:
            layer = ResidualWrapper(layer)
        layers.append(layer)

    with tf.variable_scope('lstm'):
        rnn_outputs, _ = tf.nn.dynamic_rnn(
            cell=MultiRNNCell(layers),
            inputs=inputs,
            sequence_length=lengths,
            dtype=tf.float32,
        )

    return rnn_outputs
Example #3
0
    def build_encoder_cell(self):
        """Build the stacked encoder cell.

        ``self.depth`` single cells are created with ``build_single_cell``
        (each with ``self.hidden_units`` units and the configured residual
        setting) and combined into one ``MultiRNNCell``.

        :return: the ``MultiRNNCell`` wrapping all encoder layers.
        """
        layers = []
        for _ in range(self.depth):
            layers.append(
                self.build_single_cell(self.hidden_units,
                                       use_residual=self.use_residual))
        multi_cell = MultiRNNCell(layers)

        # Debug output: confirm the stacked cell exposes the RNNCell attributes.
        print("in build_encoder_cell")
        for attribute in ('output_size', 'state_size'):
            print(hasattr(multi_cell, attribute))
        return multi_cell
Example #4
0
    def build_encoder_cell(self):
        """Build the stacked encoder cell.

        The number of layers is ``self.depth``; every layer comes from
        ``build_single_cell`` with ``self.hidden_units`` units and the
        configured residual setting.

        :return: a ``MultiRNNCell`` composed of the per-layer cells.
        """
        # MultiRNNCell chains simple cells sequentially, e.g.
        #   cells = [BasicLSTMCell(num_units=n) for n in [128, 64]]
        #   stacked_rnn_cell = MultiRNNCell(cells)
        single_cells = [
            self.build_single_cell(self.hidden_units,
                                   use_residual=self.use_residual)
            for _ in range(self.depth)
        ]
        multi_cell = MultiRNNCell(single_cells)

        # Debug output: confirm the stacked cell exposes the RNNCell attributes.
        print("in build_encoder_cell")
        has_output_size = hasattr(multi_cell, 'output_size')
        print(has_output_size)
        has_state_size = hasattr(multi_cell, 'state_size')
        print(has_state_size)
        return multi_cell
Example #5
0
    def build_model(self):
        """Build the LSTM classifier graph, train it and log test accuracy.

        The network is: embedding lookup -> recurrent layer(s) selected by
        ``self.mode`` -> one dense softmax layer applied to the last time
        step.  Supported modes:

        * ``"basic-lstm"``: one ``BasicLSTMCell`` of ``self.lstm_output_size``.
        * ``"bi-lstm"``: a forward and a backward ``BasicLSTMCell`` whose
          outputs are concatenated along the feature axis.
        * ``"two-lstm"``: two stacked ``BasicLSTMCell``s of 300 and 150 units.

        Training runs 10 epochs of 100-example mini-batches with Adagrad
        (learning rate 0.1).  Summaries are written to the ``tmp`` directory
        and test accuracy is logged after every epoch.

        Raises:
            ValueError: if ``self.mode`` is not one of the supported modes.
        """
        logging.info("Building model...")

        batch_size = 100
        num_epochs = 10
        two_lstm_units = [300, 150]

        # Fail early with a clear message instead of a NameError on ``outputs``
        # or ``w`` further down.
        if self.mode not in ("basic-lstm", "bi-lstm", "two-lstm"):
            raise ValueError("Unsupported mode: {!r}".format(self.mode))

        X = tf.placeholder("int32", shape=[None, self.maxlen], name="x")
        y_ = tf.placeholder(tf.float64,
                            shape=[None, self.num_classes],
                            name="y_true")
        with tf.name_scope("embedding"):
            embedding = tf.get_variable("embedding",
                                        dtype=tf.float64,
                                        initializer=self.emb_matrix)

        with tf.name_scope("lstm"):
            # inputs: [batch_size, maxlen, embedding_dim]
            inputs = tf.nn.embedding_lookup(embedding, X, name="inputs")
            if self.mode == "basic-lstm":
                cell = BasicLSTMCell(self.lstm_output_size, name="cell")
                outputs, _ = tf.nn.dynamic_rnn(cell,
                                               inputs,
                                               dtype=tf.float64)
            elif self.mode == "bi-lstm":
                cell_fw = BasicLSTMCell(self.lstm_output_size, name="cell")
                cell_bw = BasicLSTMCell(self.lstm_output_size, name="cell")
                # The dynamic variant accepts the 3-D ``inputs`` tensor
                # directly and must use the same float64 dtype as the
                # embeddings; the two directions are joined on the feature
                # axis so the result can be sliced like the other modes.
                (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, inputs, dtype=tf.float64)
                outputs = tf.concat((output_fw, output_bw), 2)
            else:  # "two-lstm"
                cells = [BasicLSTMCell(n) for n in two_lstm_units]
                stacked_rnn_cell = MultiRNNCell(cells)
                outputs, _ = tf.nn.dynamic_rnn(stacked_rnn_cell,
                                               inputs,
                                               dtype=tf.float64)

        with tf.name_scope("fc"):
            # Width of the last-step feature vector produced by each mode.
            if self.mode == "bi-lstm":
                fc_inputs = 2 * self.lstm_output_size
            elif self.mode == "two-lstm":
                fc_inputs = two_lstm_units[-1]
            else:
                fc_inputs = self.lstm_output_size
            w = tf.get_variable("w",
                                shape=[fc_inputs, self.num_classes],
                                dtype=tf.float64)
            b = tf.get_variable("b",
                                shape=[self.num_classes],
                                dtype=tf.float64)
            act = tf.matmul(outputs[:, -1, :], w) + b
            y = tf.nn.softmax(act)
            tf.summary.histogram("w", w)
            tf.summary.histogram("b", b)

        with tf.name_scope("train"):
            cross_entropy = tf.reduce_mean(
                -tf.reduce_sum(y_ * tf.log(y), axis=[1]))

            # Define train step.
            train_step = tf.train.AdagradOptimizer(0.1).minimize(cross_entropy)

            # Define accuracy.
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

            tf.summary.scalar("cross_entropy", cross_entropy)
            tf.summary.scalar("accuracy", accuracy)

        sess = tf.Session()
        writer = None
        try:
            sess.run(tf.global_variables_initializer())

            summ_acc = tf.summary.merge_all(scope="train")
            summ_fc = tf.summary.merge_all(scope="fc")

            writer = tf.summary.FileWriter("tmp")
            writer.add_graph(sess.graph)

            logging.info("Training model...")
            data_size = self.x_train.shape[0]
            test_feed = {X: self.x_test, y_: self.y_test}
            s = time.time()
            for i in range(num_epochs):
                print("epoch {}".format(i))
                j = 0
                # ``<=`` so the final full batch is used when data_size is an
                # exact multiple of the batch size.
                while j + batch_size <= data_size:
                    batch_feed = {
                        X: self.x_train[j:j + batch_size],
                        y_: self.y_train[j:j + batch_size],
                    }
                    sess.run(train_step, feed_dict=batch_feed)
                    j += batch_size

                    if j % 10000 == 0:
                        summ_fc_tmp = sess.run(summ_fc, feed_dict=batch_feed)
                        summ_acc_tmp = sess.run(summ_acc, feed_dict=test_feed)
                        step = j + i * data_size
                        writer.add_summary(summ_fc_tmp, global_step=step)
                        writer.add_summary(summ_acc_tmp, global_step=step)

                acc = sess.run(accuracy, feed_dict=test_feed)
                logging.info("Accuracy: {}".format(acc))
            t = time.time()
            logging.info("Train model use {}s".format(t - s))
        finally:
            # Neither the session nor the writer escapes this method, so
            # release them here instead of leaking them.
            if writer is not None:
                writer.close()
            sess.close()
Example #6
0
class CharToChar():
    """Character-level RNN that predicts the next one-hot encoded character.

    The constructor resets the default TensorFlow graph and builds the whole
    model: placeholders, a dropout-wrapped multi-layer cell, a single-step
    static unroll, a shared dense softmax layer and the optimizer op.

    Args:
        name: model name; used for checkpoint and JSON file names.
        units: number of units in every recurrent cell.
        train_batch: batch size fed while training.
        hot_dimen: size of the one-hot character vectors.
        num_layers: number of stacked recurrent cells.
        cell_type: ``'lstm'``, ``'rnn'``; any other value selects a GRU cell.
        in_keep_prob_val: input keep probability fed during training.
        out_keep_prob_val: output keep probability fed during training.
        learning_rate: learning rate handed to the optimizer.
        optimizer: ``'adam'`` or ``'rms'`` (case-insensitive); any other
            value falls back to Adam.
        use_grad_clip: clip every gradient element to
            ``[-grad_clip_val, grad_clip_val]`` before applying it.
        grad_clip_val: clipping bound used when ``use_grad_clip`` is true.
    """

    def __init__(self,
                 name,
                 units,
                 train_batch,
                 hot_dimen,
                 num_layers=3,
                 cell_type='lstm',
                 in_keep_prob_val=0.7,
                 out_keep_prob_val=0.7,
                 learning_rate=1e-2,
                 optimizer='adam',
                 use_grad_clip=False,
                 grad_clip_val=5.0):

        self.name = name
        self.units = units
        self.train_batch = train_batch
        self.num_layers = num_layers
        self.in_keep_prob_val = in_keep_prob_val
        self.out_keep_prob_val = out_keep_prob_val
        self.cell_type = cell_type
        self.hot_dimen = hot_dimen
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.grad_clip_val = grad_clip_val
        self.use_grad_clip = use_grad_clip

        # Graph handles; filled in by the builder methods below.
        self.multi_cell = None
        self.initial_state = None
        self.input_placeholder = None
        self.output_placeholder = None
        self.outputs_raw = None
        self.logits = None
        self.predictions = None
        self.entropy_loss = None
        self.grad_update = None
        self.final_state = None
        self.sess = None
        self.init = None
        self.zero_state = None
        self.in_keep_prob = None
        self.out_keep_prob = None

        self.is_training_done = False

        # Build order matters: the placeholders are consumed by the cell
        # wrapper, the cell by the unroll, and so on.
        tf.reset_default_graph()
        self._build_placeholders()
        self._apply_droput_wrapper()
        self._static_unroll()
        self._reshape_and_unstack()
        self._build_optimizer_and_finalize_graph()

    def __new_cell(self):
        """Return a fresh recurrent cell of the configured ``cell_type``."""
        if self.cell_type == 'lstm':
            return BasicLSTMCell(self.units)
        if self.cell_type == 'rnn':
            return BasicRNNCell(self.units)
        return GRUCell(self.units)

    def _apply_droput_wrapper(self):
        """Create the dropout-wrapped multi-layer cell and its state tensors."""
        cells = []
        for _ in range(self.num_layers):
            cell = self.__new_cell()
            cell = DropoutWrapper(cell,
                                  input_keep_prob=self.in_keep_prob,
                                  output_keep_prob=self.out_keep_prob)
            cells.append(cell)
        self.multi_cell = MultiRNNCell(cells)

        # State that can be fed from the previous step.
        self.initial_state = rnn_placeholders(
            self.multi_cell.zero_state(self.batch_size, tf.float32))

        # Plain zero state, evaluated to obtain the starting state in train().
        self.zero_state = self.multi_cell.zero_state(self.batch_size,
                                                     tf.float32)

    def _build_placeholders(self):
        """Create the batch-size, input, target and keep-probability feeds."""
        self.batch_size = tf.placeholder(tf.int32, [])

        self.input_placeholder = tf.placeholder(tf.float32,
                                                shape=[None, self.hot_dimen])

        self.output_placeholder = tf.placeholder(tf.float32,
                                                 shape=[None, self.hot_dimen])

        self.in_keep_prob = tf.placeholder(tf.float32, [])

        self.out_keep_prob = tf.placeholder(tf.float32, [])

    def _static_unroll(self):
        """Unroll the cell for a single time step over the input feed."""
        self.outputs_raw, self.final_state = tf.nn.static_rnn(
            cell=self.multi_cell,
            inputs=[self.input_placeholder],
            dtype=tf.float32,
            initial_state=self.initial_state)
        # static_rnn returns one output per step; only one step was unrolled.
        self.outputs_raw = self.outputs_raw[0]

    def get_shared_variable(self, var, shape=None):
        """Return the variable ``var`` from the shared ``softmax_dense`` scope.

        The scope uses ``AUTO_REUSE``, so the variable is created on first
        use and reused afterwards.
        """
        with tf.variable_scope('softmax_dense', reuse=tf.AUTO_REUSE):
            v = tf.get_variable(var, shape)
        return v

    def __apply_dense(self, time_step):
        """Project a cell output to ``hot_dimen`` logits."""
        w = self.get_shared_variable('W', shape=[self.units, self.hot_dimen])
        b = self.get_shared_variable('B', shape=[self.hot_dimen])
        return tf.matmul(time_step, w) + b

    def _reshape_and_unstack(self):
        """Build logits, softmax predictions and the mean cross-entropy loss."""
        self.logits = self.__apply_dense(self.outputs_raw)
        self.predictions = tf.nn.softmax(self.logits)
        self.entropy_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.output_placeholder, logits=self.logits))

    def _build_optimizer_and_finalize_graph(self):
        """Create the training op, optionally with gradient value clipping."""
        optimizer_classes = {
            'adam': tf.train.AdamOptimizer,
            'rms': tf.train.RMSPropOptimizer,
        }
        # Unknown names fall back to Adam.
        opt_cls = optimizer_classes.get(self.optimizer.lower(),
                                        tf.train.AdamOptimizer)
        # compute_gradients / apply_gradients are instance methods, so the
        # optimizer has to be instantiated (with the learning rate) on both
        # the clipping and the non-clipping path.
        opt = opt_cls(self.learning_rate)
        if self.use_grad_clip:
            grad_vars = opt.compute_gradients(self.entropy_loss)
            grad_clip_const = tf.constant(self.grad_clip_val,
                                          name='grad_clipper')
            # Variables that do not influence the loss come back with a None
            # gradient, which clip_by_value cannot handle.
            clipped_grad_var = [(tf.clip_by_value(grad, -grad_clip_const,
                                                  grad_clip_const), var)
                                for grad, var in grad_vars
                                if grad is not None]
            self.grad_update = opt.apply_gradients(clipped_grad_var)
        else:
            self.grad_update = opt.minimize(loss=self.entropy_loss)

    def start_session(self):
        """Open a new session, remember it on the instance and return it."""
        self.sess = tf.Session()
        return self.sess

    def train(self, file_pipe, session=None, print_loss_after_iterations=50):
        """Train on every batch produced by ``file_pipe``.

        Args:
            file_pipe: a ``FilePipeline`` whose distinct character count
                equals ``hot_dimen``.
            session: session to train in; a new one is opened when omitted.
            print_loss_after_iterations: currently unused; kept for
                backward compatibility.

        Raises:
            ValueError: if ``file_pipe`` is not a ``FilePipeline``.
        """
        # Validate first so a bad argument does not leave a stray session.
        if not isinstance(file_pipe, FilePipeline):
            raise ValueError(
                "Cannot train the model. file_pipe is not an instance of FilePipeline"
            )

        self.sess = session or tf.Session()
        self.init = tf.global_variables_initializer()
        self.is_training_done = True
        assert self.hot_dimen == file_pipe.get_distinct_char_count()
        self.sess.run(self.init)

        keep_probs = {
            self.batch_size: self.train_batch,
            self.in_keep_prob: self.in_keep_prob_val,
            self.out_keep_prob: self.out_keep_prob_val,
        }
        state = self.sess.run(self.zero_state, feed_dict=keep_probs)

        all_epoch_done = False
        p_bar = tqdm(total=file_pipe.get_expected_total_iteration())
        p_bar.update(0)
        p_bar.set_description("Iteration")
        try:
            while not all_epoch_done:
                data, all_epoch_done = file_pipe.next_batch()
                feeder = dict(keep_probs)
                feeder[self.input_placeholder] = data[0]
                feeder[self.output_placeholder] = data[1]
                # Carry the recurrent state over from the previous batch.
                feeder[self.initial_state] = state
                state, _ = self.sess.run([self.final_state, self.grad_update],
                                         feed_dict=feeder)
                p_bar.update(1)
        finally:
            p_bar.close()

    def recycle(self):
        """Close the session, if one was opened."""
        if self.sess is not None:
            self.sess.close()

    def sample(self, f_pipe, seq_len=5, save_as_file=None):
        """Sample ``seq_len`` characters and hand them to ``preditions_to_string``.

        Sampling starts from an all-zero input vector; each predicted
        distribution is sampled and the drawn index is fed back as a one-hot
        vector.

        Raises:
            ValueError: if the model has neither been trained nor restored.
        """
        if not self.is_training_done:
            raise ValueError(
                "You must train the model before sampling sequences.")

        # Dropout is disabled (keep probability 1.0) while sampling.
        base_feed = {
            self.batch_size: 1,
            self.in_keep_prob: 1.0,
            self.out_keep_prob: 1.0,
        }
        state = self.sess.run(self.initial_state, feed_dict=base_feed)
        inp = np.zeros((1, self.hot_dimen))
        result = []
        for _ in range(seq_len):
            feeder = dict(base_feed)
            feeder[self.input_placeholder] = inp
            feeder[self.initial_state] = state
            probs, state = self.sess.run(
                [self.predictions, self.final_state], feed_dict=feeder)
            # pylint:disable=E1101
            x = np.random.choice(self.hot_dimen, p=np.squeeze(probs))
            inp = np.zeros((1, self.hot_dimen))
            inp[0, x] = 1
            result.append(x)
        preditions_to_string(f_pipe, result, save_as_file)

    def _checkpoint_path(self, version, folder=None):
        """Return the checkpoint path prefix for ``version``."""
        if folder is None:
            return './saved-v' + str(version) + '/' + self.name
        return './' + folder + str(version) + '/' + self.name

    def load_saved_checkpoints(self, version, folder=None):
        """Restore variables from a checkpoint and mark the model as trained.

        A session is opened when none exists yet, so a freshly constructed
        model can be restored directly.
        """
        saver = tf.train.Saver()
        if self.sess is None:
            self.sess = tf.Session()
        saver.restore(self.sess, self._checkpoint_path(version, folder))
        self.is_training_done = True

    def dump_model_checkpoints(self, version, folder=None):
        """Save the current session's variables to a checkpoint."""
        saver = tf.train.Saver()
        saver.save(self.sess, self._checkpoint_path(version, folder))

    def to_json(self, path='.', file_name=None):
        """Write the constructor arguments to ``<path>/<file_name>.json``.

        ``file_name`` defaults to the model name.
        """
        f_name = file_name or self.name
        config = [
            "name", "units", "train_batch", "hot_dimen", "num_layers",
            "cell_type", "in_keep_prob_val", "out_keep_prob_val",
            "learning_rate", "optimizer", "use_grad_clip", "grad_clip_val"
        ]
        data = {k: v for k, v in self.__dict__.items() if k in config}
        with open(os.path.join(path, f_name) + '.json', mode='w') as f:
            json.dump(data, f)

    @staticmethod
    def from_json(path, file_name):
        """Build a model from a configuration written by ``to_json``.

        ``file_name`` may be given with or without the ``.json`` suffix that
        ``to_json`` appends.
        """
        full_path = os.path.join(path, file_name)
        if not os.path.isfile(full_path) and os.path.isfile(
                full_path + '.json'):
            full_path += '.json'
        with open(full_path) as f:
            data = json.load(f)
        return CharToChar(**data)
Example #7
0
    def __init__(
        self,
        batch_size,
        inputs,
        outputs,
        num_units,
        cell_type
    ):
        """Build an LSTM graph that reads ``inputs`` and then predicts step by step.

        The same cell first consumes every tensor in ``inputs``; it is then
        run ``len(outputs)`` more times, each time fed with a linear
        projection of its previous output.  Those projections form
        ``self.prediction``.

        Args:
          batch_size: first dimension of the zero initial states.
          inputs: list of per-time-step tensors; the static second dimension
            of ``inputs[0]`` is used as both input and output feature size.
          outputs: list of per-time-step target tensors; its length is the
            number of prediction steps.
          num_units: sequence of layer sizes; more than one entry builds a
            ``MultiRNNCell``.  The last entry is the projection input size.
          cell_type: not used by this constructor; LSTM cells are always
            built.
        """

        self.batch_size = batch_size
        # Feature size comes from the static shape of the first input step.
        self.num_inputs = inputs[0].get_shape().as_list()[1]
        self.num_outputs = self.num_inputs
        num_time_steps = len(inputs)  # NOTE(review): assigned but never read

        num_hidden = num_units[-1]
        self.last = inputs[-1]

        if len(num_units) > 1:
            cells = [LSTMCell(num_units=n) for n in num_units]
            self._lstm_cell = MultiRNNCell(cells)
        else:
            self._lstm_cell = LSTMCell(num_hidden)

        with tf.compat.v1.variable_scope('encoder') as ec:
            # Linear projection from the top layer's output to the output
            # feature size.
            Wy = tf.Variable(tf.random.truncated_normal([num_hidden,
                                                         self.num_outputs], dtype=tf.float32), name='enc_weight'
                             )
            by = tf.Variable(tf.random.truncated_normal([self.num_outputs],
                                                        dtype=tf.float32), name='enc_bias')

            # One zero (c, h) state tuple per layer.
            init_states = []
            for i in range(len(num_units)):
                init_c = tf.zeros((batch_size, num_units[i]))
                init_h = init_c
                layer = tf.contrib.rnn.LSTMStateTuple(init_c, init_h)
                init_states.append(layer)
            init_states = tuple(init_states)

            # A single cell takes its own state tuple, not a tuple of layers.
            if len(num_units) > 1:
                lstm_state = init_states
            else:
                lstm_state = init_states[0]

            lstm_outputs = []
            # Read phase: only the output and state of the last step are
            # carried into the prediction loop.
            for step in range(len(inputs)):
                if step > 0:
                    # Reuse the variables created on the first step.
                    ec.reuse_variables()
                lstm_input = inputs[step]
                (lstm_output, lstm_state) = self._lstm_cell(
                    lstm_input, lstm_state)
            # Prediction phase: each projected output is recorded and fed
            # back as the next input.
            for step in range(len(outputs)):
                lstm_input = tf.matmul(lstm_output, Wy) + by
                lstm_outputs.append(lstm_input)
                (lstm_output, lstm_state) = self._lstm_cell(
                    lstm_input, lstm_state)

            # Stacked lists are step-first; transpose [1, 0, 2] swaps the
            # first two axes.
            self.prediction = tf.transpose(
                tf.stack(lstm_outputs), [1, 0, 2], name='prediction')
            self.target = tf.transpose(
                tf.stack(outputs), [1, 0, 2], name='target')
            self.input_ = tf.transpose(tf.stack(inputs), [1, 0, 2])
            # Keep only the first feature of prediction and target.
            self.prediction = self.prediction[:, :, 0]
            self.target = self.target[:, :, 0]
            self.enc_W = Wy
            self.enc_b = by
Example #8
0
    def __init__(
        self,
        batch_size,
        inputs,
        outputs,
        num_units,
        cell_type
    ):
        """Build an encoder-decoder RNN graph over lists of per-step tensors.

        The encoder cell consumes every tensor in ``inputs`` and projects
        each output linearly.  The decoder starts from the encoder's final
        state and last projection, and is run ``len(outputs)`` times with its
        own projected output fed back as the next input.

        Args:
          batch_size: first dimension of the zero initial states.
          inputs: list of per-time-step tensors; the static second dimension
            of ``inputs[0]`` is used as both input and output feature size.
          outputs: list of per-time-step target tensors; its length is the
            number of decoder steps.
          num_units: sequence of layer sizes; more than one entry builds
            ``MultiRNNCell``s.  The last entry is the projection input size.
          cell_type: ``'GRU'`` selects ``GRUCell``; any other value selects
            ``LSTMCell``.
        """

        self.batch_size = batch_size
        # Feature size comes from the static shape of the first input step.
        self.num_inputs = inputs[0].get_shape().as_list()[1]
        self.num_outputs = self.num_inputs

        num_hidden = num_units[-1]
        use_gru = cell_type == 'GRU'
        cell_cls = GRUCell if use_gru else LSTMCell
        is_stacked = len(num_units) > 1

        if is_stacked:
            # Encoder and decoder each get their own cell instances.  Wrapping
            # one shared list in two MultiRNNCells would reuse the same cell
            # objects for both, unlike the single-layer branch below.
            self._enc_cell = MultiRNNCell(
                [cell_cls(num_units=n) for n in num_units])
            self._dec_cell = MultiRNNCell(
                [cell_cls(num_units=n) for n in num_units])
        else:
            self._enc_cell = cell_cls(num_hidden)
            self._dec_cell = cell_cls(num_hidden)

        with tf.compat.v1.variable_scope('encoder') as es:
            enc_W = tf.Variable(
                tf.random.truncated_normal([num_hidden, self.num_outputs],
                                           dtype=tf.float32),
                name='enc_weight')
            enc_b = tf.Variable(
                tf.random.truncated_normal([self.num_outputs],
                                           dtype=tf.float32),
                name='enc_bias')

            # Zero initial state per layer: a plain tensor for GRU, a
            # (cell state, hidden state) tuple for LSTM.
            init_states = []
            for n in num_units:
                zeros = tf.zeros((batch_size, n))
                if use_gru:
                    init_states.append(zeros)
                else:
                    init_states.append(
                        tf.contrib.rnn.LSTMStateTuple(zeros, zeros))
            init_states = tuple(init_states)

            # A single cell takes its own state, not a tuple of layer states.
            enc_state = init_states if is_stacked else init_states[0]

            enc_predictions = []
            for step, enc_input in enumerate(inputs):
                if step > 0:
                    # Reuse the variables created on the first step.
                    es.reuse_variables()
                (enc_output, enc_state) = self._enc_cell(enc_input, enc_state)
                # y_hat = h * W + b
                enc_prediction = tf.matmul(enc_output, enc_W) + enc_b
                enc_predictions.append(enc_prediction)

        with tf.compat.v1.variable_scope('decoder') as vs:
            dec_W = tf.Variable(
                tf.random.truncated_normal([num_hidden, self.num_outputs],
                                           dtype=tf.float32),
                name='dec_weight')
            dec_b = tf.Variable(
                tf.random.truncated_normal([self.num_outputs],
                                           dtype=tf.float32),
                name='dec_bias')

            # The decoder starts from the encoder's last projection and state.
            dec_input = enc_prediction
            dec_state = enc_state
            dec_outputs = []
            for step in range(len(outputs)):
                if step > 0:
                    vs.reuse_variables()
                (dec_output, dec_state) = self._dec_cell(dec_input, dec_state)
                # The projected output is both recorded and fed back.
                dec_input = tf.matmul(dec_output, dec_W) + dec_b
                dec_outputs.append(dec_input)
            self.prediction = tf.transpose(
                tf.stack(dec_outputs), [1, 0, 2], name='prediction')

        # Stacked lists are step-first; transpose [1, 0, 2] swaps the first
        # two axes.
        self.input_ = tf.transpose(tf.stack(inputs), [1, 0, 2])
        self.target = tf.transpose(tf.stack(outputs), [1, 0, 2], name='target')
        # Keep only the first feature of prediction and target.
        self.prediction = self.prediction[:, :, 0]
        self.target = self.target[:, :, 0]
        self.enc_W = enc_W
        self.enc_b = enc_b
        self.dec_W = dec_W
        self.dec_b = dec_b
Example #9
0
def _LSTMCells(unit_list, act_fn_list):
    """Stack one ``LSTMCell`` per (units, activation) pair.

    The two lists are paired with ``zip``, so surplus entries in the longer
    list are ignored.

    Returns:
        A ``MultiRNNCell`` wrapping the created cells in order.
    """
    layers = []
    for n_units, activation in zip(unit_list, act_fn_list):
        layers.append(LSTMCell(n_units, activation=activation))
    return MultiRNNCell(layers)
Example #10
0
 def build_cell_layer(self):
     """Return a ``MultiRNNCell`` of ``self.depth`` independently built cells."""
     # Each layer needs its own cell instance: repeating one instance would
     # put the same cell object (and therefore the same weights) in every
     # layer of the stack.
     return MultiRNNCell(
         [self.build_single_cell() for _ in range(self.depth)])
    def build_decoder_cell(self, encoder_outputs, encoder_states):
        """Build the attention-wrapped decoder cell and its initial state.

        :param encoder_outputs: encoder outputs, used as the attention
            memory (transposed with ``(1, 0, 2)`` first when
            ``self.time_major`` is set).
        :param encoder_states: encoder final states; when the encoder is
            bidirectional only the last ``self.depth`` entries are used.
        :return: ``(cell, decoder_initial_state)`` where ``cell`` is the
            ``AttentionWrapper`` around a ``self.depth``-layer
            ``MultiRNNCell`` and ``decoder_initial_state`` is its zero state
            with ``cell_state`` replaced by the encoder states.
        """
        encoder_input_length = self.encoder_inputs_length
        batch_size = self.batch_size

        # Validate before the values are sliced or transposed below, so a
        # missing input fails with these messages rather than a TypeError.
        assert encoder_input_length is not None, 'encoder_state_length 不能为空'
        assert isinstance(batch_size, int), 'batchsize的值必须为int类型'
        assert encoder_outputs is not None, 'encoder_outputs is not None'
        assert encoder_states is not None, 'encoder_state is not None'

        if self.bidirectional:
            encoder_states = encoder_states[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        # ---- beam search -------------------------------------------------
        if self.use_beamsearch_decode:
            # tile_batch repeats every batch entry beam_width times, so the
            # effective batch becomes beam_width times larger.
            encoder_outputs = seq2seq.tile_batch(
                encoder_outputs, multiplier=self.beam_width
            )
            encoder_states = seq2seq.tile_batch(
                encoder_states, multiplier=self.beam_width
            )
            encoder_input_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width
            )
            # The zero state below must match the tiled batch size.
            batch_size *= self.beam_width

        # ---- attention mechanism -----------------------------------------
        if self.attention_type.lower() == 'luong':
            attention_cls = LuongAttention
        else:
            attention_cls = BahdanauAttention
        self.attention_mechanism = attention_cls(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_input_length
        )

        # Multi-layer decoder cell.
        cell = MultiRNNCell(
            [
                self.build_single_cell(
                    self.hidden_units,
                    use_residual=self.use_residual
                )
                for _ in range(self.depth)
            ])

        # Alignment history is only recorded outside training and when beam
        # search is not used.
        alignment_history = (
            self.mode != 'train' and not self.use_beamsearch_decode
        )

        def cell_input_fn(inputs, attention):
            """Combine the decoder input with the previous attention vector.

            Without residual connections the two are simply concatenated.
            With residual connections the concatenation is projected to
            ``self.hidden_units`` by a bias-free dense layer.
            """
            # Built once and reused for both the debug output and the return
            # value, so no extra concat op is added to the graph.
            concatenated = array_ops.concat([inputs, attention], -1)

            if not self.use_residual:
                print(inputs.get_shape(), 'inputs_shape')
                print(attention.get_shape(), 'attention_shape')
                print(concatenated, 'inputs和attention拼接之后的形状')
                return concatenated

            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            # Calling the layer object applies the dense projection.
            return attn_projection(concatenated)

        cell = AttentionWrapper(
            cell=cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            alignment_history=alignment_history,
            cell_input_fn=cell_input_fn,
            name='Attention_Wrapper'
        )

        # Start from the wrapper's zero state ...
        decoder_initial_state = cell.zero_state(
            batch_size, tf.float32
        )

        # ... and hand over the encoder's final states as the cell state.
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_states
        )

        return cell, decoder_initial_state
Example #12
0
# Graph inputs: token ids, class labels, per-example lengths and the
# dropout keep probability.
x = tf.placeholder(dtype=tf.int32, shape=[None, None])
y = tf.placeholder(dtype=tf.int64, shape=[None])
sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])
keep_prob = tf.placeholder(dtype=tf.float32)
num_units = 100
n_epoch = 100

with tf.variable_scope('embedding'):
    rnn_input = tf.contrib.layers.embed_sequence(x,
                                                 vocab_size=embed_ingred_size,
                                                 embed_dim=embed_size)

with tf.variable_scope('rnn'):
    # Build a separate dropout-wrapped GRU cell for every layer: repeating a
    # single cell instance would put the same cell object (and weights) in
    # every layer of the stack.
    cell = MultiRNNCell([
        DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
        for _ in range(num_layers)
    ])

    outputs, states = tf.nn.dynamic_rnn(cell,
                                        rnn_input,
                                        dtype=tf.float32,
                                        sequence_length=sequence_length)
    # 'outputs' is a tensor of shape [batch_size, max_time, num_units].
    # 'states' is an N-tuple, N being the number of stacked cells, holding
    # the final state of each layer.

with tf.variable_scope('full_connected'):
    # Classify from the final state of the top layer.
    state = states[-1]
    fc = tf.contrib.layers.fully_connected(state,
                                           num_class,
                                           activation_fn=None)
Example #13
0
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """Build the attention-wrapped decoder cell and its initial state.

        :param encoder_outputs: encoder outputs, used as the attention memory
        :param encoder_state: encoder final state, used to seed the decoder
        :return: tuple ``(cell, decoder_initial_state)``
        """
        memory_lengths = self.encoder_inputs_length
        decoder_batch = self.batch_size

        # For a bidirectional encoder keep only the last `depth` states.
        if self.bidirectional:
            encoder_state = encoder_state[-self.depth:]

        # Swap the first two axes when the model runs time-major.
        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        if self.use_beamsearch_decode:
            # tile_batch repeats every batch entry beam_width times, so the
            # effective batch becomes beam_width times the original one.
            width = self.beam_width
            encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                                 multiplier=width)
            encoder_state = seq2seq.tile_batch(encoder_state,
                                               multiplier=width)
            memory_lengths = seq2seq.tile_batch(self.encoder_inputs_length,
                                                multiplier=width)
            # With beam search the decoder batch is batch_size * beam_width.
            decoder_batch *= width

        # Pick the attention flavour; both take the same arguments.
        # (The original author left an open question here about whether the
        # memory should be encoder_state rather than encoder_outputs.)
        if self.attention_type.lower() == 'luong':
            attention_cls = LuongAttention
        else:
            attention_cls = BahdanauAttention
        self.attention_mechanism = attention_cls(
            num_units=self.hidden_units,
            memory=encoder_outputs,
            memory_sequence_length=memory_lengths,
        )

        # Stack `depth` single cells into one multi-layer cell.
        decoder_layers = []
        for _ in range(self.depth):
            decoder_layers.append(
                self.build_single_cell(self.hidden_units,
                                       use_residual=self.use_residual))
        stacked_cell = MultiRNNCell(decoder_layers)

        # Record the attention history only outside training and only when
        # beam search is not used.
        alignment_history = not (
            self.mode == 'train' or self.use_beamsearch_decode
        )

        def cell_input_fn(inputs, attention):
            """Combine the decoder input with the previous attention.

            Without residual connections the two are simply concatenated on
            the last axis. With residual connections the concatenation is
            additionally projected to ``hidden_units`` by a bias-free Dense
            layer (calling the layer object applies it to the tensor).

            :param inputs: decoder input
            :param attention: attention value
            :return: the tensor fed to the wrapped cell
            """
            merged = array_ops.concat([inputs, attention], -1)
            if not self.use_residual:
                return merged

            projection = layers.Dense(self.hidden_units,
                                      dtype=tf.float32,
                                      use_bias=False,
                                      name='attention_cell_input_fn')
            return projection(merged)

        # Wrap the stacked cell with the attention mechanism.
        attention_cell = AttentionWrapper(
            cell=stacked_cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            alignment_history=alignment_history,
            cell_input_fn=cell_input_fn,
            name='Attention_Wrapper',
        )

        # Start from the wrapper's zero state, then hand over the encoder
        # state as the wrapped cell's state.
        decoder_initial_state = attention_cell.zero_state(
            decoder_batch, tf.float32
        ).clone(cell_state=encoder_state)

        return attention_cell, decoder_initial_state
def _INDRNNCells(unit_list, time_steps):
    """Stack one IndRNNCell per entry of ``unit_list`` into a MultiRNNCell.

    Every cell is created with ``recurrent_max_abs = 2 ** (1 / time_steps)``.

    :param unit_list: iterable of unit counts, one per stacked cell
    :param time_steps: value used to derive the recurrent weight bound
    :return: a MultiRNNCell built with ``state_is_tuple=True``
    """
    max_abs = 2 ** (1 / time_steps)
    stacked = []
    for unit in unit_list:
        stacked.append(IndRNNCell(unit, recurrent_max_abs=max_abs))
    return MultiRNNCell(stacked, state_is_tuple=True)
num_units = 100
n_epoch = 3000

# Embed the integer input `x` into vectors of size `embed_size`.
with tf.variable_scope('embedding'):
    rnn_input = tf.contrib.layers.embed_sequence(
        x,
        vocab_size=embed_ingred_size,
        embed_dim=embed_size,
    )

with tf.variable_scope('rnn'):
    # Each direction gets its own stack of `num_layers` GRU cells, and every
    # cell is wrapped with output dropout driven by `keep_prob`.
    with tf.variable_scope('forward'):
        fw_cells = MultiRNNCell([
            DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
            for _ in range(num_layers)
        ])

    with tf.variable_scope('Backward'):
        bw_cells = MultiRNNCell([
            DropoutWrapper(GRUCell(num_units), output_keep_prob=keep_prob)
            for _ in range(num_layers)
        ])

    # Run both stacks over the embedded input.
    outputs, states = bidirectional_dynamic_rnn(
        fw_cells,
        bw_cells,
        rnn_input,
        sequence_length=sequence_length,
        dtype=tf.float32,
    )