Example #1
def _create_rnn_cell(n_neurons, n_layers, keep_prob):
    """Summary

    Parameters
    ----------
    n_neurons : TYPE
        Description
    n_layers : TYPE
        Description
    keep_prob : TYPE
        Description

    Returns
    -------
    TYPE
        Description
    """
    import tensorflow as tf
    import tensorflow.contrib.rnn as rnn

    cell_fw = rnn.LayerNormBasicLSTMCell(num_units=n_neurons,
                                         dropout_keep_prob=keep_prob)
    # Build deeper recurrent net if using more than 1 layer
    if n_layers > 1:
        cells = [cell_fw]
        for layer_i in range(1, n_layers):
            with tf.variable_scope('{}'.format(layer_i)):
                cell_fw = rnn.LayerNormBasicLSTMCell(
                    num_units=n_neurons, dropout_keep_prob=keep_prob)
                cells.append(cell_fw)
        cell_fw = rnn.MultiRNNCell(cells)
    return cell_fw
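
A minimal usage sketch for the factory above, assuming TF 1.x; the input shape and hyperparameter values are illustrative assumptions, not part of the original example:

import tensorflow as tf

# hypothetical inputs: [batch_size, n_steps, n_features]
inputs = tf.placeholder(tf.float32, [None, 50, 128], name='inputs')
cell = _create_rnn_cell(n_neurons=256, n_layers=2, keep_prob=0.8)
# dynamic_rnn unrolls the (possibly stacked) cell over the time dimension
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)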
Example #3
 def create_cell(device):
   if rnn_type == "GRU":
     cell = rnn.GRUCell(rnn_size)
   elif rnn_type == "LSTM":
     if 'reuse' in inspect.signature(tf.contrib.rnn.LayerNormBasicLSTMCell.__init__).parameters:
       cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0, reuse=tf.get_variable_scope().reuse)
     else:
       cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0)
   elif rnn_type == "RWA":
     cell = RWACell(rnn_size)
   elif rnn_type == "RAN":
     cell = RANCell(rnn_size, normalize=self.is_training)
   else:
     raise ValueError("Unknown rnn_type {}".format(rnn_type))
   cell = SwitchableDropoutWrapper(rnn.DeviceWrapper(cell, device="/gpu:{}".format(device)), is_train=self.is_training)
   return cell
Example #4
    def RNN(self, x, drop, weights, biases):
        # Prepare data shape to match `rnn` function requirements
        # Current data input shape: (batch_size, timesteps, n_input)
        # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

        # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
        # here we use static_rnn which requires inputs to be a list of tensors
        x = tf.unstack(x, self.timesteps, 1)

        # For dynamic_rnn, the input should instead keep the shape
        # (batch_size, timesteps, n_input) when time_major is False

        # Define an LSTM cell with TensorFlow
        # lstm_cell = rnn.LSTMCell(self.num_hidden, use_peepholes=True)
        # lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=(1 - drop))
        #
        # lstm_cell = cudnn_rnn.CudnnLSTM(self.num_layers, self.num_hidden,
        # dropout=drop)

        lstm_cell = rnn.LayerNormBasicLSTMCell(self.num_hidden,
                                               forget_bias=0.5,
                                               norm_gain=1.0,
                                               norm_shift=0.0,
                                               dropout_keep_prob=(1 - drop))
        # Get lstm cell output
        outputs, states = tf.nn.static_rnn(lstm_cell, x, dtype=tf.float32)
        # outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
        # Linear activation, using rnn inner loop last output
        return tf.matmul(outputs[-1], weights["out"]) + biases["out"]
Example #5
        def create_lstm_cell(layer):
            if hyperparameters.layer_norm:
                if hyperparameters.num_proj:
                    raise Exception(
                        'No support for layer normalization together with projection layer.'
                    )

                cell = rnn.LayerNormBasicLSTMCell(
                    hyperparameters.lstm_state_size,
                    # here, we use the local variable dropout that is set to 0
                    # if we are evaluating.
                    dropout_keep_prob=1 - dropout,
                    layer_norm=hyperparameters.layer_norm)
            else:
                if hyperparameters.num_proj:
                    cell = rnn.LSTMCell(hyperparameters.lstm_state_size,
                                        num_proj=hyperparameters.num_proj)
                else:
                    cell = rnn.LSTMBlockCell(hyperparameters.lstm_state_size,
                                             forget_bias=0)

                if dropout > 0:
                    cell = rnn.DropoutWrapper(cell,
                                              output_keep_prob=1 - dropout)

            return cell
Example #6
def _single_cell(unit_type,
                 num_units,
                 forget_bias,
                 dropout,
                 mode,
                 residual_connection=False,
                 device_str=None,
                 residual_fn=None):
    """
    创建一个RNN单元。
    :param unit_type: RNN类型
    :param num_units: 隐层神经元个数
    :param forget_bias: 遗忘门偏置
    :param dropout: dropout比例
    :param mode: 训练模式(只有train模式下才设置dropout)
    :param residual_connection: 是否使用残差连接
    :param device_str: 设备
    :param residual_fn: 残差方法
    :return:
    """
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        print("  LSTM, forget_bias=%g" % forget_bias, end='')
        single_cell = rnn.BasicLSTMCell(num_units, forget_bias=forget_bias)
    elif unit_type == "gru":
        print("  GRU", end='')
        single_cell = rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        print("  Layer Normalized LSTM, forget_bias=%g" % forget_bias, end='')
        single_cell = rnn.LayerNormBasicLSTMCell(num_units,
                                                 forget_bias=forget_bias,
                                                 layer_norm=True)
    elif unit_type == "nas":
        print("  NASCell", end='')
        single_cell = rnn.NASCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Dropout (= 1 - keep_prob)
    if dropout > 0.0:
        single_cell = rnn.DropoutWrapper(cell=single_cell,
                                         input_keep_prob=(1.0 - dropout))
        print("  %s, dropout=%g " % (type(single_cell).__name__, dropout),
              end='')

    # Residual
    if residual_connection:
        single_cell = rnn.ResidualWrapper(single_cell, residual_fn=residual_fn)
        print("  %s" % type(single_cell).__name__, end='')

    # Device Wrapper
    if device_str:
        single_cell = rnn.DeviceWrapper(single_cell, device_str)
        print("  %s, device=%s" % (type(single_cell).__name__, device_str),
              end='')

    return single_cell
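
For illustration, a hedged sketch of calling this factory to build a layer-normalized LSTM with input dropout, placed on a specific device; the argument values here are assumptions:

cell = _single_cell(unit_type="layer_norm_lstm",
                    num_units=512,
                    forget_bias=1.0,
                    dropout=0.2,
                    mode=tf.contrib.learn.ModeKeys.TRAIN,
                    device_str="/gpu:0")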
Example #7
    def _make_rnn_model(self):
        """Make rnn model."""
        self.y = tf.cast(self.x[:, 1:], dtype=tf.int64)
        self.y_emb = tf.one_hot(self.y, depth=self._params.emb_size)
        tf.logging.info('y.shape=%s', self.y.shape)

        lstm_fw_cell_g = contrib_rnn.LayerNormBasicLSTMCell(
            self._params.hidden_lstm_size,
            layer_norm=self._params.norm_lstm,
            dropout_keep_prob=1 - self.dropout_rate)
        lstm_hidden, _ = tf.nn.dynamic_rnn(lstm_fw_cell_g,
                                           self.x_emb,
                                           dtype=tf.float32)
        # stagger the two directional vectors so that the backward RNN does not
        # reveal the target token; see
        # medium.com/@plusepsilon/the-bidirectional-language-model-1f3961d1fb27
        self.logits = tf.layers.dense(lstm_hidden[:, :-1, :],
                                      units=self._params.vocab_size,
                                      activation=None,
                                      name='logits')
        tf.logging.info('shape of logits=%s', self.logits.shape)

        # cross entropy
        self.loss_i_t = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.y_emb, logits=self.logits)
        self.loss_i = tf.reduce_mean(self.loss_i_t, axis=1)
Example #8
def create_cell(num_units,
                reuse=False,
                layer_norm=False,
                cell="gru",
                scope="rnn"):

    with tf.variable_scope(scope):

        if cell == "gru":
            if layer_norm:
                from neural_toolbox.LayerNormBasicGRUCell import LayerNormBasicGRUCell

                rnn_cell = LayerNormBasicGRUCell(num_units=num_units,
                                                 layer_norm=layer_norm,
                                                 activation=tf.nn.tanh,
                                                 reuse=reuse)
            else:
                rnn_cell = tfc_rnn.GRUCell(num_units=num_units,
                                           activation=tf.nn.tanh,
                                           reuse=reuse)

        elif cell == "lstm":
            rnn_cell = tfc_rnn.LayerNormBasicLSTMCell(num_units=num_units,
                                                      layer_norm=layer_norm,
                                                      activation=tf.nn.tanh,
                                                      reuse=reuse)

        else:
            assert False, "Invalid RNN cell"

    return rnn_cell
Example #9
 def __init__(self, word_embeddings, params):
     self._word_embeddings = word_embeddings
     self._params = params
     self._embedding_size = int(self._word_embeddings._dimension_size)
     # self._lstm_cell = rnn.BasicLSTMCell(num_units=self._embedding_size, reuse=tf.AUTO_REUSE)
     self._lstm_cell = rnn.LayerNormBasicLSTMCell(num_units=self._embedding_size, reuse=tf.AUTO_REUSE)
     self._cause_word_table = tf.constant(params['cause_word_table'], name='cause_word_table')
     self._cause_word_table_length = tf.constant(params['cause_word_table_length'], name='cause_word_table_length')
Example #10
    def __define_network(self):

        # encoder
        with tf.variable_scope('encoder'):
            lstm_encoder = tfrnn.LayerNormBasicLSTMCell(self.num_units,
                                                        layer_norm=False)
            initial_state = lstm_encoder.zero_state(batch_size=self.batch_size,
                                                    dtype=tf.float32)

            _, encoder_state = tf.nn.dynamic_rnn(cell=lstm_encoder,
                                                 inputs=self.input,
                                                 initial_state=initial_state)

        # decoder
        with tf.variable_scope('decoder'):
            reversed_input = tf.reverse(self.input, axis=[1])
            reversed_padded_input = tf.pad(reversed_input,
                                           paddings=[[0, 0], [1, 0], [0, 0]])

            lstm_decoder = tfrnn.LayerNormBasicLSTMCell(self.num_units,
                                                        layer_norm=False)

            decoder_outputs, decoder_state = tf.nn.dynamic_rnn(
                cell=lstm_decoder,
                inputs=reversed_padded_input[:, :-1, :],
                initial_state=encoder_state)

        # output layer
        with tf.variable_scope('output'):
            decoder_outputs = tf.reshape(decoder_outputs, [-1, self.num_units])
            output = fc_layer(self.input_size,
                              activation=None,
                              input=decoder_outputs)

        # loss
        self.point_reconstruction_error = tf.squared_difference(
            tf.reshape(reversed_input, [-1, self.input_size]), output)
        self.point_reconstruction_error = tf.reshape(
            self.point_reconstruction_error,
            [self.batch_size, -1, self.input_size])
        self.point_reconstruction_error = tf.reduce_sum(
            self.point_reconstruction_error, axis=-1)
        self.point_reconstruction_error = tf.reverse(
            self.point_reconstruction_error, axis=[1])

        self.loss = tf.reduce_mean(self.point_reconstruction_error)
Example #11
def decoder(x, decoder_inputs, keep_prob, sequence_length, memory,
            memory_length, first_attention):
    with tf.variable_scope("Decoder") as scope:
        label_embeddings = tf.get_variable(name="embeddings",
                                           shape=[n_classes, embedding_size],
                                           dtype=tf.float32)
        train_inputs_embedded = tf.nn.embedding_lookup(label_embeddings,
                                                       decoder_inputs)
        lstm = rnn.LayerNormBasicLSTMCell(n_hidden,
                                          dropout_keep_prob=keep_prob)
        output_l = layers_core.Dense(n_classes, use_bias=True)
        encoder_state = rnn.LSTMStateTuple(x, x)
        attention_mechanism = BahdanauAttention(
            embedding_size,
            memory=memory,
            memory_sequence_length=memory_length)
        cell = AttentionWrapper(lstm,
                                attention_mechanism,
                                output_attention=False)
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=train_batch_size)
        cell_state = cell_state.clone(cell_state=encoder_state,
                                      attention=first_attention)
        train_helper = TrainingHelper(train_inputs_embedded, sequence_length)
        train_decoder = BasicDecoder(cell,
                                     train_helper,
                                     cell_state,
                                     output_layer=output_l)
        decoder_outputs_train, decoder_state_train, decoder_seq_train = dynamic_decode(
            train_decoder, impute_finished=True)
        tiled_inputs = tile_batch(memory, multiplier=beam_width)
        tiled_sequence_length = tile_batch(memory_length,
                                           multiplier=beam_width)
        tiled_first_attention = tile_batch(first_attention,
                                           multiplier=beam_width)
        attention_mechanism = BahdanauAttention(
            embedding_size,
            memory=tiled_inputs,
            memory_sequence_length=tiled_sequence_length)
        x2 = tile_batch(x, beam_width)
        encoder_state2 = rnn.LSTMStateTuple(x2, x2)
        cell = AttentionWrapper(lstm,
                                attention_mechanism,
                                output_attention=False)
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=test_batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=encoder_state2,
                                      attention=tiled_first_attention)
        infer_decoder = BeamSearchDecoder(cell,
                                          embedding=label_embeddings,
                                          start_tokens=[GO] * test_len,
                                          end_token=EOS,
                                          initial_state=cell_state,
                                          beam_width=beam_width,
                                          output_layer=output_l)
        decoder_outputs_infer, decoder_state_infer, decoder_seq_infer = dynamic_decode(
            infer_decoder, maximum_iterations=4)
        return decoder_outputs_train, decoder_outputs_infer, decoder_state_infer
Example #12
  def __init__(self, config, scope):
    self.scope = scope
    self.config = config
    max_seq_length = config.max_seq_length 
    self.global_step = tf.get_variable('global_step', shape=[], dtype='int32', initializer=tf.constant_initializer(0), trainable=False)
    self.x = tf.placeholder(tf.int32, [None, config.max_docs_length], name="x")      # [batch_size, max_doc_len]
    self.x_mask = tf.placeholder(tf.int32, [None, config.max_docs_length], name="x_mask")      # [batch_size, max_doc_len]
    if config.model_name.endswith("flat"):
      self.y = tf.placeholder(tf.int32, [None, config.n_classes], name="y")
    else:
      self.y = tf.placeholder(tf.int32, [None, config.max_seq_length], name="y")
    print("y", self.y.get_shape())
    self.y_mask = tf.placeholder(tf.int32, [None, max_seq_length], name="y_mask")
    self.y_decoder = tf.placeholder(tf.int32, [None, max_seq_length], name="y-decoder")
    self.x_seq_length = tf.placeholder(tf.int32, [None], name="x_seq_length")
    self.y_seq_length = tf.placeholder(tf.int32, [None], name="y_seq_length")
    self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    self.output_l = layers_core.Dense(config.n_classes, use_bias=True)
    if config.model_name == "hclf_baseline": config.decode_size = config.hidden_size
    else: config.decode_size = 2*config.hidden_size  
    
    if config.project:
      initializer=tf.random_normal_initializer(stddev=0.1)
      self.W_projection = tf.get_variable('W_projection', shape = [config.hidden_size*2 + config.word_embedding_size, config.decode_size], initializer = initializer)
      self.b_projection = tf.get_variable('bias', shape = [config.decode_size])

    if config.concat_w2v and not config.project:  config.decode_size = 2*config.hidden_size + config.word_embedding_size 

    self.lstm = rnn.LayerNormBasicLSTMCell(config.decode_size, dropout_keep_prob=config.keep_prob)  # lstm for decode 
    self.encode_lstm = rnn.LayerNormBasicLSTMCell(config.hidden_size, dropout_keep_prob=config.keep_prob) # lstm for encode 
    # TODO config.emb_mat 
    # self.word_embeddings = tf.constant(config.emb_mat, dtype=tf.float32, name="word_embeddings")
    self.word_embeddings = tf.get_variable("word_embeddings", dtype='float', shape=[config.vocab_size, config.word_embedding_size], initializer=get_initializer(config.emb_mat))
    self.label_embeddings = tf.get_variable(name="label_embeddings", shape=[config.n_classes, config.label_embedding_size], dtype=tf.float32)
    self.xx = tf.nn.embedding_lookup(self.word_embeddings, self.x)  # [None, DL, d]    
    self.yy = tf.nn.embedding_lookup(self.label_embeddings, self.y_decoder) # [None, seq_l, d]    
    self._build_encode(config)
    self._build_train(config)
    if not config.model_name.endswith("flat"):
      self._build_infer(config)
    self._build_loss(config)
    #self.infer_set = set()
    self.summary = tf.summary.merge(tf.get_collection("summaries", scope=self.scope))
Example #13
    def inference(self, reuse=None):

        # initialization
        raw_inputs = self.inputs
        batch_size = self.batch_size
        keep_prob = self.keep_probability
        is_training = self.is_training

        tf.set_random_seed(SEED)  # initialize the random seed at graph level

        lstm_cell = rnn.LayerNormBasicLSTMCell(lstm_cell_size, dropout_keep_prob=keep_prob, reuse=reuse,
                                               dropout_prob_seed=SEED)

        initial_state = lstm_cell.zero_state(batch_size, tf.float32)

        init_sw = tf.zeros([batch_size, int(bdnn_winlen)])  # start sign

        self.mean_bps.append(init_sw)

        init_sw = tf.cast(tf.greater(init_sw, 0.4), tf.float32)
        self.sampled_bps.append(init_sw)

        reuse_recurrent = None

        init_glimpse = self.get_glimpse(raw_inputs, init_sw, reuse=reuse_recurrent)  # (batch_size, glimpse_out)

        inputs = [0] * nGlimpses
        outputs = [0] * nGlimpses
        glimpse = init_glimpse

        for time_step in range(nGlimpses):

            if time_step == 0:
                with tf.variable_scope("core_network", reuse=reuse_recurrent):
                    (cell_output, cell_state) = lstm_cell(glimpse, initial_state)
                    self.cell_outputs.append(initial_state)
            else:
                reuse_recurrent = True
                with tf.variable_scope("core_network", reuse=reuse_recurrent):
                    (cell_output, cell_state) = lstm_cell(glimpse, cell_state)

            inputs[time_step] = glimpse
            outputs[time_step] = cell_output

            if time_step != nGlimpses - 1:  # not final time_step

                glimpse = self.get_next_input(cell_output, reuse=reuse_recurrent)

            else:  # final time_step
                with tf.variable_scope("baseline", reuse=reuse_recurrent):

                    baseline = tf.sigmoid(affine_transform(cell_output, 1, name='baseline'))

                    self.baselines.append(baseline)

        return outputs
Example #14
    def BiRNN(self, x, weight, bias, initializer, activation):

        x = tf.unstack(tf.reshape(
            x, [tf.shape(x)[0], self.STEPS,
                int(self.X_DIM / self.STEPS)]),
                       self.STEPS,
                       axis=1)

        # lstm_fw_cell = rnn.CoupledInputForgetGateLSTMCell(self.CELL_SIZE,
        # 													forget_bias=10.0,
        # 													use_peepholes = True,
        # 													proj_clip = 15.0,
        # 													initializer = initializer,
        # 													activation = activation)
        # lstm_bw_cell = rnn.CoupledInputForgetGateLSTMCell(self.CELL_SIZE,
        # 													forget_bias=10.0,
        # 													use_peepholes = True,
        # 													proj_clip = 15.0,
        # 													initializer = initializer,
        # 													activation = activation)

        lstm_fw_cell = rnn.LayerNormBasicLSTMCell(self.CELL_SIZE,
                                                  forget_bias=10.0,
                                                  dropout_keep_prob=self.prob,
                                                  activation=activation)
        lstm_bw_cell = rnn.LayerNormBasicLSTMCell(self.CELL_SIZE,
                                                  forget_bias=10.0,
                                                  dropout_keep_prob=self.prob,
                                                  activation=activation)

        # lstm_fw_cell = rnn.GRUCell(self.CELL_SIZE, activation = tf.nn.relu)
        # lstm_bw_cell = rnn.GRUCell(self.CELL_SIZE, activation = tf.nn.relu)

        outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell,
                                                       lstm_bw_cell,
                                                       x,
                                                       dtype=tf.float32)

        #out = tf.layers.batch_normalization(outputs[-1])

        return tf.matmul(outputs[-1], weight) + bias, outputs
Example #15
	def create_model(self, x, dropout=1, predict=False):
		if not predict:
			self.create_vars()
		lstm_cell = [rnn.LayerNormBasicLSTMCell(self.rnn_size, activation=tf.nn.relu,\
					reuse=tf.AUTO_REUSE, dropout_keep_prob=dropout) for _ in range(self.num_layer)]
		lstm_cell = rnn.MultiRNNCell(lstm_cell)
		outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
		
		#output shape is (train size, seq len, n classes)
		#unstack turns it into a list of length seq len, each with shape (train size, n classes)
		outputs = tf.unstack(outputs, self.seq_len, 1)
		return tf.matmul(outputs[-1], self.W) + self.b
Example #16
 def create_cell():
     if self.config.RNN_CELL == 'lnlstm':
         cell = rnn.LayerNormBasicLSTMCell(self.config.ENC_RNN_SIZE)
     elif self.config.RNN_CELL == 'lstm':
         cell = rnn.BasicLSTMCell(self.config.ENC_RNN_SIZE)
     elif self.config.RNN_CELL == 'gru':
         cell = rnn.GRUCell(self.config.ENC_RNN_SIZE)
     else:
          logger.error('rnn_cell {} not supported'.format(self.config.RNN_CELL))
          raise ValueError('rnn_cell {} not supported'.format(self.config.RNN_CELL))
     if self.is_training:
         cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.config.DROPOUT_KEEP)
     return cell
Example #17
    def lstm_cell(self, hparams, train):
        keep_prob = 1.0 - hparams.rec_dropout * tf.to_float(train)

        recurrent_dropout_cell = contrib_rnn.LayerNormBasicLSTMCell(
            hparams.hidden_size,
            layer_norm=hparams.layer_norm,
            dropout_keep_prob=keep_prob)

        if hparams.ff_dropout:
            return contrib_rnn.DropoutWrapper(recurrent_dropout_cell,
                                              input_keep_prob=keep_prob)
        return recurrent_dropout_cell
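
Note how the keep probability above is derived from a boolean train flag, so both the recurrent dropout inside LayerNormBasicLSTMCell and the input dropout in the wrapper switch off automatically outside training. A minimal sketch of the same idiom (the placeholder and rate are assumptions):

import tensorflow as tf

train = tf.placeholder(tf.bool, [], name='train')
rec_dropout = 0.3  # assumed hyperparameter
# evaluates to 0.7 while training and to 1.0 otherwise
keep_prob = 1.0 - rec_dropout * tf.to_float(train)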
Example #18
    def __init__(self, ob_space, ac_space, lstm_size=256, **kwargs):
        self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))

        rank = len(ob_space)

        if rank == 3:  # pixel input
            for i in range(4):
                x = tf.nn.elu(
                    conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
        elif rank == 1:  # plain features
            #x = tf.nn.elu(linear(x, 256, "l1", normalized_columns_initializer(0.01)))
            pass
        else:
            raise TypeError("observation space must have rank 1 or 3, got %d" %
                            rank)

        # introduce a "fake" batch dimension of 1 after flatten so that we can do LSTM over time dim
        x = tf.expand_dims(flatten(x), [0])

        size = lstm_size
        lnlstm = rnn.LayerNormBasicLSTMCell(size)
        self.state_size = lnlstm.state_size
        step_size = tf.shape(self.x)[:1]

        c_init = np.zeros((1, lnlstm.state_size.c), np.float32)
        h_init = np.zeros((1, lnlstm.state_size.h), np.float32)
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lnlstm.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lnlstm.state_size.h])
        self.state_in = [c_in, h_in]

        state_in = rnn.LSTMStateTuple(c_in, h_in)
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(lnlstm,
                                                     x,
                                                     initial_state=state_in,
                                                     sequence_length=step_size,
                                                     time_major=False)
        lstm_c, lstm_h = lstm_state
        x = tf.reshape(lstm_outputs, [-1, size])
        self.logits = linear(x, ac_space, "action",
                             normalized_columns_initializer(0.01))
        self.vf = tf.reshape(
            linear(x, 1, "value", normalized_columns_initializer(1.0)), [-1])
        self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
        self.sample = categorical_sample(self.logits, ac_space)[0, :]
        self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          tf.get_variable_scope().name)
Example #19
    def _which_cell(self):
        """
		RNN 类型
		:return: 
		"""
        cell_tmp = None
        if self.cell_type == 'lstm':
            cell_tmp = rnn.LayerNormBasicLSTMCell(
                self.hidden_unit, dropout_keep_prob=self.dropout_rate)
            #cell_tmp = rnn.BasicLSTMCell(self.hidden_unit)
        elif self.cell_type == 'gru':
            cell_tmp = rnn.GRUCell(self.hidden_unit)
        # apply dropout if requested
        if self.dropout_rate is not None:
            cell_tmp = rnn.DropoutWrapper(cell_tmp,
                                          output_keep_prob=self.dropout_rate)
        return cell_tmp
Example #20
def _single_cell(unit_type,
                 num_units,
                 forget_bias,
                 dropout,
                 mode,
                 residual_connection=False,
                 residual_fn=None,
                 trainable=True):
    """Create an instance of a single RNN cell."""
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.estimator.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        single_cell = contrib_rnn.LSTMCell(num_units,
                                           forget_bias=forget_bias,
                                           trainable=trainable)
    elif unit_type == "gru":
        single_cell = contrib_rnn.GRUCell(num_units, trainable=trainable)
    elif unit_type == "layer_norm_lstm":
        single_cell = contrib_rnn.LayerNormBasicLSTMCell(
            num_units,
            forget_bias=forget_bias,
            layer_norm=True,
            trainable=trainable)
    elif unit_type == "nas":
        single_cell = contrib_rnn.NASCell(num_units, trainable=trainable)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Dropout (= 1 - keep_prob).
    if dropout > 0.0:
        single_cell = contrib_rnn.DropoutWrapper(cell=single_cell,
                                                 input_keep_prob=(1.0 -
                                                                  dropout))

    # Residual.
    if residual_connection:
        single_cell = contrib_rnn.ResidualWrapper(single_cell,
                                                  residual_fn=residual_fn)

    return single_cell
Example #21
 def cell(dims):
     '''Creates RNN/GRU/LSTM cell to use in multiple layers.'''
     # RNN
     if hps.model_type == "BIRNN":
         cell = rnn.BasicRNNCell(dims)
     # LSTM
     elif hps.model_type == "BILSTM":
         cell = rnn.LSTMCell(dims, state_is_tuple=True)
     # LSTM with peepholes
     elif hps.model_type == "BILSTMP":
         cell = rnn.LSTMCell(dims,
                             state_is_tuple=True,
                             use_peepholes=True)
     # GRU
     elif hps.model_type == "BIGRU":
         cell = rnn.GRUCell(dims)
     # Update Gate RNN
     elif hps.model_type == "BIUGRNN":
         cell = rnn.UGRNNCell(dims)
     # Bi-directional Layer Norm LSTM
     elif hps.model_type == "BILNLSTM":
         cell = rnn.LayerNormBasicLSTMCell(dims)
     return rnn.DropoutWrapper(cell, output_keep_prob=self.dropout)
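
Cells produced by this factory are typically used in forward/backward pairs; a hedged sketch of wiring two of them into a bidirectional RNN (the input placeholder and sizes are assumptions):

inputs = tf.placeholder(tf.float32, [None, 100, 64])  # [batch, time, features]
fw_cell, bw_cell = cell(128), cell(128)               # two distinct cells
outputs, states = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs,
                                                  dtype=tf.float32)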
Example #22
def DilatedLSTM(s_t, size, state_in, idx_in, chunks=8, norm=False, dropout=1.0):
    # lstm = rnn.LSTMCell(size, state_is_tuple=True)
    lstm = rnn.LayerNormBasicLSTMCell(size, layer_norm=norm, activation=tf.nn.relu, dropout_keep_prob=dropout)
    c_init = np.zeros((chunks, lstm.state_size.c), np.float32)
    h_init = np.zeros((chunks, lstm.state_size.h), np.float32)
    state_init = [c_init, h_init]

    def dlstm_scan_fn(previous_output, current_input):
        i = previous_output[2]
        c = previous_output[1][0]
        h = previous_output[1][1]
        # old_state = [tf.expand_dims(c[i],[0]),tf.expand_dims(h[i],[0])]
        old_state = rnn.LSTMStateTuple(tf.expand_dims(c[i], [0]), tf.expand_dims(h[i], [0]))
        out, state_out = lstm(current_input, old_state)
        c = tf.stop_gradient(c)
        h = tf.stop_gradient(h)
        co = state_out[0]
        ho = state_out[1]
        col = tf.expand_dims(tf.one_hot(i, chunks), [1])
        new_mask = col
        for _ in range(size - 1):
            new_mask = tf.concat([new_mask, col], axis=1)
        old_mask = 1 - new_mask
        c_out = c * old_mask + co * new_mask
        h_out = h * old_mask + ho * new_mask
        state_out = [c_out, h_out]
        i += tf.constant(1)
        new_i = tf.mod(i, chunks)
        out = tf.reduce_mean(h_out, axis=0)
        return out, state_out, new_i

    rnn_outputs, final_states, out_idx = tf.scan(dlstm_scan_fn,
                                                 tf.transpose(s_t, [1, 0, 2]),
                                                 initializer=(state_in[1][0], state_in, idx_in))
    state_out = [final_states[0][0, :, :], final_states[1][0, :, :]]
    return rnn_outputs, state_init, state_out, out_idx[-1]
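
In the scan step above, only the state chunk selected by the one-hot column is overwritten at each timestep. A small sketch of how that mask is built, equivalent to the concat loop (shapes assumed):

import tensorflow as tf

chunks, size = 8, 4
i = tf.constant(2)
col = tf.expand_dims(tf.one_hot(i, chunks), [1])  # [chunks, 1], 1.0 at row i
new_mask = tf.tile(col, [1, size])                # [chunks, size]
old_mask = 1 - new_mask                           # keeps all other rows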
Example #23
def _match_model_fn_v6(features, labels, mode, params):
    '''
    This version uses the original seq2seq, but uses an LSTM that merges the
    cause and word embedding tables, and it uses the input embedding as the
    attention query.
    '''
    '''set parameters'''
    with tf.device('/gpu:0'), tf.variable_scope('model',
                                                reuse=tf.AUTO_REUSE) as scope:
        # set hyper parameters
        embedding_size = params['embedding_size']
        num_units = params['num_units']
        if mode == tf.estimator.ModeKeys.TRAIN:
            dropout_keep_prob = params['dropout_keep_prob']
        else:
            dropout_keep_prob = 1
        beam_width = params['beam_width']
        EOS = params['EOS']
        SOS = params['SOS']
        # set training parameters
        max_sequence_length = params['max_sequence_length']
        max_cause_length = params['max_cause_length']
        vocab_size = params['vocab_size']
        num_causes = EOS + 1
        '''process input and target'''
        # input layer
        input = tf.reshape(features['content'], [-1, max_sequence_length])
        batch_size = tf.shape(input)[0]
        input_length = tf.reshape(features['content_length'], [batch_size])
        cause_label = tf.reshape(labels['cause_label'],
                                 [batch_size, max_cause_length])
        cause_length = tf.reshape(labels['cause_length'], [batch_size])

        # necessary cast
        input = tf.cast(input, dtype=tf.int32)
        input_length = tf.cast(input_length, dtype=tf.int32)
        cause_label = tf.cast(cause_label, dtype=tf.int32)
        cause_length = tf.cast(cause_length, dtype=tf.int32)

        # word embedding layer
        embeddings_word = load_embedding(params['word2vec_model'], vocab_size,
                                         embedding_size)

        embedded_input = gen_array_ops.gather_v2(embeddings_word,
                                                 input,
                                                 axis=0)
        # cause-label embedding layer
        cause_encoder = CauseEncoder(word_embeddings=embeddings_word,
                                     params=params)
        embedded_cause = cause_encoder.apply(cause_label)

        # cause lookup table
        cause_table = tf.constant(params['cause_table'], dtype=tf.int32)
        encoder_output = encoders(embedded_input, input_length, params, mode)
        '''hierarchical multilabel decoder'''
        # build lstm cell with attention
        lstm = rnn.LayerNormBasicLSTMCell(num_units=num_units,
                                          reuse=tf.AUTO_REUSE,
                                          dropout_keep_prob=dropout_keep_prob)
        # lstm = rnn.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)
        # the subtraction at the end of the line is an element-wise subtraction supported by tensorflow

        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=encoder_output.attention_values,
            memory_sequence_length=encoder_output.attention_values_length)
        initial_state = rnn.LSTMStateTuple(encoder_output.initial_state,
                                           encoder_output.initial_state)
        cell = MyAttentionWrapper_v2(lstm,
                                     attention_mechanism,
                                     sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32, batch_size=batch_size)
        cell_state = cell_state.clone(cell_state=initial_state,
                                      attention=encoder_output.final_state)

        # extra dense layer to project a rnn output into a classification
        project_dense = Dense(num_causes,
                              _reuse=tf.AUTO_REUSE,
                              _scope='project_dense_scope',
                              name='project_dense')

        # train_decoder
        train_helper = MyTrainingHelper(embedded_cause, cause_label,
                                        cause_length)
        train_decoder = MyBasicDecoder(cell,
                                       train_helper,
                                       cell_state,
                                       lookup_table=cause_table,
                                       output_layer=project_dense,
                                       hie=params['hie'])

        decoder_output_train, decoder_state_train, decoder_len_train = dynamic_decode(
            train_decoder,
            maximum_iterations=max_cause_length - 1,
            parallel_iterations=64,
            scope='decoder')

        # beam_width = 1
        tiled_memory_sequence_length = tile_batch(
            encoder_output.attention_values_length, multiplier=beam_width)
        tiled_memory = tile_batch(encoder_output.attention_values,
                                  multiplier=beam_width)
        tiled_encoder_output_initial_state = tile_batch(
            encoder_output.initial_state, multiplier=beam_width)
        tiled_initial_state = rnn.LSTMStateTuple(
            tiled_encoder_output_initial_state,
            tiled_encoder_output_initial_state)
        tiled_first_attention = tile_batch(encoder_output.final_state,
                                           multiplier=beam_width)

        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=tiled_memory,
            memory_sequence_length=tiled_memory_sequence_length)

        cell = MyAttentionWrapper_v2(lstm,
                                     attention_mechanism,
                                     sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=tiled_initial_state,
                                      attention=tiled_first_attention)
        infer_decoder = MyBeamSearchDecoder(cell,
                                            embedding=cause_encoder,
                                            sots=tf.fill([batch_size], SOS),
                                            start_tokens=tf.fill([batch_size],
                                                                 SOS),
                                            end_token=EOS,
                                            initial_state=cell_state,
                                            beam_width=beam_width,
                                            output_layer=project_dense,
                                            lookup_table=cause_table,
                                            length_penalty_weight=0.7,
                                            hie=params['hie'])

        cause_output_infer, cause_state_infer, cause_length_infer = dynamic_decode(
            infer_decoder,
            parallel_iterations=64,
            maximum_iterations=max_cause_length - 1,
            scope='decoder')

        # loss
        mask_for_cause = tf.sequence_mask(cause_length - 1,
                                          max_cause_length - 1,
                                          dtype=tf.float32)
        # loss = sequence_loss(logits=padded_train_output, targets=cause_label, weights=mask_for_cause, name='loss')
        tmp_padding = tf.pad(decoder_output_train.rnn_output,
                             [[0, 0],
                              [
                                  0, max_cause_length - 1 -
                                  tf.shape(decoder_output_train.rnn_output)[1]
                              ], [0, 0]],
                             constant_values=0)
        loss = _compute_loss(tmp_padding, cause_label, mask_for_cause,
                             batch_size)
        # predicted_ids: [batch_size, max_cause_length, beam_width]

        predicted_and_cause_ids = tf.transpose(
            cause_output_infer.predicted_ids,
            perm=[0, 2, 1],
            name='predicted_cause_ids')

        # for monitoring
        cause_label_expanded = tf.reshape(cause_label[:, 1:],
                                          [-1, 1, max_cause_length - 1])
        predicted_and_cause_ids = tf.pad(
            predicted_and_cause_ids,
            [[0, 0], [0, 0],
             [0, max_cause_length - 1 - tf.shape(predicted_and_cause_ids)[2]]],
            constant_values=EOS)
        predicted_and_cause_ids = tf.concat(
            [predicted_and_cause_ids, cause_label_expanded],
            axis=1,
            name='predicted_and_cause_ids')
        predicted_and_cause_ids = tf.reshape(
            predicted_and_cause_ids,
            [-1, beam_width + 1, max_cause_length - 1])
        predicted_and_cause_ids_train = tf.concat(
            [decoder_output_train.sample_id, cause_label[:, 1:]],
            axis=1,
            name='predicted_and_cause_ids_train')

        predictions = {
            'predicted_and_cause_ids': predicted_and_cause_ids,
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # warm_up_constant = params['warm_up_steps'] ** (-1.5)
            # embedding_constant = embedding_size ** (-0.5)
            # global_step = tf.to_float(tf.train.get_global_step())
            # learning_rate = tf.minimum(1 / tf.sqrt(global_step),
            #                            warm_up_constant * global_step) * embedding_constant
            # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.98, epsilon=1e-9)
            optimizer = tf.train.AdamOptimizer()
            # # train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
            # '''using gradient clipping'''
            # loss = tf.Print(loss, [loss, 'to be clear, this is the loss'])
            grads_and_vars = optimizer.compute_gradients(loss)
            clipped_gvs = [
                ele if ele[0] is None else
                (tf.clip_by_value(ele[0], -0.1, 0.1), ele[1])
                for ele in grads_and_vars
            ]
            train_op = optimizer.apply_gradients(
                clipped_gvs, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # predicted_cause_ids shape = [batch_size, cause_length]
        # cause_label = [batch_size, cause_length]
        #  select the predicted cause with the highest probability
        # todo: evaluation
        # bi_predicted_cause_ids = binarizer(predicted_cause_ids[:, 0, :], num_causes)
        # bi_cause_label = binarizer(cause_label, num_causes)

        # todo: for now, the evaluation work has to be done outside the estimator
        eval_metric_ops = {
            'predicted_and_cause_ids':
            tf.contrib.metrics.streaming_concat(predicted_and_cause_ids),
            # 'precision': tf.metrics.precision(bi_cause_label, bi_predicted_cause_ids),
            # 'recall': tf.metrics.recall(bi_cause_label, bi_predicted_cause_ids),
            # 'f1-score': f_score(bi_cause_label, bi_predicted_cause_ids),
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
Example #24
keep_prob = tf.placeholder(tf.float32)

xx = tf.nn.embedding_lookup(word_embeddings, x)  # [None, DL, d]
yy = tf.nn.embedding_lookup(label_embeddings, y_decoder)  # [None, seq_l, d]

# encode here
''' 
lstm = rnn.LayerNormBasicLSTMCell(hidden_size/2, dropout_keep_prob=keep_prob)
outputs, output_states = tf.nn.bidirectional_dynamic_rnn(lstm, lstm, xx, dtype='float', sequence_length=x_seq_length)   
xx_context = tf.concat(outputs, 2)   # [None, DL, 2*hd]
xx_final = tf.concat(output_states, 1)  # [None, 2*hd]
x_mask = tf.cast(x_mask, "float")
first_attention = tf.reduce_mean(xx_context,  1)    # [None, 2*hd]
'''

lstm = rnn.LayerNormBasicLSTMCell(hidden_size, dropout_keep_prob=keep_prob)
outputs, output_states = tf.nn.dynamic_rnn(lstm,
                                           xx,
                                           dtype='float',
                                           sequence_length=x_seq_length)
xx_context = outputs  # tf.concat(outputs, 2)   # [None, DL, 2*hd]
xx_final = output_states[0]  # tf.concat(output_states, 1)  # [None, 2*hd]
x_mask = tf.cast(x_mask, "float")
first_attention = tf.reduce_mean(xx_context, 1)  # [None, 2*hd]
# decode
output_l = layers_core.Dense(n_classes, use_bias=True)
encoder_state = rnn.LSTMStateTuple(xx_final, xx_final)
attention_mechanism = BahdanauAttention(hidden_size,
                                        memory=xx_context,
                                        memory_sequence_length=x_seq_length)
Example #25
 def _rnn_cell(self):
     cell = rnn.LayerNormBasicLSTMCell(self.rnn_hidden_size,
                                       dropout_keep_prob=1 -
                                       self.rnn_dropout_rate)
     return cell
Example #26
#--------------------------------------Define Graph---------------------------------------------------#
graph = tf.Graph()
with graph.as_default():

    #------------------------------------construct LSTM------------------------------------------#
    # placeholders
    X_p = tf.placeholder(dtype=tf.float32,
                         shape=(None, TIME_STEPS, 28),
                         name="input_placeholder")
    y_p = tf.placeholder(dtype=tf.float32,
                         shape=(None, 10),
                         name="pred_placeholder")

    # stacked layer-norm LSTM cell instances
    ln_forward_1 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS1)
    ln_forward_2 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS)
    ln_forward = rnn.MultiRNNCell(cells=[ln_forward_1, ln_forward_2])

    ln_backward_1 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS1)
    ln_backward_2 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS)
    ln_backward = rnn.MultiRNNCell(cells=[ln_backward_1, ln_backward_2])

    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=ln_forward,
                                                      cell_bw=ln_backward,
                                                      inputs=X_p,
                                                      dtype=tf.float32)

    outputs_fw = outputs[0]
    outputs_bw = outputs[1]
    h = outputs_fw[:, -1, :] + outputs_bw[:, -1, :]
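
A hedged sketch of driving this graph with a session (the batch values are illustrative, and the TIME_STEPS constant is assumed to be defined elsewhere):

import numpy as np

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    batch_x = np.random.rand(32, TIME_STEPS, 28).astype(np.float32)
    h_val = sess.run(h, feed_dict={X_p: batch_x})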
Example #27
 def get_rnn_cell(self):
     return rnn.DropoutWrapper(rnn.LayerNormBasicLSTMCell(self.hidden_dim),
                               input_keep_prob=self.dropout_keep_prob_t,
                               output_keep_prob=self.dropout_keep_prob_t)
Example #28
 def __init__(self, cause_table, initial_state, final_state, params, mode):
     super(LSTM_Attention, self).__init__(cause_table, initial_state,
                                          final_state, params, mode)
     self._lstm_cell = rnn.LayerNormBasicLSTMCell(
         num_units=params['num_units'],
         dropout_keep_prob=self._dropout_keep_prob)
Example #29
 def single_lnlstm(self, layer_size):
     lnlstm_cell = rnn.LayerNormBasicLSTMCell(
         layer_size, dropout_keep_prob=self.dropout_rate)
     return lnlstm_cell
Example #30
    def declare_model(self, skip_preprocess=False):
        if not skip_preprocess:
            assert self.data_prepared, "Preprocess your data first"

        # n classes (total number of unique characters)
        self.vocab_size = len(list(self.lexicon.keys()))

        # number of tweets to process
        self.num_tweets = self.twitter_data.Clean_Tweets.values.shape[0]

        self.X = tf.placeholder(tf.int32,
                                [self.batch_size, self.fixed_tweet_size - 1],
                                name='X')
        one_hot_encoded = tf.one_hot(self.X, self.vocab_size)
        self.y = tf.placeholder(tf.int32,
                                [self.batch_size, self.fixed_tweet_size - 1],
                                name='y')
        labels = tf.one_hot(self.y, self.vocab_size)

        self.testX = tf.placeholder(tf.int32, [1, None], name='testX')
        test_one_hot_encoded = tf.one_hot(self.testX, self.vocab_size)

        rnn_layers = [
            rnn.LayerNormBasicLSTMCell(size, forget_bias=1.0)
            for size in self.model_shape
        ]
        self.multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
        self.lstm_init_value = self.multi_rnn_cell.zero_state(
            self.batch_size, tf.float32)
        self.test_lstm_init_value = self.multi_rnn_cell.zero_state(
            1, tf.float32)
        self.outputs, self.states = tf.nn.dynamic_rnn(
            self.multi_rnn_cell,
            one_hot_encoded,
            initial_state=self.lstm_init_value,
            dtype=tf.float32)
        self.test_outputs, self.test_states = tf.nn.dynamic_rnn(
            self.multi_rnn_cell,
            test_one_hot_encoded,
            initial_state=self.test_lstm_init_value,
            dtype=tf.float32)

        self.flat_outputs = tf.reshape(self.outputs,
                                       [-1, self.model_shape[-1]])
        self.test_flat_outputs = tf.reshape(self.test_outputs,
                                            [-1, self.model_shape[-1]])

        self.logits = tf.layers.dense(self.flat_outputs,
                                      self.vocab_size,
                                      None,
                                      True,
                                      tf.orthogonal_initializer(),
                                      name='dense')
        self.test_logits = tf.layers.dense(self.test_flat_outputs,
                                           self.vocab_size,
                                           None,
                                           True,
                                           tf.orthogonal_initializer(),
                                           name='testdense')  # might not reuse

        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits,
                labels=tf.reshape(labels, [-1, self.vocab_size])))
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.loss)