Ejemplo n.º 1
0
def lbirnn_stock(inputs,
                 lengths,
                 is_training,
                 num_layers=2,
                 cell_type=tf.contrib.rnn.GRUCell,
                 cell_size=64,
                 initial_state_fwd=None,
                 initial_state_bwd=None,
                 **kwargs):
    """Serial stacked bi-directional RNN returning the final hidden states

    Every stage is run through `_lbirnn_stock_helper`. The forward and
    backward states returned for one stage are fed as the initial states
    of the next stage. The result is built from the states of the last
    stage only.

    Inputs
    _____
      All arguments denoted with (*) should be given as lists,
      one element per stage in the series.

      inputs (*): input of each stage, forwarded to the helper
      lengths (*): sequence lengths of each stage, forwarded to the helper
      is_training: forwarded to the helper
      num_layers: number of stacked layers in the bi-RNN
      cell_type: tf.contrib.rnn.GRUCell or tf.contrib.rnn.LSTMCell
      cell_size: forwarded to the helper
      initial_state_fwd: initial forward state of the first stage, may be None
      initial_state_bwd: initial backward state of the first stage, may be None
      **kwargs: forwarded unchanged to the helper

    Outputs
    _______
      Concatenation along axis 1 of the top-layer forward and backward
      hidden states of the last stage.

    Raises
    ______
      ValueError: if `cell_type` is neither GRUCell nor LSTMCell
    """
    validate_extractor_inputs(inputs, lengths)

    is_gru = cell_type == tf.contrib.rnn.GRUCell
    is_lstm = cell_type == tf.contrib.rnn.LSTMCell
    if not (is_gru or is_lstm):
        # Fail before any graph is built instead of hitting an unbound
        # `output` at the very end.
        raise ValueError(
            "lbirnn_stock only supports GRUCell and LSTMCell, got "
            "{}".format(cell_type))

    num_stages = len(inputs)

    fwd_ = initial_state_fwd
    bwd_ = initial_state_bwd

    prev_varscope = None
    for n_stage in xrange(num_stages):
        with tf.variable_scope(
                "serial-lbirnn-stock-seq{}".format(n_stage)) as varscope:
            if prev_varscope is not None:
                prev_varscope.reuse_variables()
            _, states = _lbirnn_stock_helper(inputs[n_stage],
                                             lengths[n_stage],
                                             is_training=is_training,
                                             num_layers=num_layers,
                                             cell_type=cell_type,
                                             cell_size=cell_size,
                                             initial_state_fwd=fwd_,
                                             initial_state_bwd=bwd_,
                                             scope=varscope,
                                             **kwargs)
            # The states of this stage seed the next stage
            fwd_ = states[0]
            bwd_ = states[1]
            prev_varscope = varscope

    # `states` = (forward states, backward states) of the last stage
    top_fwd, top_bwd = states[0], states[1]

    if num_layers > 1:
        # One entry per layer: keep the top layer only.
        # (cf. https://github.com/coastalcph/mtl-disparate/blob/master/mtl/nn.py#L43)
        # NOTE(review): for GRU this assumes the helper returns per-layer
        # states exactly as it does for LSTM -- confirm against the helper.
        top_fwd, top_bwd = top_fwd[-1], top_bwd[-1]

    if is_lstm:
        # LSTM state is a (cx, hx) pair: keep the hidden state hx.
        # (cf. https://github.com/coastalcph/mtl-disparate/blob/master/mtl/nn.py#L40)
        top_fwd, top_bwd = top_fwd[1], top_bwd[1]

    # concatenate hx_fwd and hx_bwd of top layer
    output = tf.concat([top_fwd, top_bwd], 1)

    return output
Ejemplo n.º 2
0
def cnn_extractor(inputs, lengths, num_filter, max_width, activation_fn,
                  reducer, **kwargs):
    """Serial CNN extractor with conditioning between stages

    The first stage is convolved and pooled on its own. For every later
    stage, the pooled features of the previous stage are repeated over the
    time dimension, concatenated to the stage input on the last axis,
    masked at padded positions, and then convolved and pooled.

    :param inputs: list with one input Tensor per stage; indexed as
        <batch, time, features> by this function
    :param lengths: list with one sequence-length Tensor per stage
    :param num_filter: forwarded to `_conv_and_pool`
    :param max_width: forwarded to `_conv_and_pool`
    :param activation_fn: forwarded to `_conv_and_pool`
    :param reducer: forwarded to `_conv_and_pool`
    :param kwargs: accepted for interface compatibility, unused here
    :return: the pooled features of the last stage
    """
    validate_extractor_inputs(inputs, lengths)

    num_stages = len(inputs)

    prev_varscope = None
    for n_stage in xrange(num_stages):
        with tf.variable_scope("cnn-seq{}".format(n_stage)) as varscope:
            if prev_varscope is not None:
                prev_varscope.reuse_variables()

            stage_inputs = inputs[n_stage]
            stage_lengths = lengths[n_stage]

            if n_stage == 0:
                cond_inputs = stage_inputs
            else:
                # condition reading of seq_n on learned features of seq_n-1

                # Use the real (padded) time dimension of this stage rather
                # than the longest length in the batch, so that tiling and
                # masking always line up with `stage_inputs`.
                n_time_steps = tf.shape(stage_inputs)[1]

                features = tf.expand_dims(features, axis=1)
                # tile over time dimension: multiples = [1, time, 1]
                features = tf.tile(features,
                                   tf.stack([1, n_time_steps, 1]))

                # condition via concatenation
                cond_inputs = tf.concat([stage_inputs, features], axis=2)

                # zero out padded time steps
                mask = tf.sequence_mask(stage_lengths,
                                        maxlen=n_time_steps,
                                        dtype=tf.float32)
                mask = tf.expand_dims(mask, axis=2)
                cond_inputs = tf.multiply(cond_inputs, mask)

            features = _conv_and_pool(cond_inputs,
                                      lengths=stage_lengths,
                                      num_filter=num_filter,
                                      max_width=max_width,
                                      activation_fn=activation_fn,
                                      reducer=reducer)

            prev_varscope = varscope

    outputs = features

    return outputs
Ejemplo n.º 3
0
def paragram_phrase(inputs,
                    lengths,
                    reducer,
                    apply_activation,
                    activation_fn):
    """Encode every stage with `_paragram_phrase_helper` and concatenate

    The helper is always called without activation; the optional dense
    layer is applied once, on the concatenation of all stage encodings.

    :param inputs: list with one input per stage
    :param lengths: list with one sequence-length entry per stage
    :param reducer: forwarded to `_paragram_phrase_helper`
    :param apply_activation: whether to add a dense layer on the result
    :param activation_fn: activation of that dense layer
    :return: the stage encodings concatenated along axis 1, passed through
        a dense layer of the same width when `apply_activation` is set
    """
    validate_extractor_inputs(inputs, lengths)

    stage_codes = []
    last_scope = None
    for stage in xrange(len(inputs)):
        scope_name = "paragram-seq{}".format(stage)
        with tf.variable_scope(scope_name) as scope:
            if last_scope is not None:
                last_scope.reuse_variables()
            stage_code = _paragram_phrase_helper(inputs[stage],
                                                 lengths[stage],
                                                 reducer=reducer,
                                                 apply_activation=False,
                                                 activation_fn=None)
            stage_codes.append(stage_code)
            last_scope = scope

    # each stage encoding must be <batch_size, embed_dim>
    stage_ranks = [len(c.get_shape()) for c in stage_codes]
    assert all(r == 2 for r in stage_ranks)
    merged = tf.concat(stage_codes, axis=1)

    if not apply_activation:
        return merged

    # keep same dimensionality
    width = merged.get_shape().as_list()[1]
    return dense_layer(merged,
                       width,
                       name="paragram-output",
                       activation=activation_fn)
Ejemplo n.º 4
0
def lbirnn(inputs,
           lengths,
           is_training,
           indices=None,
           num_layers=2,
           cell_type=tf.contrib.rnn.GRUCell,
           cell_size=64,
           initial_state_fwd=None,
           initial_state_bwd=None,
           **kwargs):
    """Serial stacked linear chain bi-directional RNN

    The stages are processed in order. The last forward/backward states
    of a stage become the initial states of the following stage. Only the
    encodings of the final stage are returned.

    `indices` is handed to the final stage only; every earlier stage is
    run with `indices=None`.

    Inputs
    _____
      All arguments denoted with (*) should be given as lists,
      one element per stage in the series. The specifications given
      below are for a single stage.

      inputs (*): Tensor of size [batch_size, batch_len, embed_size]
      lengths (*): Tensor of size [batch_size]
      is_training: forwarded to the per-stage helper
      indices: Tensor of which token index in each batch item should be
               output; shape: [batch_size] or [batch_size, 1]
      num_layers: number of stacked layers in the bi-RNN
      cell_type: type of RNN cell to use (e.g., LSTM, GRU)
      cell_size: cell's output size
      initial_state_fwd: initial state for forward direction, may be None
      initial_state_bwd: initial state for backward direction, may be None
      **kwargs: forwarded unchanged to the per-stage helper

    Outputs
    _______
      The forward and backward encodings of the last stage, concatenated
      on the last axis.
      NOTE(review): presumably [batch_size, 2 * cell_size] when `indices`
      is given and [batch_size, batch_len, 2 * cell_size] otherwise --
      confirm against `_lbirnn_helper`.
    """
    validate_extractor_inputs(inputs, lengths)

    num_stages = len(inputs)
    final_stage = num_stages - 1

    state_fwd, state_bwd = initial_state_fwd, initial_state_bwd
    last_scope = None

    for stage in xrange(num_stages):
        scope_name = "serial-lbirnn-seq{}".format(stage)
        with tf.variable_scope(scope_name) as scope:
            if last_scope is not None:
                last_scope.reuse_variables()

            # Only the last stage honours the user-specified indices
            stage_indices = indices if stage == final_stage else None

            stage_codes, stage_states = _lbirnn_helper(
                inputs[stage],
                lengths[stage],
                is_training=is_training,
                indices=stage_indices,
                num_layers=num_layers,
                cell_type=cell_type,
                cell_size=cell_size,
                initial_state_fwd=state_fwd,
                initial_state_bwd=state_bwd,
                scope=scope,
                **kwargs)

            code_fwd, code_bwd = stage_codes
            # These states seed the next stage
            state_fwd, state_bwd = stage_states
            last_scope = scope

    return tf.concat([code_fwd, code_bwd], axis=-1)
Ejemplo n.º 5
0
def dan(inputs,
        lengths,
        word_dropout_rate,
        reducer,
        apply_activation,
        num_layers,
        activation_fns,
        is_training,
        **kwargs):
    """Deep Averaging Network
    https://www.cs.umd.edu/~miyyer/pubs/2015_acl_dan.pdf

    Word dropout is applied to every stage, each stage is pooled with
    `reduce`, the pooled vectors are concatenated along axis 1 and
    optionally passed through `num_layers` dense layers that keep the
    dimensionality unchanged.

    The caller's `inputs` list is not modified.

    :param inputs: list with one sequence of word embeddings per stage
    :param lengths: list with the sequence lengths of each stage
    :param word_dropout_rate: how much word embeddings to drop, in [0, 1)
    :param reducer: which pooling(s) to use
    :param apply_activation: whether to add layers
    :param num_layers: number of hidden layers
    :param activation_fns: list of activation functions, None if not applying
    :param is_training: when not in training mode, do not drop word embeddings
    :param kwargs: accepted for interface compatibility, unused here
    :return: concatenation of poolings of all the word embeddings
    """

    validate_extractor_inputs(inputs, lengths)

    if apply_activation:
        assert len(activation_fns) == num_layers, \
            'Length of activation_fns ' + str(len(activation_fns)) + \
            ' doesn\'t match num_layers ' + str(num_layers) + '!'

    # TODO word dropout test
    # TODO test two-input sequence
    assert 0.0 <= word_dropout_rate < 1.0, \
        'Word dropout rate must be in [0.0, 1.0) !'

    # there must be at least one stage
    assert len(inputs) > 0

    # Collect the masked tensors in a fresh list so that the list passed
    # in by the caller is left untouched.
    dropped_inputs = []
    for x in inputs:
        input_shape = tf.shape(x)
        batch_size = input_shape[0]
        n_time_steps = input_shape[1]

        # entire token word embedding dropout: one mask value per token,
        # broadcast over the embedding dimension
        all_ones = tf.ones((batch_size, n_time_steps, 1))
        mask = tf.layers.dropout(
            all_ones, rate=word_dropout_rate, training=is_training)
        dropped_inputs.append(tf.cast(mask, 'float32') * x)

    num_stages = len(dropped_inputs)
    outputs = []
    prev_varscope = None
    for n_stage in xrange(num_stages):
        with tf.variable_scope("dan-seq{}".format(n_stage)) as varscope:
            if prev_varscope is not None:
                prev_varscope.reuse_variables()
            p = reduce(dropped_inputs[n_stage],
                       lengths[n_stage],
                       reducer=reducer)
            outputs.append(p)
            prev_varscope = varscope

    ranks = [len(p.get_shape()) for p in outputs]
    assert all(rank == 2 for rank in ranks)  # <batch_size, embed_dim>
    outputs = tf.concat(outputs, axis=1)

    if apply_activation:
        for num_layer, activation_fn in zip(xrange(num_layers),
                                            activation_fns):
            # a single layer keeps the plain name, several are numbered
            if num_layers == 1:
                layer_name = 'dan-output'
            else:
                layer_name = "dan-output-" + str(num_layer)
            outputs = dense_layer(outputs,
                                  outputs.get_shape().as_list()[1],
                                  # keep same dimensionality
                                  name=layer_name,
                                  activation=activation_fn)

    return outputs