Example #1
def conv_block(x,
               shortcut,
               filter_width=3,
               filter_channel=64,
               is_training=True,
               scope=None):
    x_dim = x.get_shape()[2]
    with tf.variable_scope(scope or "conv_block") as scope:
        # Convolution 1 step
        W1 = tf.get_variable(name="W1",
                             shape=[filter_width, x_dim, filter_channel],
                             initializer=initializers.get("glorot_uniform"))
        x = tf.nn.conv1d(x, W1, stride=1, padding="SAME")
        x = layers.dropout(x, keep_prob=0.7, is_training=is_training)
        x = tf.nn.relu(x)
        # Convolution 2 step
        W2 = tf.get_variable(name="W2",
                             # conv 1 outputs filter_channel channels, so the
                             # second filter's input depth must match it
                             shape=[filter_width, filter_channel, filter_channel],
                             initializer=initializers.get("glorot_uniform"))
        x = tf.nn.conv1d(x, W2, stride=1, padding="SAME")
        x = layers.dropout(x, keep_prob=0.7, is_training=is_training)
        x = tf.nn.relu(x)
        # Residual connection
        if shortcut is not None:
            return shortcut + x
        return x
Example #2
    def __init__(self,
                 W_regularizer=None,
                 b_regularizer=None,
                 W_constraint=None,
                 b_constraint=None,
                 bias=True,
                 **kwargs):
        """
        Keras Layer that implements an Attention mechanism for temporal data.
        Supports Masking.
        Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]
        # Input shape
            3D tensor with shape: `(samples, steps, features)`.
        # Output shape
            2D tensor with shape: `(samples, features)`.
        Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
        The dimensions are inferred based on the output shape of the RNN.
        Note: the layer has been tested with Keras 2.0.6.
        Example:
            model.add(LSTM(64, return_sequences=True))
            model.add(Attention())
            # next add a Dense layer (for classification/regression) or whatever you need
        :param kwargs: extra keyword arguments forwarded to the base Keras Layer.
        """
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(Attention, self).__init__(**kwargs)
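A minimal usage sketch for the layer above, following its own docstring; the vocabulary size, embedding width, sequence length, and sigmoid head below are illustrative assumptions, not part of the original snippet.

# Illustrative sketch (assumed sizes and task): stack Attention on top of an
# RNN that returns full sequences, then add a task head, as the docstring suggests.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

model = Sequential()
model.add(Embedding(input_dim=20000, output_dim=128, input_length=100))
model.add(LSTM(64, return_sequences=True))   # (samples, steps, features)
model.add(Attention())                       # -> (samples, features)
model.add(Dense(1, activation='sigmoid'))    # e.g. binary classification head
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])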
Example #3
    def apply(self, is_train, x, mask=None):
        return fully_connected(x,
                               x.shape.as_list()[-1],
                               use_bias=self.bias,
                               activation=activations.get(self.activation),
                               kernel_initializer=_wrap_init(
                                   initializers.get(self.w_init)))
Example #4
    def __init__(self,
                 filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 depth_multiplier=1,
                 activation=None,
                 use_bias=False,
                 depthwise_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 depthwise_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 depthwise_constraint=None,
                 bias_constraint=None,
                 **kwargs):

        super(DepthWiseConv2D,
              self).__init__(filters=filters,
                             kernel_size=kernel_size,
                             strides=strides,
                             padding=padding,
                             data_format=data_format,
                             activation=activation,
                             use_bias=use_bias,
                             bias_regularizer=bias_regularizer,
                             activity_regularizer=activity_regularizer,
                             bias_constraint=bias_constraint,
                             **kwargs)
        self.depth_multiplier = depth_multiplier
        self.depthwise_initializer = initializers.get(depthwise_initializer)
        self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
        self.depthwise_constraint = constraints.get(depthwise_constraint)
Example #5
    def apply(self, is_train, x, mask=None):
        bias = (self.bias is None) or self.bias  # for backwards compat
        return fully_connected(
            x,
            self.n_out,
            use_bias=bias,
            activation=get_keras_activation(self.activation),
            kernel_initializer=_wrap_init(initializers.get(self.w_init)))
Example #6
def tri_linear(x, keys):
    with tf.variable_scope("tri_linear_attn") as scope:
        key_w = tf.get_variable("key_w",
                                shape=x.shape.as_list()[-1],
                                initializer=initializers.get("glorot_uniform"),
                                dtype=tf.float32)
        key_logits = tf.tensordot(keys, key_w, axes=[[2],
                                                     [0]])  # (batch, key_len)
        x_w = tf.get_variable("input_w",
                              shape=x.shape.as_list()[-1],
                              initializer=initializers.get("glorot_uniform"),
                              dtype=tf.float32)
        x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len)
        dot_w = tf.get_variable("dot_w",
                                shape=x.shape.as_list()[-1],
                                initializer=initializers.get("glorot_uniform"),
                                dtype=tf.float32)
        x_dots = x * tf.expand_dims(tf.expand_dims(dot_w, 0), 0)
        dot_logits = tf.matmul(x_dots, keys, transpose_b=True)
        return dot_logits + tf.expand_dims(key_logits, 1) + tf.expand_dims(
            x_logits, 2)
Example #7
def vdcnn(x,
          filter_width=3,
          init_channel=64,
          num_layers=[2, 2, 2, 2],
          use_shortcut=False,
          k=8,
          is_training=True,
          scope=None):
    layers = []
    x_dim = x.get_shape()[2]

    with tf.variable_scope("temp_conv"):
        filter_shape = [filter_width, x_dim, init_channel]
        W = tf.get_variable(name='temp_1',
                            shape=filter_shape,
                            initializer=initializers.get("glorot_uniform"))
        x = tf.nn.conv1d(x, W, stride=1, padding="SAME")
        layers.append(x)

    now_channel_size = init_channel

    for i, num_layer in enumerate(num_layers):
        for j in range(num_layer):
            with tf.variable_scope("%d_layer_%d_cnn" % (i, j)) as scope:
                shortcut = None
                if use_shortcut and i < len(num_layers) - 1:
                    shortcut = layers[-1]
                conv_ = conv_block(layers[-1], shortcut, filter_width,
                                   now_channel_size, is_training, scope)
                layers.append(conv_)

        if i == len(num_layers) - 1:
            break

        with tf.variable_scope("%d_layer_pool" % (i)) as scope:
            shortcut = None
            if use_shortcut:
                shortcut = layers[-1]
            pool_ = pool_block(layers[-1], shortcut, filter_width, scope)
            layers.append(pool_)

        now_channel_size *= 2

    k_pooled = tf.nn.top_k(tf.transpose(layers[-1], [0, 2, 1]),
                           k=k,
                           name='k_pool',
                           sorted=False)[0]
    flatten = tf.reshape(k_pooled, (-1, now_channel_size * k))
    return flatten
Example #8
def dilated_causal_conv(x, filter_width=3, dilates=[1, 2, 4], scope=None):
    x_dim = x.get_shape()[-1].value
    with tf.variable_scope(scope or 'dilated_causal_conv'):
        conved = x
        for idx, dilate in enumerate(dilates):
            # each dilation rate gets its own filter, L2-normalized over its channel axes
            W = tf.get_variable(name='conv_filter_{}'.format(idx),
                                shape=[filter_width, x_dim, x_dim],
                                initializer=initializers.get('glorot_uniform'))
            W_norm = tf.nn.l2_normalize(W, [1, 2])
            conved = tf.nn.convolution(input=conved,
                                       filter=W_norm,
                                       padding='SAME',
                                       strides=[1],
                                       dilation_rate=[dilate],
                                       name='conved_{}'.format(idx))
    return conved
Example #9
def shallow_wide_cnn(x, filter_widths, filter_channel):

    layers = []
    x_dim = x.get_shape()[2].value
    x_width = x.get_shape()[1].value
    for idx, filter_width in enumerate(filter_widths):
        with tf.variable_scope('filter_{}'.format(idx)) as scope:
            W = tf.get_variable(name='conv_W',
                                shape=[filter_width, x_dim, filter_channel],
                                initializer=initializers.get("glorot_uniform"))
            conved = tf.nn.conv1d(value=x,
                                  filters=W,
                                  stride=1,
                                  padding='VALID')
            pooled = tf.reduce_max(conved, axis=1)
            layers.append(pooled)

    return tf.concat(layers, axis=1)
Example #10
    def __init__(self,
                 W_regularizer=None,
                 u_regularizer=None,
                 b_regularizer=None,
                 W_constraint=None,
                 u_constraint=None,
                 b_constraint=None,
                 bias=True,
                 **kwargs):

        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)
Example #11
def tcn_block(x,
              is_training,
              filter_width=3,
              dilates=[1, 2, 4],
              keep_prob=0.7):
    '''
    reference: https://arxiv.org/pdf/1803.01271.pdf
    '''
    x_dim = x.get_shape()[-1].value
    with tf.variable_scope('dilated_causal_conv_1') as scope:
        conved_1 = dilated_causal_conv(x, filter_width, dilates, scope=scope)
    with tf.variable_scope('relu_dropout_1') as scope:
        output_1 = tf.nn.relu(conved_1)
        output_1 = layers.dropout(output_1,
                                  keep_prob=keep_prob,
                                  is_training=is_training)
    with tf.variable_scope('dilated_causal_conv_2') as scope:
        conved_2 = dilated_causal_conv(output_1,
                                       filter_width,
                                       dilates,
                                       scope=scope)
    with tf.variable_scope('relu_dropout_2') as scope:
        output_2 = tf.nn.relu(conved_2)
        output_2 = layers.dropout(output_2,
                                  keep_prob=keep_prob,
                                  is_training=is_training)

    conv_11_W = tf.get_variable(name='conv_11_filter',
                                shape=[1, x_dim, x_dim],
                                initializer=initializers.get('glorot_uniform'))
    conved_11 = tf.nn.convolution(input=x,
                                  filter=conv_11_W,
                                  padding='SAME',
                                  strides=[1],
                                  name='conved_11')

    return tf.nn.relu(output_2 + conved_11)
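A minimal sketch of applying the block above, assuming `layers` refers to tf.contrib.layers and that dilated_causal_conv from Example #8 is in scope; the input shape and scope name are illustrative assumptions.

# Illustrative sketch (assumed shape): one residual TCN block over
# 100-step sequences with 64 channels; the output keeps the input shape.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 100, 64], name='tcn_input')
is_training = tf.placeholder(tf.bool, name='is_training')

with tf.variable_scope('tcn_block_0'):
    out = tcn_block(x, is_training, filter_width=3, dilates=[1, 2, 4])
# out: (batch, 100, 64)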
Example #12
def get_keras_initialization(name):
    if name is None:
        return None
    return _wrap_init(initializers.get(name))
Example #13
def get_keras_initialization(name: Union[str, Callable]):
    if name is None:
        return None
    return _wrap_init(initializers.get(name))
Example #14
    syll_embedder = tf.get_variable('syll_embedder', (syll_size, syll_dim))
    syll_embed = tf.nn.embedding_lookup(syll_embedder, sylls)

    from core_layer import han1_syll_cnn_char_rnn, han1_syll_cnn_char_cnn
    core_layer_output = han1_syll_cnn_char_cnn(config, word_embed, sent_len,
                                               char_embed, word_len,
                                               syll_embed, None, fc_dim,
                                               is_training)

    with tf.variable_scope("output"):
        output = fully_connected(
            core_layer_output,
            fc_dim,
            use_bias=True,
            activation=activations.get("relu"),
            kernel_initializer=initializers.get("glorot_uniform"))
        output = layers.dropout(output,
                                keep_prob=config.keep_prob,
                                is_training=is_training)
        output = fully_connected(
            output,
            1,
            use_bias=True,
            activation=None,
            kernel_initializer=initializers.get("glorot_uniform"))

    y_logits = tf.sigmoid(output) * 9 + 1
    predictions = y_logits
    acc = tf.reduce_mean(
        tf.to_float(tf.equal(tf.round(predictions), tf.round(y_))))
Example #15
    sx_ = tf.placeholder(tf.int32, (None, max_word_num, max_syll_num),
                         name='sx_')
    y_ = tf.placeholder(tf.int32, (None,), name='y_')

    c_embed = tf.get_variable('c_embed', (character_size, char_dim))
    s_embed = tf.get_variable('s_embed', (syllable_size, syll_dim))

    cx = tf.nn.embedding_lookup(c_embed, cx_)
    sx = tf.nn.embedding_lookup(s_embed, sx_)

    core_output = cnn_char_syll(config, wx, cx, sx, is_training)
    preds = fully_connected(
        core_output,
        10,
        activation=activations.get('relu'),
        kernel_initializer=initializers.get('glorot_uniform'))
    pred = tf.argmax(preds, axis=1, output_type=tf.int32) + 1

    y_arr = tf.one_hot(y_, 10)

    acc = tf.reduce_mean(tf.to_float(tf.equal(pred, y_)))
    loss = tf.losses.mean_squared_error(y_arr, preds)
    mse = tf.losses.mean_squared_error(y_, pred)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    ##############################################################################################################

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # DONOTCHANGE: Reserved for nsml
def rr_han(config, word_embed, sent_len, char_embed, word_len, syll_embed,
           syll_len, n_unit, is_training):
    '''
    HAN 1 layer with char rnn

    @ Input spec

    word_embed [batch_size, max_sent_len, word_dim]
    sent_len [batch_size]
    char_embed [batch_size, max_sent_len, max_word_len, char_dim]
    word_len [batch_size, max_sent_len]
    syll_embed [batch_size, max_sent_len, max_syll_len, syll_dim]
    syll_len [batch_size, max_sent_len]

    @ Output spec
    return [batch, n_unit]
    '''

    char_dim = config.char_dim
    syll_dim = config.syll_dim
    max_sent_len = config.max_sentence_length
    max_word_len = config.max_word_length
    max_syll_num = config.max_syll_num
    keep_prob = config.keep_prob
    rnn_dim = config.rnn_dim

    with tf.variable_scope('syll_rnn') as scope:
        cell_stack_count = 2
        syll_cell = MultiRNNCell(
            [GRUCell(syll_dim) for _ in range(cell_stack_count)])
        syll_embed = tf.cast(
            tf.reshape(syll_embed, [-1, max_syll_num, syll_dim]), tf.float32)
        syll_len = tf.reshape(syll_len, [-1])

        _, syll_rnn_embed = bidirectional_rnn(syll_cell,
                                              syll_cell,
                                              syll_embed,
                                              syll_len,
                                              scope=scope)

        syll_rnn_embed = tf.reshape(
            syll_rnn_embed,
            [-1, max_sent_len, syll_dim * 2 * cell_stack_count])

    with tf.variable_scope('char_rnn') as scope:
        cell_stack_count = 2
        char_cell = MultiRNNCell(
            [GRUCell(char_dim) for _ in range(cell_stack_count)])
        char_embed = tf.cast(
            tf.reshape(char_embed, [-1, max_word_len, char_dim]), tf.float32)
        word_len = tf.reshape(word_len, [-1])

        _, char_rnn_embed = bidirectional_rnn(char_cell,
                                              char_cell,
                                              char_embed,
                                              word_len,
                                              scope=scope)

        char_rnn_embed = tf.reshape(
            char_rnn_embed,
            [-1, max_sent_len, char_dim * 2 * cell_stack_count])

    word_char_concat = tf.concat([word_embed, char_rnn_embed, syll_rnn_embed],
                                 axis=2)

    with tf.variable_scope('embedding') as scope:
        word_char_embed = fully_connected(
            word_char_concat,
            rnn_dim,
            use_bias=True,
            activation=activations.get("relu"),
            kernel_initializer=initializers.get("glorot_uniform"))

        with tf.variable_scope('dropout'):
            word_char_embed = layers.dropout(
                word_char_embed,
                keep_prob=keep_prob,
                is_training=is_training,
            )

    with tf.variable_scope('encoder') as scope:
        cell = MultiRNNCell([GRUCell(rnn_dim) for _ in range(3)])
        encoder_output, _ = bidirectional_rnn(cell,
                                              cell,
                                              word_char_embed,
                                              sent_len,
                                              scope=scope)

        with tf.variable_scope('attention') as scope:
            attn_sum_output = task_specific_attention(encoder_output,
                                                      n_unit,
                                                      scope=scope)

        with tf.variable_scope('dropout'):
            attn_sum_output = layers.dropout(
                attn_sum_output,
                keep_prob=keep_prob,
                is_training=is_training,
            )

    return attn_sum_output
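A minimal sketch of calling rr_han with inputs shaped as documented in its docstring; every dimension, the Config field values, and the placeholder names below are assumptions for illustration only.

# Illustrative sketch (all sizes assumed): feed rr_han tensors matching
# the shapes documented in its docstring.
import tensorflow as tf

class Config(object):
    char_dim = 32
    syll_dim = 32
    max_sentence_length = 50
    max_word_length = 10
    max_syll_num = 10
    keep_prob = 0.7
    rnn_dim = 128

config = Config()
word_embed = tf.placeholder(tf.float32, [None, 50, 200])     # [batch, max_sent_len, word_dim]
sent_len = tf.placeholder(tf.int32, [None])                  # [batch]
char_embed = tf.placeholder(tf.float32, [None, 50, 10, 32])  # [batch, max_sent_len, max_word_len, char_dim]
word_len = tf.placeholder(tf.int32, [None, 50])              # [batch, max_sent_len]
syll_embed = tf.placeholder(tf.float32, [None, 50, 10, 32])  # [batch, max_sent_len, max_syll_num, syll_dim]
syll_len = tf.placeholder(tf.int32, [None, 50])              # [batch, max_sent_len]

sentence_vec = rr_han(config, word_embed, sent_len, char_embed, word_len,
                      syll_embed, syll_len, n_unit=256, is_training=True)
# sentence_vec: [batch, 256]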