import tensorflow as tf
from tensorflow.python.util import nest

# The helpers dropout, flatten, reconstruct, add_wd, get_cell and _linear, as well
# as the CudnnGRU/CudnnLSTM classes (tf.contrib.cudnn_rnn in TF 1.x), are assumed
# to be imported or defined elsewhere in this module.


def F(inputs, d, activation=tf.nn.relu, kernel_initializer=None, scope=None, use_bias=True, input_keep_prob=1.0, wd=0.0, is_train=None):
    out = dropout(inputs, input_keep_prob, is_train)
    with tf.variable_scope(scope or "projection"):
        out = tf.layers.dense(out, d, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer)
        if wd:
            add_wd(wd)
    return out
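# Usage sketch (not from the source): building a projection with F in a TF 1.x
# graph. The demo name, placeholder shapes and hyperparameters are illustrative;
# it relies on the module's dropout/add_wd helpers.
def _demo_F():
    is_train = tf.placeholder(tf.bool, [], name="is_train")
    x = tf.placeholder(tf.float32, [None, 50, 300])        # [N, JX, d_in]
    proj = F(x, 128, input_keep_prob=0.8, wd=1e-4,
             is_train=is_train, scope="proj_demo")          # [N, JX, 128]
    return proj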
def linear(args,
           output_size,
           bias,
           bias_start=0.0,
           scope=None,
           squeeze=False,
           wd=0.0,
           input_keep_prob=1.0,
           is_train=None,
           kernel_initializer=None):

    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1)
                 for arg in args]  # flat_args[0] : [N*JX*JQ, d]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [
            tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob),
                    lambda: arg) for arg in flat_args
        ]
    with tf.variable_scope(scope or 'linear'):
        flat_out = _linear(flat_args,
                           output_size,
                           bias,
                           kernel_initializer=kernel_initializer)
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    if wd:
        add_wd(wd)
    return out
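# Usage sketch (not from the source): applying linear to a rank-3 tensor and
# squeezing the last axis to obtain per-position logits. Names and shapes are
# illustrative; flatten/reconstruct/_linear come from the module.
def _demo_linear():
    is_train = tf.placeholder(tf.bool, [], name="is_train")
    h = tf.placeholder(tf.float32, [None, 50, 200])         # [N, JX, d]
    logits = linear(h, 1, bias=True, squeeze=True, wd=1e-4,
                    input_keep_prob=0.8, is_train=is_train,
                    scope="logits_demo")                     # [N, JX]
    return logits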
def conv2d(in_,
           filter_size,
           height,
           padding,
           dilation_rate=None,
           wd=0.0,
           is_train=None,
           keep_prob=1.0,
           scope=None,
           nonlinear=True):
    with tf.variable_scope(scope or "conv2d"):
        num_channels = in_.get_shape().as_list()[-1]
        N = tf.shape(in_)[0]
        in_ = tf.expand_dims(in_, 1)  # [N, 1, W, num_channels]: treat the sequence as a height-1 image
        filter_ = tf.get_variable("filter_",
                                  shape=[1, height, num_channels, filter_size],
                                  dtype='float')
        bias = tf.get_variable("bias", shape=[filter_size], dtype='float')
        strides = [1, 1]
        dilation_rate = [1, dilation_rate or 1]  # dilate along the width (token) axis only
        if is_train is not None and keep_prob < 1.0:
            in_ = dropout(in_, keep_prob, is_train)
        out = tf.nn.convolution(in_,
                                filter_,
                                padding,
                                strides=strides,
                                dilation_rate=dilation_rate) + bias
        if nonlinear:
            out = tf.nn.tanh(out)
        out = tf.reshape(out, [N, -1, filter_size])
        if wd:
            add_wd(wd)
        return out
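# Usage sketch (not from the source): a width-5 convolution over a rank-3 input
# with conv2d; the 'VALID' padding and all sizes below are illustrative.
def _demo_conv2d():
    is_train = tf.placeholder(tf.bool, [], name="is_train")
    char_emb = tf.placeholder(tf.float32, [None, 16, 8])    # [N, W, d_char]
    out = conv2d(char_emb, filter_size=100, height=5, padding="VALID",
                 keep_prob=0.8, is_train=is_train, scope="char_conv_demo")
    return out                                              # [N, W-4, 100]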
def conv1d(in_, filter_size, height, padding, wd=0.0, is_train=None, keep_prob=1.0, scope=None):
    with tf.variable_scope(scope or "conv1d"):
        num_channels = in_.get_shape()[-1]
        filter_ = tf.get_variable("filter", shape=[1, height, num_channels, filter_size], dtype='float')
        bias = tf.get_variable("bias", shape=[filter_size], dtype='float')
        strides = [1, 1, 1, 1]
        if is_train is not None and keep_prob < 1.0:
            in_ = dropout(in_, keep_prob, is_train)
        xxc = tf.nn.conv2d(in_, filter_, strides, padding) + bias  # [N*M, JX, W/filter_stride, d]
        out = tf.reduce_max(tf.nn.relu(xxc), 2)  # [-1, JX, d]
        if wd:
            add_wd(wd)
        return out
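# Usage sketch (not from the source): conv1d expects a rank-4 input
# [N, JX, W, d_char] and max-pools over the character axis; sizes are illustrative.
def _demo_conv1d():
    is_train = tf.placeholder(tf.bool, [], name="is_train")
    char_emb = tf.placeholder(tf.float32, [None, 50, 16, 8])  # [N, JX, W, d_char]
    out = conv1d(char_emb, filter_size=100, height=5, padding="VALID",
                 keep_prob=0.8, is_train=is_train, scope="char_conv1d_demo")
    return out                                                # [N, JX, 100]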
def rnn(rnn_type, inputs, length, hidden_size, num_layers=1, state_keep_prob=1.0, 
        dropout_keep_prob=None, concat=True, initial_state=None, 
        kernel_initializer=tf.random_normal_initializer(stddev=0.1), wd=0.0, is_train=False, scope=None):
    with tf.variable_scope(scope or 'rnn'):
        if not rnn_type.startswith('bi'):
            cell = get_cell(rnn_type, hidden_size, num_layers, dropout_keep_prob, kernel_initializer=kernel_initializer, is_train=is_train)
            outputs, states = tf.nn.dynamic_rnn(cell, inputs, sequence_length=length, dtype=tf.float32, initial_state=initial_state)
            if rnn_type.endswith('lstm'):
                # keep only the hidden state h of the final LSTM state
                state = states[0][1]
            else:
                state = states
        else:
            cell_fw = get_cell(rnn_type, hidden_size, num_layers, dropout_keep_prob, state_keep_prob, kernel_initializer=kernel_initializer, is_train=is_train)
            cell_bw = get_cell(rnn_type, hidden_size, num_layers, dropout_keep_prob, state_keep_prob, kernel_initializer=kernel_initializer, is_train=is_train)
            if initial_state is not None:
                outputs, states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, inputs, sequence_length=length, dtype=tf.float32,
                    initial_state_fw=initial_state[:, :hidden_size], initial_state_bw=initial_state[:, hidden_size:]
                )
            else:
                outputs, states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, inputs, sequence_length=length, dtype=tf.float32
                )
            state_fw, state_bw = states
            if rnn_type.endswith('lstm'):
                # keep only the hidden states h of the forward/backward LSTM states
                state_fw, state_bw = state_fw[0][1], state_bw[0][1]
            if concat:
                outputs = tf.concat(outputs, 2)
                state = tf.concat([state_fw, state_bw], 1)
            else:
                outputs = outputs[0] + outputs[1]
                state = state_fw + state_bw
        if wd:
            add_wd(wd)
        return outputs, state
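# Usage sketch (not from the source): encoding a padded batch with the
# bidirectional LSTM variant. Shapes are illustrative and the result shapes
# assume get_cell builds LSTM cells of size hidden_size.
def _demo_rnn():
    is_train = tf.placeholder(tf.bool, [], name="is_train")
    x = tf.placeholder(tf.float32, [None, 50, 300])          # [N, JX, d]
    length = tf.placeholder(tf.int32, [None])                 # [N]
    outputs, state = rnn('bi-lstm', x, length, hidden_size=128,
                         dropout_keep_prob=0.8, is_train=is_train,
                         scope="encoder_demo")                # [N, JX, 256], [N, 256]
    return outputs, state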
def cudnn_rnn(rnn_type, inputs, length, hidden_size, num_layers=1, 
        dropout_keep_prob=1.0, concat=True, initial_state=None, 
        kernel_initializer=tf.random_normal_initializer(stddev=0.1), wd=0.0, is_train=False, scope=None):
    with tf.variable_scope(scope or 'cudnn_rnn'):
        direction = "bidirectional" if 'bi' in rnn_type else "unidirectional"
        input_size = inputs.get_shape().as_list()[-1]
        if rnn_type.endswith('gru'):
            rnn = CudnnGRU(num_layers=num_layers, num_units=hidden_size, 
                            input_mode='linear_input', direction=direction, 
                            dropout=1-dropout_keep_prob, name='rnn')
        
        elif rnn_type.endswith('lstm'):
            rnn = CudnnLSTM(num_layers=num_layers, num_units=hidden_size, 
                            input_mode='linear_input', direction=direction, 
                            dropout=1-dropout_keep_prob, name='rnn')
        else:
            raise NotImplementedError("{} is not supported.".format(rnn_type))
        inputs = dropout(inputs, dropout_keep_prob, is_train)
        # CudnnRNN layers take time-major input; `length` is not used here,
        # so padded positions are processed as well
        outputs, _ = rnn(tf.transpose(inputs, [1, 0, 2]))
        outputs = tf.transpose(outputs, [1, 0, 2])  # [N, JX, 2*d] (or [N, JX, d] if unidirectional)
        output_h = None  # the final hidden state is not collected in this implementation
        if wd:
            add_wd(wd)
        return outputs, output_h
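# Usage sketch (not from the source): the CuDNN bidirectional GRU variant.
# Requires a GPU build of TF 1.x with tf.contrib.cudnn_rnn; sizes are illustrative.
def _demo_cudnn_rnn():
    is_train = tf.placeholder(tf.bool, [], name="is_train")
    x = tf.placeholder(tf.float32, [None, 50, 300])           # [N, JX, d]
    length = tf.placeholder(tf.int32, [None])                  # [N] (not used by the CuDNN path)
    outputs, _ = cudnn_rnn('bi-gru', x, length, hidden_size=128,
                           dropout_keep_prob=0.8, is_train=is_train,
                           scope="cudnn_encoder_demo")         # [N, JX, 256]
    return outputs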