import tensorflow as tf
from tensorflow.python.util import nest
# TF 1.x contrib path (assumed); used by cudnn_rnn() below.
from tensorflow.contrib.cudnn_rnn import CudnnGRU, CudnnLSTM

# Helpers such as dropout, add_wd, flatten, reconstruct, get_cell and _linear
# are assumed to be defined elsewhere in this module.


def F(inputs, d, activation=tf.nn.relu, kernel_initializer=None, scope=None,
      use_bias=True, input_keep_prob=1.0, wd=0.0, is_train=None):
    out = dropout(inputs, input_keep_prob, is_train)
    with tf.variable_scope(scope or "projection"):
        out = tf.layers.dense(out, d, activation=activation, use_bias=use_bias,
                              kernel_initializer=kernel_initializer)
        if wd:
            add_wd(wd)
    return out
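
# Usage sketch (hedged; tensor names and shapes are illustrative, not from the
# original file): F applies dropout to its input and then a single dense
# projection inside its own variable scope, so two calls with different
# `scope` values create independent weight matrices, e.g.
#
#   p0 = F(context, d=hidden_size, input_keep_prob=0.8, is_train=is_train,
#          scope="proj0")                               # [N, JX, hidden_size]
#   logits = F(p0, d=1, activation=None, scope="proj1")  # [N, JX, 1]
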
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False,
           wd=0.0, input_keep_prob=1.0, is_train=None, kernel_initializer=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]  # flat_args[0] : [N*JX*JQ, d]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [tf.cond(is_train,
                             lambda: tf.nn.dropout(arg, input_keep_prob),
                             lambda: arg)
                     for arg in flat_args]

    with tf.variable_scope(scope or 'linear'):
        flat_out = _linear(flat_args, output_size, bias,
                           kernel_initializer=kernel_initializer)
        out = reconstruct(flat_out, args[0], 1)
        if squeeze:
            out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
        if wd:
            add_wd(wd)
    return out
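
# Usage sketch (hedged; shapes are illustrative): linear() flattens every
# input to rank 2, applies a shared affine map via _linear, then reconstructs
# the leading dimensions of the first argument, so arbitrary-rank tensors can
# be projected without manual reshaping, e.g.
#
#   # u: [N, JX, JQ, 2*d] similarity features -> scalar logits [N, JX, JQ]
#   logits = linear([u], 1, bias=True, squeeze=True, scope="att_logits",
#                   input_keep_prob=0.8, is_train=is_train)
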
def conv2d(in_, filter_size, height, padding, dilation_rate=None, wd=0.0,
           is_train=None, keep_prob=1.0, scope=None, nonlinear=True):
    with tf.variable_scope(scope or "conv2d"):
        num_channels = in_.get_shape().as_list()[-1]
        N = tf.shape(in_)[0]
        # [N, JX, d] -> [N, 1, JX, d] so the (possibly dilated) convolution
        # slides along the time axis only.
        in_ = tf.expand_dims(in_, 1)
        filter_ = tf.get_variable("filter_", shape=[1, height, num_channels, filter_size],
                                  dtype='float')
        bias = tf.get_variable("bias", shape=[filter_size], dtype='float')
        strides = [1, 1]
        dilation_rate = [1, dilation_rate or 1]
        if is_train is not None and keep_prob < 1.0:
            in_ = dropout(in_, keep_prob, is_train)
        out = tf.nn.convolution(in_, filter_, padding, strides=strides,
                                dilation_rate=dilation_rate) + bias
        if nonlinear:
            out = tf.nn.tanh(out)
        out = tf.reshape(out, [N, -1, filter_size])
        if wd:
            add_wd(wd)
    return out
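
# Usage sketch (hedged; shapes are illustrative): conv2d treats a [N, JX, d]
# sequence as a 1-pixel-high image and convolves only along the time axis;
# a dilation_rate > 1 widens the receptive field without adding parameters, e.g.
#
#   # [N, JX, d] -> [N, JX, num_filters], each position seeing `height` tokens
#   # spaced 2 apart because of the dilation.
#   feats = conv2d(x, filter_size=num_filters, height=3, padding="SAME",
#                  dilation_rate=2, keep_prob=0.8, is_train=is_train)
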
def conv1d(in_, filter_size, height, padding, wd=0.0, is_train=None,
           keep_prob=1.0, scope=None):
    with tf.variable_scope(scope or "conv1d"):
        num_channels = in_.get_shape()[-1]
        filter_ = tf.get_variable("filter", shape=[1, height, num_channels, filter_size],
                                  dtype='float')
        bias = tf.get_variable("bias", shape=[filter_size], dtype='float')
        strides = [1, 1, 1, 1]
        if is_train is not None and keep_prob < 1.0:
            in_ = dropout(in_, keep_prob, is_train)
        xxc = tf.nn.conv2d(in_, filter_, strides, padding) + bias  # [N*M, JX, W/filter_stride, d]
        out = tf.reduce_max(tf.nn.relu(xxc), 2)  # [-1, JX, d]
        if wd:
            add_wd(wd)
    return out
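
# Usage sketch (hedged; shapes are illustrative): despite its name, conv1d
# runs a 2-D convolution over a [batch, num_words, max_word_len, char_dim]
# block of character embeddings and max-pools over the character axis, i.e.
# the usual max-over-time character CNN, e.g.
#
#   # char_emb: [batch, num_words, max_word_len, char_dim]
#   # -> word-level features: [batch, num_words, num_filters]
#   char_feats = conv1d(char_emb, filter_size=num_filters, height=5,
#                       padding="VALID", keep_prob=0.8, is_train=is_train)
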
def rnn(rnn_type, inputs, length, hidden_size, num_layers=1, state_keep_prob=1.0,
        dropout_keep_prob=None, concat=True, initial_state=None,
        kernel_initializer=tf.random_normal_initializer(stddev=0.1),
        wd=0.0, is_train=False, scope=None):
    with tf.variable_scope(scope or 'rnn'):
        if not rnn_type.startswith('bi'):
            cell = get_cell(rnn_type, hidden_size, num_layers, dropout_keep_prob,
                            kernel_initializer=kernel_initializer, is_train=is_train)
            outputs, states = tf.nn.dynamic_rnn(cell, inputs, sequence_length=length,
                                                dtype=tf.float32, initial_state=initial_state)
            state = states
            if rnn_type.endswith('lstm'):
                h = states[0][1]
                # h = [state.h for state in states]
                state = h
        else:
            cell_fw = get_cell(rnn_type, hidden_size, num_layers, dropout_keep_prob,
                               state_keep_prob, kernel_initializer=kernel_initializer,
                               is_train=is_train)
            cell_bw = get_cell(rnn_type, hidden_size, num_layers, dropout_keep_prob,
                               state_keep_prob, kernel_initializer=kernel_initializer,
                               is_train=is_train)
            if initial_state is not None:
                # initial_state is expected to hold the forward and backward
                # states concatenated along the last axis.
                outputs, states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, inputs, sequence_length=length, dtype=tf.float32,
                    initial_state_fw=initial_state[:, :hidden_size],
                    initial_state_bw=initial_state[:, hidden_size:]
                )
            else:
                outputs, states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, inputs, sequence_length=length, dtype=tf.float32
                )
            state_fw, state_bw = states
            if rnn_type.endswith('lstm'):
                h_fw = state_fw[0][1]
                h_bw = state_bw[0][1]
                # h_fw = [state_fw.h for state_fw in states_fw]
                # h_bw = [state_bw.h for state_bw in states_bw]
                state_fw, state_bw = h_fw, h_bw
            if concat:
                outputs = tf.concat(outputs, 2)
                state = tf.concat([state_fw, state_bw], 1)
            else:
                outputs = outputs[0] + outputs[1]
                state = state_fw + state_bw
        if wd:
            add_wd(wd)
    return outputs, state
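
# Usage sketch (hedged; names and sizes are illustrative): with a 'bi*' type
# and concat=True, the forward/backward outputs are concatenated on the
# feature axis, so the output width is 2*hidden_size, e.g.
#
#   # q_emb: [N, JQ, d_emb], q_len: [N]
#   q_enc, q_state = rnn('bi-lstm', q_emb, q_len, hidden_size=75,
#                        dropout_keep_prob=0.8, is_train=is_train,
#                        scope="question_encoder")
#   # q_enc: [N, JQ, 150], q_state: [N, 150]
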
def cudnn_rnn(rnn_type, inputs, length, hidden_size, num_layers=1, dropout_keep_prob=1.0,
              concat=True, initial_state=None,
              kernel_initializer=tf.random_normal_initializer(stddev=0.1),
              wd=0.0, is_train=False, scope=None):
    with tf.variable_scope(scope or 'cudnn_rnn'):
        direction = "bidirectional" if 'bi' in rnn_type else "unidirectional"
        input_size = inputs.get_shape().as_list()[-1]
        if rnn_type.endswith('gru'):
            rnn = CudnnGRU(num_layers=num_layers, num_units=hidden_size,
                           input_mode='linear_input', direction=direction,
                           dropout=1 - dropout_keep_prob, name='rnn')
        elif rnn_type.endswith('lstm'):
            rnn = CudnnLSTM(num_layers=num_layers, num_units=hidden_size,
                            input_mode='linear_input', direction=direction,
                            dropout=1 - dropout_keep_prob, name='rnn')
        else:
            raise NotImplementedError("{} is not supported.".format(rnn_type))
        inputs = dropout(inputs, dropout_keep_prob, is_train)
        # Cudnn RNNs are time-major, so transpose in and out.
        outputs, _ = rnn(tf.transpose(inputs, [1, 0, 2]))
        outputs = tf.transpose(outputs, [1, 0, 2])  # [N, JX, 2*d]
        output_h = None
        if wd:
            add_wd(wd)
    return outputs, output_h
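
# Usage sketch (hedged; names and sizes are illustrative): cudnn_rnn is a
# GPU-only alternative to rnn() above; note that `length`, `initial_state`,
# `concat` and `kernel_initializer` are accepted but not used here, and the
# final state is returned as None, e.g.
#
#   c_enc, _ = cudnn_rnn('bi-gru', c_emb, c_len, hidden_size=75,
#                        dropout_keep_prob=0.8, is_train=is_train,
#                        scope="context_encoder")  # c_enc: [N, JX, 150]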