def conv_lstm_cell_no_input(state, num_channels, initializer, filter_size=5,
                            forget_bias=1.0, scope=None, reuse=None):
  with tf.variable_scope(scope, default_name='BasicConvLstmCell',
                         values=[state], reuse=reuse):
    state.get_shape().assert_has_rank(4)
    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)

    # Parameters of gates are concatenated into one conv for efficiency.
    i_j_f_o = layers.conv2d(h,
                            4 * num_channels, [filter_size, filter_size],
                            stride=1,
                            activation_fn=None,
                            scope='Gates',
                            weights_initializer=initializer)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o)

    new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(axis=3, values=[new_c, new_h])
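# Usage sketch (not part of the original code): stepping conv_lstm_cell_no_input
# in closed loop, i.e. from a previous state only, without feeding a new frame.
# Shapes, the initializer and the scope name are illustrative assumptions; the
# sketch relies on the module's existing `tensorflow` import (`tf`).
def _example_no_input_steps():
  # The state holds [c, h] concatenated on the channel axis, hence 2 * 32.
  lstm_state = tf.zeros([4, 16, 16, 2 * 32])
  init = tf.truncated_normal_initializer(stddev=0.02)
  hidden, lstm_state = conv_lstm_cell_no_input(
      lstm_state, num_channels=32, initializer=init, scope='lstm_no_input')
  # Later steps reuse the same gate variables.
  hidden, lstm_state = conv_lstm_cell_no_input(
      lstm_state, num_channels=32, initializer=init,
      scope='lstm_no_input', reuse=True)
  return hidden, lstm_state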
def basic_conv_lstm_cell_leakyrelu_norm(inputs,
                                        state,
                                        num_channels,
                                        filter_size=5,
                                        rate=1,
                                        forget_bias=1.0,
                                        stride=1,
                                        scope=None,
                                        reuse=None):
  """LSTM with leaky ReLU activation and layer normalization.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training. It does not
  allow cell clipping, a projection layer, and does not use peep-hole
  connections: it is the basic baseline.

  Args:
    inputs: input Tensor, 4D, batch x height x width x channels.
    state: state Tensor, 4D, batch x height x width x channels.
    num_channels: the number of output channels in the layer.
    filter_size: the shape of each convolution filter.
    rate: dilation rate of the gate convolution.
    forget_bias: the initial value of the forget biases.
    stride: stride of the gate convolution.
    scope: Optional scope for variable_scope.
    reuse: whether or not the layer and the variables should be reused.

  Returns:
    a tuple of tensors representing output and the new state.
  """
  spatial_size = inputs.get_shape()[1:3]
  if state is None:
    state = init_state(inputs, list(spatial_size) + [2 * num_channels])
  with tf.variable_scope(scope, 'BasicConvLstmCell', [inputs, state],
                         reuse=reuse):
    inputs.get_shape().assert_has_rank(4)
    state.get_shape().assert_has_rank(4)
    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
    inputs_h = tf.concat(axis=3, values=[inputs, h])

    # Parameters of gates are concatenated into one conv for efficiency.
    i_j_f_o = layers.conv2d(inputs_h,
                            4 * num_channels, [filter_size, filter_size],
                            stride=stride,
                            rate=rate,
                            activation_fn=None,
                            scope='Gates')

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o)

    new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(
        i) * tf.nn.leaky_relu(j, alpha=0.1)
    new_c = tf_layers.layer_norm(new_c, scope="normalisation", reuse=reuse)
    new_h = tf.nn.leaky_relu(new_c, alpha=0.1) * tf.sigmoid(o)

    return new_h, tf.concat(axis=3, values=[new_c, new_h])
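# The cells in this module call an `init_state` helper that is not shown here.
# Below is a minimal sketch of what it presumably does, assuming a zero-filled
# state matching the batch size of `inputs`; the real helper may instead take a
# configurable state initializer.
def init_state(inputs, state_shape, dtype=tf.float32):
  """Hypothetical helper: zero state of shape [batch] + state_shape.

  Args:
    inputs: 4D input Tensor, used only to infer the batch dimension.
    state_shape: [height, width, channels] of the state to create.
  """
  batch_size = tf.shape(inputs)[0]
  shape = tf.stack([batch_size] + [int(d) for d in state_shape])
  return tf.zeros(shape, dtype=dtype)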
def basic_conv_lstm_cell(inputs,
                         state,
                         num_channels,
                         filter_size=5,
                         forget_bias=1.0,
                         scope=None,
                         reuse=None):
  """Basic LSTM recurrent network cell, with 2D convolution connections.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training. It does not
  allow cell clipping, a projection layer, and does not use peep-hole
  connections: it is the basic baseline.

  Args:
    inputs: input Tensor, 4D, batch x height x width x channels.
    state: state Tensor, 4D, batch x height x width x channels.
    num_channels: the number of output channels in the layer.
    filter_size: the shape of each convolution filter.
    forget_bias: the initial value of the forget biases.
    scope: Optional scope for variable_scope.
    reuse: whether or not the layer and the variables should be reused.

  Returns:
    a tuple of tensors representing output and the new state.
  """
  spatial_size = inputs.get_shape()[1:3]
  if state is None:
    state = init_state(inputs, list(spatial_size) + [2 * num_channels])
  with tf.variable_scope(scope, 'BasicConvLstmCell', [inputs, state],
                         reuse=reuse):
    inputs.get_shape().assert_has_rank(4)
    state.get_shape().assert_has_rank(4)
    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
    inputs_h = tf.concat(axis=3, values=[inputs, h])

    # Parameters of gates are concatenated into one conv for efficiency.
    i_j_f_o = layers.conv2d(inputs_h,
                            4 * num_channels, [filter_size, filter_size],
                            stride=1,
                            activation_fn=None,
                            scope='Gates')

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o)

    new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(axis=3, values=[new_c, new_h])
def basic_conv_lstm_cell(inputs,
                         state,
                         num_channels,
                         filter_size=5,
                         forget_bias=1.0,
                         scope=None,
                         reuse=None,
                         device_for_variables=None):
  """Basic LSTM recurrent network cell, with 2D convolution connections.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training. It does not
  allow cell clipping, a projection layer, and does not use peep-hole
  connections: it is the basic baseline.

  Args:
    inputs: input Tensor, 4D, batch x height x width x channels.
    state: state Tensor, 4D, batch x height x width x channels.
    num_channels: the number of output channels in the layer.
    filter_size: the shape of each convolution filter.
    forget_bias: the initial value of the forget biases.
    scope: Optional scope for variable_scope.
    reuse: whether or not the layer and the variables should be reused.
    device_for_variables: optional device on which the gate variables are
      placed (passed through to the conv layer).

  Returns:
    a tuple of tensors representing output and the new state.
  """
  spatial_size = inputs.get_shape()[1:3]
  if state is None:
    state = init_state(inputs, list(spatial_size) + [2 * num_channels])
  with tf.variable_scope(scope, 'BasicConvLstmCell', [inputs, state],
                         reuse=reuse):
    inputs.get_shape().assert_has_rank(4)
    state.get_shape().assert_has_rank(4)
    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
    inputs_h = tf.concat(axis=3, values=[inputs, h])

    # Parameters of gates are concatenated into one conv for efficiency.
    i_j_f_o = layers.conv2d(inputs_h,
                            4 * num_channels, [filter_size, filter_size],
                            stride=1,
                            activation_fn=None,
                            scope='Gates',
                            device_for_variables=device_for_variables)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o)

    new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(axis=3, values=[new_c, new_h])
def basic_conv_lstm_cell(inputs,
                         state,
                         num_channels,
                         filter_size=5,
                         forget_bias=1.0,
                         scope=None,
                         reuse=None):
  """Basic LSTM recurrent network cell, with 2D convolution connections.

  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training. It does not
  allow cell clipping, a projection layer, and does not use peep-hole
  connections: it is the basic baseline.

  Args:
    inputs: input Tensor, 4D, batch x height x width x channels.
    state: state Tensor, 4D, batch x height x width x channels.
    num_channels: the number of output channels in the layer.
    filter_size: the shape of each convolution filter.
    forget_bias: the initial value of the forget biases.
    scope: Optional scope for variable_scope.
    reuse: whether or not the layer and the variables should be reused.

  Returns:
    a tuple of tensors representing output and the new state.
  """
  spatial_size = inputs.get_shape()[1:3]
  if state is None:
    state = init_state(inputs, list(spatial_size) + [2 * num_channels])
  with tf.variable_scope(scope, 'BasicConvLstmCell', [inputs, state],
                         reuse=reuse):
    inputs.get_shape().assert_has_rank(4)
    state.get_shape().assert_has_rank(4)
    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
    inputs_h = tf.concat(axis=3, values=[inputs, h])

    # Parameters of gates are concatenated into one conv for efficiency.
    i_j_f_o = layers.conv2d(inputs_h,
                            4 * num_channels, [filter_size, filter_size],
                            stride=1,
                            activation_fn=None,
                            scope='Gates')

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o)

    new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(axis=3, values=[new_c, new_h])
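# Usage sketch (not part of the original code): unrolling basic_conv_lstm_cell
# over a short sequence with shared gate variables. Shapes, the step count and
# the scope name are illustrative assumptions.
def _example_unroll_basic_conv_lstm():
  # Hypothetical input: 8 steps of 4 RGB frames at 32x32 resolution.
  frames = tf.random_normal([8, 4, 32, 32, 3])
  state = None
  outputs = []
  for t in range(8):
    # Reuse the gate variables after the first step so all steps share weights.
    hidden, state = basic_conv_lstm_cell(
        frames[t], state, num_channels=16, scope='conv_lstm1', reuse=(t > 0))
    outputs.append(hidden)
  return outputs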
def cubic_lstm_cell(inputs,
                    state_x,
                    state_y,
                    num_channels,
                    filter_size_x=3,
                    filter_size_y=1,
                    filter_size_z=5,
                    forget_bias=1.0,
                    scope=None,
                    reuse=None):
  """CubicLSTM-style cell with separate temporal (x) and spatial (y) states.

  Returns a tuple (output, new_state_x, new_state_y).
  """
  spatial_size = inputs.get_shape()[1:3]
  if state_x is None:
    state_x = init_state(inputs, list(spatial_size) + [2 * num_channels])
  if state_y is None:
    state_y = init_state(inputs, list(spatial_size) + [2 * num_channels])
  with tf.variable_scope(scope, 'CubicLstmCell', [inputs, state_x, state_y],
                         reuse=reuse):
    inputs.get_shape().assert_has_rank(4)
    state_x.get_shape().assert_has_rank(4)
    state_y.get_shape().assert_has_rank(4)

    c_x, h_x = tf.split(axis=3, num_or_size_splits=2, value=state_x)
    c_y, h_y = tf.split(axis=3, num_or_size_splits=2, value=state_y)
    inputs_h = tf.concat(axis=3, values=[inputs, h_x, h_y])

    # Spatial branch.
    i_j_f_o_y = layers.conv2d(inputs_h,
                              4 * num_channels,
                              [filter_size_x, filter_size_x],
                              stride=1,
                              activation_fn=None,
                              scope='GatesY')
    i_y, j_y, f_y, o_y = tf.split(axis=3, num_or_size_splits=4,
                                  value=i_j_f_o_y)
    new_c_y = c_y * tf.sigmoid(f_y + forget_bias) + tf.sigmoid(
        i_y) * tf.tanh(j_y)
    new_h_y = tf.tanh(new_c_y) * tf.sigmoid(o_y)

    # Temporal branch.
    i_j_f_o_x = layers.conv2d(inputs_h,
                              4 * num_channels,
                              [filter_size_y, filter_size_y],
                              stride=1,
                              activation_fn=None,
                              scope='GatesX')
    i_x, j_x, f_x, o_x = tf.split(axis=3, num_or_size_splits=4,
                                  value=i_j_f_o_x)
    new_c_x = c_x * tf.sigmoid(f_x + forget_bias) + tf.sigmoid(
        i_x) * tf.tanh(j_x)
    new_h_x = tf.tanh(new_c_x) * tf.sigmoid(o_x)

    # Output: fuse the two hidden states with one more convolution.
    new_h = layers.conv2d(tf.concat(axis=3, values=[new_h_x, new_h_y]),
                          num_channels, [filter_size_z, filter_size_z],
                          stride=1,
                          activation_fn=None,
                          scope='Output')

    return new_h, tf.concat(axis=3, values=[new_c_x, new_h_x]), tf.concat(
        axis=3, values=[new_c_y, new_h_y])
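# Usage sketch (not part of the original code): a cubic_lstm_cell step followed
# by a second step that feeds back both state streams. Shapes and the scope
# name are illustrative assumptions.
def _example_cubic_lstm_steps():
  frame = tf.random_normal([4, 32, 32, 3])
  next_frame = tf.random_normal([4, 32, 32, 3])
  # Passing None lets the cell build zero states through init_state.
  output, state_x, state_y = cubic_lstm_cell(
      frame, None, None, num_channels=16, scope='cubic1')
  # The temporal (x) and spatial (y) states are threaded into the next call,
  # reusing the same variables.
  output, state_x, state_y = cubic_lstm_cell(
      next_frame, state_x, state_y, num_channels=16,
      scope='cubic1', reuse=True)
  return output, state_x, state_y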
def TrajGRUCell(inputs,
                state,
                num_channels,
                filter_size=5,
                forget_bias=1.0,
                scope=None,
                reuse=None):
  """Trajectory GRU (TrajGRU) recurrent network cell with 2D convolutions.

  The cell generates a flow field (u, v) from the input and the previous
  hidden state, warps the hidden state along that flow, and mixes the warped
  state into the update (z), reset (r) and candidate (o) gates.

  Args:
    inputs: input Tensor, 4D, batch x height x width x channels.
    state: state Tensor, 4D, batch x height x width x channels.
    num_channels: the number of output channels in the layer.
    filter_size: the shape of each convolution filter.
    forget_bias: unused here; kept for interface compatibility with the
      ConvLSTM cells above.
    scope: Optional scope for variable_scope.
    reuse: whether or not the layer and the variables should be reused.

  Returns:
    a tuple of tensors representing output and the new state.
  """
  spatial_size = inputs.get_shape()[1:3]
  ti = tf.range(spatial_size[0])
  tj = tf.range(spatial_size[1])
  # chn = inputs.get_shape()[3]
  mi = tf.meshgrid(ti, tj)[0]
  mj = tf.transpose(mi)
  mi = tf.cast(tf.reshape(mi, [1, spatial_size[0], spatial_size[1], 1]),
               dtype=tf.float32)
  mj = tf.cast(tf.reshape(mj, [1, spatial_size[1], spatial_size[0], 1]),
               dtype=tf.float32)
  if state is None:
    state = init_state(inputs, list(spatial_size) + [num_channels])
  with tf.variable_scope(scope, 'TrajGRUCell', [inputs, state], reuse=reuse):
    inputs.get_shape().assert_has_rank(4)
    state.get_shape().assert_has_rank(4)

    # The GRU state is just the hidden map (no separate cell state).
    # ow, h = tf.split(axis=3, num_or_size_splits=2, value=state)
    h = state
    inputs_h = tf.concat(axis=3, values=[inputs, h])

    # TrajGRU: flow field (u, v) and the three gate pre-activations.
    u = layers.conv2d(inputs_h, num_channels, [5, 5], scope="u")
    v = layers.conv2d(inputs_h, num_channels, [5, 5], scope="v")
    z_r_o = layers.conv2d(inputs,
                          3 * num_channels, [filter_size, filter_size],
                          stride=1,
                          activation_fn=None,
                          scope='Gates')

    # z = update gate, r = reset gate, o = candidate pre-activation
    z, r, o = tf.split(axis=3, num_or_size_splits=3, value=z_r_o)

    # ============== trajectory warping
    mi = tf.tile(mi, [inputs.get_shape()[0], 1, 1, num_channels])
    mj = tf.tile(mj, [inputs.get_shape()[0], 1, 1, num_channels])
    wp = warp(h, u, v, mi, mj)

    # if not warpfields:
    #   warpfields = []
    # warpfields.append(wp)
    # sumw = 0
    # for i in range(L):
    #   tscope = "w%d" % i
    #   if i < warpfields.__len__():
    #     tmp = warpfields[-1 * i]
    #     w = layers.conv2d(tmp, 3 * num_channels, [1, 1], stride=1,
    #                       scope=tscope)
    #     sumw += w
    #   else:
    #     tmp = warpfields[-1]
    #     w = layers.conv2d(tmp, 3 * num_channels, [1, 1], stride=1,
    #                       scope=tscope)

    # L is the number of trajectory links (a module-level constant).
    w = layers.conv2d(wp, 3 * L, [1, 1], stride=1, scope='warp')
    zw, rw, ow = tf.split(axis=3, num_or_size_splits=3, value=w)
    # w = tf.reduce_sum(w, axis=3)
    # sumw += w
    # w = tf.reduce_sum(w, axis=0)
    zw = tf.reshape(tf.reduce_sum(zw, axis=3), [
        inputs.get_shape()[0],
        inputs.get_shape()[1],
        inputs.get_shape()[2], 1
    ])
    rw = tf.reshape(tf.reduce_sum(rw, axis=3), [
        inputs.get_shape()[0],
        inputs.get_shape()[1],
        inputs.get_shape()[2], 1
    ])
    ow = tf.reshape(tf.reduce_sum(ow, axis=3), [
        inputs.get_shape()[0],
        inputs.get_shape()[1],
        inputs.get_shape()[2], 1
    ])

    z = tf.sigmoid(z + tf.tile(zw, [1, 1, 1, num_channels]))
    r = tf.sigmoid(r + tf.tile(rw, [1, 1, 1, num_channels]))
    # The f function from the paper is not specified by the authors;
    # tanh is used here instead.
    # hp = tf.tanh(o + tf.multiply(r, sumw))
    hp = tf.tanh(o + r * tf.tile(ow, [1, 1, 1, num_channels]))
    # new_h = tf.multiply(1 - z, hp) + tf.multiply(z, h)
    new_h = (1 - z) * hp + z * h

    return new_h, new_h
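# TrajGRUCell relies on a `warp` helper (and the module-level constant `L`)
# that is not shown here. As a rough stand-in, the sketch below resamples the
# previous hidden state at the flow-shifted coordinates with nearest-neighbour
# lookup; the original helper is presumably a bilinear sampler, so treat this
# only as an illustration of the interface, not the actual implementation.
def warp(features, flow_u, flow_v, grid_i, grid_j):
  """Hypothetical flow-based warp: sample `features` at (grid + flow).

  features:       previous hidden state, [batch, H, W, C].
  flow_u, flow_v: learned per-pixel offsets, [batch, H, W, C].
  grid_i, grid_j: base pixel coordinates built in TrajGRUCell, [batch, H, W, C].
  """
  shape = tf.shape(features)
  batch, height, width, channels = shape[0], shape[1], shape[2], shape[3]

  # Offset coordinates, clipped to stay inside the feature map.
  yy = tf.clip_by_value(grid_i + flow_u, 0., tf.cast(height - 1, tf.float32))
  xx = tf.clip_by_value(grid_j + flow_v, 0., tf.cast(width - 1, tf.float32))
  yi = tf.cast(tf.round(yy), tf.int32)
  xi = tf.cast(tf.round(xx), tf.int32)

  # Per-element (batch, y, x, channel) indices for gather_nd.
  b_idx = tf.tile(tf.reshape(tf.range(batch), [-1, 1, 1, 1]),
                  [1, height, width, channels])
  c_idx = tf.tile(tf.reshape(tf.range(channels), [1, 1, 1, -1]),
                  [batch, height, width, 1])
  indices = tf.stack([b_idx, yi, xi, c_idx], axis=-1)
  return tf.gather_nd(features, indices)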