def sample_pipeline(inputs, modality, reuse=False):
    """Apply pixel-wise input dropout, then run the probability pipeline.

    Dropout is always active here (``training=True``). The noise shape has
    size 1 on the last axis, so one keep/drop decision is shared by all
    entries along that axis, i.e. whole pixels are zeroed instead of single
    channel values.

    NOTE(review): ``self`` is not a parameter; it is resolved from the
    enclosing scope, so this is presumably a closure defined inside a
    method -- confirm against the surrounding code.

    Args:
        inputs: input tensor; assumed to be 4-D and channel-last, since the
            first three dynamic dimensions are reused for the noise shape
            -- TODO confirm.
        modality: key used to look up the variable prefix in
            ``self.config['prefixes']``; also forwarded to ``get_prob``.
        reuse: forwarded unchanged to ``get_prob``.

    Returns:
        Whatever ``get_prob`` returns for the dropped-out inputs.
    """
    prefix = self.config['prefixes'][modality]

    # Share the dropout decision across the last (channel) axis by giving
    # that axis size 1 in the noise shape.
    dynamic_shape = tf.shape(inputs)
    pixel_noise_shape = [dynamic_shape[axis] for axis in range(3)] + [1]

    dropped_inputs = dropout(
        inputs,
        rate=self.config['dropout_rate'],
        training=True,
        noise_shape=pixel_noise_shape,
        name='{}_dropout'.format(prefix),
    )
    return get_prob(dropped_inputs, modality, reuse=reuse)
def encoder(inputs, prefix, num_units, dropout_rate, trainable=True, batchnorm=True,
            is_training=False, reuse=tf.AUTO_REUSE, dropout_layers=None):
    """VGG16 image encoder with fusion of conv4_3 and conv5_3 features.

    Args:
        inputs: input tensor, in channel-last format.
        prefix: prefix of any variable name (used as the variable scope).
        num_units: Number of feature units in the FCN.
        dropout_rate: Rate passed to every dropout layer enabled through
            `dropout_layers`.
        trainable (bool): If False, variables are not trainable.
        batchnorm (bool): Whether or not to perform batch normalization.
        is_training (bool): Indicator whether batch_normalization should be
            in training (batch) or testing (continuous) mode.
        reuse (bool): If true, reuse existing variables of same name
            (attention with prefix). Will raise error if it cannot find such
            variables.
        dropout_layers: collection of layer names after which to apply
            dropout. Recognised values are 'pool3', 'pool4', 'conv4_3' and
            'conv5_3'. None (the default) or an empty collection disables
            dropout. Enabled dropout layers are always built with
            `training=True`, independent of `is_training`.

    Returns:
        Dict of (intermediate) layer outputs. The encoding has key 'fused'.
        'pool3_drop' / 'pool4_drop' are only present if the corresponding
        dropout layer is enabled.
    """
    # Avoid a mutable default argument; only membership tests are needed.
    if dropout_layers is None:
        dropout_layers = ()

    # These parameters are shared between many/all layers and therefore
    # defined here for convenience.
    params = {
        'activation': tf.nn.relu,
        'padding': 'same',
        'reuse': reuse,
        'batch_normalization': batchnorm,
        'training': is_training,
        'trainable': trainable
    }

    with tf.variable_scope(prefix, reuse=reuse):
        # dict for all the layers
        layers = {}

        layers['conv1_1'] = conv2d(inputs, 64, [3, 3], name='conv1_1', **params)
        layers['conv1_2'] = conv2d(layers['conv1_1'], 64, [3, 3], name='conv1_2', **params)
        layers['pool1'] = max_pooling2d(layers['conv1_2'], [2, 2], [2, 2], name='pool1')

        layers['conv2_1'] = conv2d(layers['pool1'], 128, [3, 3], name='conv2_1', **params)
        layers['conv2_2'] = conv2d(layers['conv2_1'], 128, [3, 3], name='conv2_2', **params)
        layers['pool2'] = max_pooling2d(layers['conv2_2'], [2, 2], [2, 2], name='pool2')

        layers['conv3_1'] = conv2d(layers['pool2'], 256, [3, 3], name='conv3_1', **params)
        layers['conv3_2'] = conv2d(layers['conv3_1'], 256, [3, 3], name='conv3_2', **params)
        layers['conv3_3'] = conv2d(layers['conv3_2'], 256, [3, 3], name='conv3_3', **params)
        layers['pool3'] = max_pooling2d(layers['conv3_3'], [2, 2], [2, 2], name='pool3')

        # dropout after pool3
        last_layer = layers['pool3']
        if 'pool3' in dropout_layers:
            layers['pool3_drop'] = dropout(layers['pool3'], rate=dropout_rate,
                                           training=True, name='pool3_dropout')
            last_layer = layers['pool3_drop']

        layers['conv4_1'] = conv2d(last_layer, 512, [3, 3], name='conv4_1', **params)
        layers['conv4_2'] = conv2d(layers['conv4_1'], 512, [3, 3], name='conv4_2', **params)
        layers['conv4_3'] = conv2d(layers['conv4_2'], 512, [3, 3], name='conv4_3', **params)
        layers['pool4'] = max_pooling2d(layers['conv4_3'], [2, 2], [2, 2], name='pool4')

        # dropout after pool4. The guard must test 'pool4' (not 'pool3'),
        # otherwise requesting 'pool4' has no effect and requesting 'pool3'
        # silently enables both dropout layers.
        last_layer = layers['pool4']
        if 'pool4' in dropout_layers:
            layers['pool4_drop'] = dropout(layers['pool4'], rate=dropout_rate,
                                           training=True, name='pool4_dropout')
            last_layer = layers['pool4_drop']

        layers['conv5_1'] = conv2d(last_layer, 512, [3, 3], name='conv5_1', **params)
        layers['conv5_2'] = conv2d(layers['conv5_1'], 512, [3, 3], name='conv5_2', **params)
        layers['conv5_3'] = conv2d(layers['conv5_2'], 512, [3, 3], name='conv5_3', **params)

        # Use 1x1 convolutions on conv4_3 and conv5_3 to define features,
        # optionally applying dropout on their inputs first.
        conv4_3 = layers['conv4_3']
        if 'conv4_3' in dropout_layers:
            conv4_3 = dropout(conv4_3, rate=dropout_rate, training=True,
                              name='conv4_3_dropout')
        score_conv4 = conv2d(conv4_3, num_units, [1, 1], name='score_conv4', **params)

        conv5_3 = layers['conv5_3']
        if 'conv5_3' in dropout_layers:
            conv5_3 = dropout(conv5_3, rate=dropout_rate, training=True,
                              name='conv5_3_dropout')
        score_conv5 = conv2d(conv5_3, num_units, [1, 1], name='score_conv5', **params)

        # The deconvolution is always set static (never trainable). Use a
        # copy so the shared parameter dict is not modified in place.
        upscore_params = dict(params, trainable=False)
        upscore_conv5 = deconv2d(score_conv5, num_units, [4, 4], strides=[2, 2],
                                 name='upscore_conv5', **upscore_params)

        layers['fused'] = tf.add_n([score_conv4, upscore_conv5], name='add_score')

    # Return dictionary of all layers
    return layers
def decoder(features, prefix, num_units, num_classes, trainable=True, is_training=False,
            reuse=tf.AUTO_REUSE, dropout_rate=None, batchnorm=True):
    """FCN feature decoder: optional dropout, static upsampling, 1x1 scoring.

    Args:
        features: input tensor, in feature-last format.
        prefix: prefix of any variable name (used as the variable scope).
        num_units: Number of feature units in the FCN.
        num_classes: Number of output classes.
        trainable (bool): If False, variables are not trainable. The upscore
            layer is never trainable, regardless of this flag.
        is_training (bool): Indicator whether batch_normalization should be
            in training (batch) or testing (continuous) mode.
        reuse (bool): If true, reuse existing variables of same name
            (attention with prefix). Will raise error if it cannot find such
            variables.
        dropout_rate: If not None, dropout with this rate is applied to the
            decoder input. The dropout layer is always built with
            `training=True`, independent of `is_training`. With None no
            dropout op is created.
        batchnorm (bool): Whether or not to perform batch normalization.

    Returns:
        dict with key 'upscore' (the upsampled features) and key 'score'
        (the final per-class activations, without activation function).
    """
    # Keyword arguments common to the layers built below.
    layer_kwargs = {
        'activation': tf.nn.relu,
        'padding': 'same',
        'reuse': reuse,
        'batch_normalization': batchnorm,
        'training': is_training,
        'trainable': trainable
    }

    # Upscore layers are never trainable.
    upscore_kwargs = deepcopy(layer_kwargs)
    upscore_kwargs['trainable'] = False

    with tf.variable_scope(prefix, reuse=reuse):
        decoder_input = features
        if dropout_rate is not None:
            decoder_input = dropout(decoder_input, rate=dropout_rate, training=True,
                                    name='features_dropout')

        # Upsample the fused features to the output classification size
        # (16x16 kernel, stride 8).
        upscored = deconv2d(decoder_input, num_units, [16, 16], strides=[8, 8],
                            name='upscore', **upscore_kwargs)

        # No activation before the softmax.
        layer_kwargs['activation'] = None
        class_scores = conv2d(upscored, num_classes, [1, 1], name='score',
                              **layer_kwargs)

    return {'upscore': upscored, 'score': class_scores}
def call(self, inputs, state):
    """Run one step of the LSTM, mixing new and previous states via masks.

    After the regular LSTM update, the new cell state and the new output are
    each blended with their previous values using a mask obtained by applying
    `dropout` to a tensor of ones, and the complement of that mask.

    NOTE(review): if `dropout` rescales kept units by 1 / (1 - rate) (as
    `tf.layers.dropout` does), the masks are not strictly binary while
    training and the complements become negative -- confirm which `dropout`
    is in scope here.

    Args:
        inputs: input Tensor, 2D, `[batch, num_units]`.
        state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, [batch, state_size]`. If `state_is_tuple` is True, this
            must be a tuple of state Tensors, both `2-D`, with column sizes
            `c_state` and `m_state`.

    Returns:
        A tuple containing:

        - A `2-D, [batch, output_dim]`, Tensor representing the output of
          the LSTM after reading `inputs` when previous state was `state`.
          Here output_dim is: num_proj if num_proj was set, num_units
          otherwise.
        - Tensor(s) representing the new state of LSTM after reading
          `inputs` when the previous state was `state`. Same type and
          shape(s) as `state`.

    Raises:
        ValueError: If input size cannot be inferred from inputs via static
            shape inference.
    """
    sigmoid = math_ops.sigmoid
    num_proj = self._num_proj if self._num_proj is not None else self._num_units

    # Recover the previous cell state (c) and output (m).
    if self._state_is_tuple:
        c_prev, m_prev = state
    else:
        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    gate_inputs = array_ops.concat([inputs, m_prev], 1)
    lstm_matrix = math_ops.matmul(gate_inputs, self._kernel)
    lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)
    i, j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=4, axis=1)

    # Masks selecting the freshly computed value (mask) versus the previous
    # value (complement), for the cell state and for the output.
    cell_mask = dropout(array_ops.ones_like(c_prev),
                        rate=self.cell_zoneout_rate,
                        training=self.is_training)
    cell_mask_complement = array_ops.ones_like(cell_mask) - cell_mask
    output_mask = dropout(array_ops.ones_like(m_prev),
                          rate=self.output_zoneout_rate,
                          training=self.is_training)
    output_mask_complement = array_ops.ones_like(output_mask) - output_mask

    # New cell state; with peepholes the gates also see c_prev through
    # diagonal connections.
    if self._use_peepholes:
        forget_gate = sigmoid(f + self._forget_bias + self._w_f_diag * c_prev)
        retained = forget_gate * c_prev
        input_gate = sigmoid(i + self._w_i_diag * c_prev)
        c = retained + input_gate * self._activation(j)
    else:
        forget_gate = sigmoid(f + self._forget_bias)
        retained = forget_gate * c_prev
        c = retained + sigmoid(i) * self._activation(j)

    if self._cell_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type

    # Blend the new cell state with the previous one.
    c = cell_mask * c + cell_mask_complement * c_prev

    # New output, optionally using the peephole connection on the new cell.
    if self._use_peepholes:
        output_gate = sigmoid(o + self._w_o_diag * c)
    else:
        output_gate = sigmoid(o)
    m = output_gate * self._activation(c)

    if self._num_proj is not None:
        m = math_ops.matmul(m, self._proj_kernel)

        if self._proj_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
            # pylint: enable=invalid-unary-operand-type

    # Blend the new output with the previous one.
    m = output_mask * m + output_mask_complement * m_prev

    if self._state_is_tuple:
        new_state = LSTMStateTuple(c, m)
    else:
        new_state = array_ops.concat([c, m], 1)
    return m, new_state