def sample_pipeline(inputs, modality, reuse=False):
    prefix = self.config['prefixes'][modality]

    # Apply dropout at the input. We want to drop whole pixels (all channels at
    # once), therefore the noise shape has dim 1 for the channel axis:
    input_shape = tf.shape(inputs)
    noise_shape = [
        input_shape[0], input_shape[1], input_shape[2], 1
    ]
    inputs = dropout(inputs,
                     rate=self.config['dropout_rate'],
                     training=True,
                     noise_shape=noise_shape,
                     name='{}_dropout'.format(prefix))
    return get_prob(inputs, modality, reuse=reuse)
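
# A minimal standalone sketch (not part of the original pipeline) of what the
# noise_shape above achieves: with a trailing 1 on the channel axis, one dropout
# mask value is drawn per pixel and broadcast over all channels, so whole pixels
# are zeroed at once. Assumption: the `dropout` helper used above wraps
# tf.layers.dropout (TF 1.x).
def _spatial_dropout_sketch():
    import tensorflow as tf
    x = tf.ones([1, 4, 4, 3])  # NHWC input with 3 channels
    y = tf.layers.dropout(x,
                          rate=0.5,
                          training=True,
                          noise_shape=[1, 4, 4, 1])  # one mask value per pixel
    with tf.Session() as sess:
        # Each surviving pixel is scaled by 1/(1 - rate) in all channels; dropped
        # pixels are zero in all channels.
        return sess.run(y)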
def encoder(inputs,
            prefix,
            num_units,
            dropout_rate,
            trainable=True,
            batchnorm=True,
            is_training=False,
            reuse=tf.AUTO_REUSE,
            dropout_layers=[]):
    """
    VGG16 image encoder with fusion of conv4_3 and conv5_3 features.

    Args:
        inputs: input tensor, in channels-last format
        prefix: prefix of any variable name
        num_units: Number of feature units in the FCN.
        dropout_rate: Dropout rate used for the layers listed in dropout_layers.
        batchnorm (bool): Whether or not to perform batch normalization.
        trainable (bool): If False, variables are not trainable.
        is_training (bool): Indicator whether batch_normalization should be in training
            (batch) or testing (continuous) mode.
        reuse (bool): If True, reuse existing variables of the same name (note that
            the prefix is part of the variable name). Raises an error if no such
            variables exist.
        dropout_layers: a list of layers after which to apply dropout. Accepted
            values are 'pool3', 'pool4', 'conv4_3' and 'conv5_3'.
    Returns:
        Dict of (intermediate) layer outputs. The encoding has key 'fused'.
    """
    # These parameters are shared between many/all layers and therefore defined here
    # for convenience.
    params = {
        'activation': tf.nn.relu,
        'padding': 'same',
        'reuse': reuse,
        'batch_normalization': batchnorm,
        'training': is_training,
        'trainable': trainable
    }

    with tf.variable_scope(prefix, reuse=reuse):
        # dict for all the layers
        l = {}
        l['conv1_1'] = conv2d(inputs, 64, [3, 3], name='conv1_1', **params)
        l['conv1_2'] = conv2d(l['conv1_1'],
                              64, [3, 3],
                              name='conv1_2',
                              **params)
        l['pool1'] = max_pooling2d(l['conv1_2'], [2, 2], [2, 2], name='pool1')
        l['conv2_1'] = conv2d(l['pool1'],
                              128, [3, 3],
                              name='conv2_1',
                              **params)
        l['conv2_2'] = conv2d(l['conv2_1'],
                              128, [3, 3],
                              name='conv2_2',
                              **params)
        l['pool2'] = max_pooling2d(l['conv2_2'], [2, 2], [2, 2], name='pool2')
        l['conv3_1'] = conv2d(l['pool2'],
                              256, [3, 3],
                              name='conv3_1',
                              **params)
        l['conv3_2'] = conv2d(l['conv3_1'],
                              256, [3, 3],
                              name='conv3_2',
                              **params)
        l['conv3_3'] = conv2d(l['conv3_2'],
                              256, [3, 3],
                              name='conv3_3',
                              **params)
        l['pool3'] = max_pooling2d(l['conv3_3'], [2, 2], [2, 2], name='pool3')
        # dropout after pool3
        last_layer = l['pool3']
        if 'pool3' in dropout_layers:
            l['pool3_drop'] = dropout(l['pool3'],
                                      rate=dropout_rate,
                                      training=True,
                                      name='pool3_dropout')
            last_layer = l['pool3_drop']
        l['conv4_1'] = conv2d(last_layer,
                              512, [3, 3],
                              name='conv4_1',
                              **params)
        l['conv4_2'] = conv2d(l['conv4_1'],
                              512, [3, 3],
                              name='conv4_2',
                              **params)
        l['conv4_3'] = conv2d(l['conv4_2'],
                              512, [3, 3],
                              name='conv4_3',
                              **params)
        l['pool4'] = max_pooling2d(l['conv4_3'], [2, 2], [2, 2], name='pool4')
        # dropout after pool4
        last_layer = l['pool4']
        if 'pool4' in dropout_layers:
            l['pool4_drop'] = dropout(l['pool4'],
                                      rate=dropout_rate,
                                      training=True,
                                      name='pool4_dropout')
            last_layer = l['pool4_drop']
        l['conv5_1'] = conv2d(last_layer,
                              512, [3, 3],
                              name='conv5_1',
                              **params)
        l['conv5_2'] = conv2d(l['conv5_1'],
                              512, [3, 3],
                              name='conv5_2',
                              **params)
        l['conv5_3'] = conv2d(l['conv5_2'],
                              512, [3, 3],
                              name='conv5_3',
                              **params)
        # Use 1x1 convolutions on conv4_3 and conv5_3 to define features.
        # first, maybe apply dropout at these layers?
        conv4_3 = l['conv4_3']
        if 'conv4_3' in dropout_layers:
            conv4_3 = dropout(conv4_3,
                              rate=dropout_rate,
                              training=True,
                              name='conv4_3_dropout')
        score_conv4 = conv2d(conv4_3,
                             num_units, [1, 1],
                             name='score_conv4',
                             **params)
        conv5_3 = l['conv5_3']
        if 'conv5_3' in dropout_layers:
            conv5_3 = dropout(conv5_3,
                              rate=dropout_rate,
                              training=True,
                              name='conv5_3_dropout')
        score_conv5 = conv2d(conv5_3,
                             num_units, [1, 1],
                             name='score_conv5',
                             **params)
        # The deconvolution weights are always kept fixed (not trainable).
        params['trainable'] = False
        upscore_conv5 = deconv2d(score_conv5,
                                 num_units, [4, 4],
                                 strides=[2, 2],
                                 name='upscore_conv5',
                                 **params)

        l['fused'] = tf.add_n([score_conv4, upscore_conv5], name='add_score')
    # Return dictionary of all layers
    return l
def decoder(features,
            prefix,
            num_units,
            num_classes,
            trainable=True,
            is_training=False,
            reuse=tf.AUTO_REUSE,
            dropout_rate=None,
            batchnorm=True):
    """
    FCN feature decoder.

    Args:
        features: input tensor, in feature-last format
        prefix: prefix of any variable name
        num_units: Number of feature units in the FCN.
        num_classes: Number of output classes.
        dropout_rate: If set, dropout with this rate is applied to the input
            features. Set to None to disable dropout.
        batchnorm (bool): Whether or not to perform batch normalization.
        trainable (bool): If False, variables are not trainable.
        is_training (bool): Indicator whether batch_normalization should be in training
            (batch) or testing (continuous) mode.
        reuse (bool): If True, reuse existing variables of the same name (note that
            the prefix is part of the variable name). Raises an error if no such
            variables exist.
    Returns:
        dict of (intermediate) layer outputs. The final per-class activations have key
            'score'.
    """
    # These parameters are shared between many/all layers and therefore defined here
    # for convenience.
    params = {
        'activation': tf.nn.relu,
        'padding': 'same',
        'reuse': reuse,
        'batch_normalization': batchnorm,
        'training': is_training,
        'trainable': trainable
    }
    # Upscore layers are never trainable
    upscore_params = deepcopy(params)
    upscore_params['trainable'] = False

    with tf.variable_scope(prefix, reuse=reuse):
        if dropout_rate is not None:
            features = dropout(features,
                               rate=dropout_rate,
                               training=True,
                               name='features_dropout')

        # Upsample the fused features to the output classification size
        features = deconv2d(features,
                            num_units, [16, 16],
                            strides=[8, 8],
                            name='upscore',
                            **upscore_params)
        # no activation before the softmax
        params['activation'] = None
        score = conv2d(features, num_classes, [1, 1], name='score', **params)
    return {'upscore': features, 'score': score}
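
# A hedged usage sketch (not from the original code) wiring the encoder into the
# decoder for a single modality. The prefix, num_units, num_classes and dropout
# settings below are illustrative assumptions, not values from the source.
def fcn_forward(images, num_units=64, num_classes=14, is_training=True):
    layers = encoder(images,
                     prefix='rgb',
                     num_units=num_units,
                     dropout_rate=0.5,
                     is_training=is_training,
                     dropout_layers=['pool3', 'pool4'])
    # 'fused' holds the sum of the conv4_3 score and the upsampled conv5_3 score.
    outputs = decoder(layers['fused'],
                      prefix='rgb_decoder',
                      num_units=num_units,
                      num_classes=num_classes,
                      is_training=is_training)
    # 'score' holds the final per-class activations of the decoder.
    return outputs['score']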
    def call(self, inputs, state):
        """Run one step of LSTM.

        Args:
          inputs: input Tensor, 2D, `[batch, num_units]`.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, [batch, state_size]`.  If `state_is_tuple` is True, this must be a
            tuple of state Tensors, both `2-D`, with column sizes `c_state` and
            `m_state`.

        Returns:
          A tuple containing:

          - A `2-D, [batch, output_dim]`, Tensor representing the output of the
            LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading `inputs` when
            the previous state was `state`.  Same type and shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid


        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = math_ops.matmul(array_ops.concat([inputs, m_prev], 1), self._kernel)
        lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)
        i, j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=4, axis=1)
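
        # Zoneout-style regularization: masks are drawn via dropout on a tensor of
        # ones. Zoned-out units are 0 in the mask (1 in the complement), so the
        # blends further below keep the previous cell/output value for those units;
        # kept units carry the inverted-dropout scale 1/(1 - rate).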
        
        binary_mask_cell = dropout(array_ops.ones_like(c_prev),
                                   rate=self.cell_zoneout_rate,
                                   training=self.is_training)
        binary_mask_cell_complement = array_ops.ones_like(binary_mask_cell) - binary_mask_cell
        binary_mask_output = dropout(array_ops.ones_like(m_prev),
                                     rate=self.output_zoneout_rate,
                                     training=self.is_training)
        binary_mask_output_complement = array_ops.ones_like(binary_mask_output) - binary_mask_output

        # Diagonal connections
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
                sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
                self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        
        c = binary_mask_cell * c + binary_mask_cell_complement * c_prev

        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            m = math_ops.matmul(m, self._proj_kernel)

        if self._proj_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
            # pylint: enable=invalid-unary-operand-type

        m = binary_mask_output * m + binary_mask_output_complement * m_prev

        new_state = (LSTMStateTuple(c, m) if self._state_is_tuple
                     else array_ops.concat([c, m], 1))
        
        return m, new_state
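
# A minimal standalone sketch (plain NumPy, not part of the class above) of the
# zoneout blend applied to `c` and `m`: zoned-out units keep their previous value.
# For clarity this uses an unscaled 0/1 mask, whereas the code above derives its
# masks from dropout and therefore scales kept units by 1/(1 - rate) at train time.
def _zoneout_blend_sketch(rate=0.3, seed=0):
    import numpy as np
    rng = np.random.RandomState(seed)
    c_prev = rng.randn(4).astype(np.float32)   # previous cell state
    c_new = rng.randn(4).astype(np.float32)    # candidate new cell state
    keep = (rng.uniform(size=4) >= rate).astype(np.float32)  # 1 = update, 0 = zone out
    return keep * c_new + (1.0 - keep) * c_prev  # zoned-out units retain c_prev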