예제 #1
0
def depthmotion_block_demon_original(image_pair,
                                     image2_2,
                                     prev_flow2,
                                     prev_flowconf2,
                                     prev_rotation=None,
                                     prev_translation=None,
                                     intrinsics=None,
                                     data_format='channels_first'):
    """Creates a depth and motion network (caffe-padding layer variant)

    image_pair: Tensor
        Image pair concatenated along the channel axis.
        The layout follows data_format (NCHW or NHWC) with C == 6.

    image2_2: Tensor
        Second image at resolution level 2

    prev_flow2: Tensor
        The output of the flow network. Contains only the flow (2 channels)

    prev_flowconf2: Tensor
        The output of the flow network. Contains flow and flow confidence (4 channels)

    prev_rotation: Tensor
        The previously predicted rotation.

    prev_translation: Tensor
        The previously predicted translation.

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters
        Only required if prev_rotation and prev_translation is not None.

    data_format: str
        Either 'channels_first' (NCHW) or 'channels_last' (NHWC).

    Returns a dictionary with the predictions for depth, normals and motion.
    The keys are 'predict_depth2', 'predict_normal2', 'predict_rotation',
    'predict_translation' and 'predict_scale'.
    """
    conv_params = {'data_format': data_format}
    fc_params = {}
    channels_first = data_format == 'channels_first'
    channel_axis = 1 if channels_first else 3

    # contracting part
    conv1 = convrelu2_caffe_padding(name='conv1',
                                    inputs=image_pair,
                                    num_outputs=32,
                                    kernel_size=9,
                                    stride=2,
                                    **conv_params)

    conv2 = convrelu2_caffe_padding(name='conv2',
                                    inputs=conv1,
                                    num_outputs=32,
                                    kernel_size=7,
                                    stride=2,
                                    **conv_params)

    # create extra inputs
    # the sops ops are always called with NCHW tensors, so channels-last
    # inputs are converted before the call and the result is converted back
    if channels_first:
        prev_flow2_nchw = prev_flow2
        image2_2_nchw = image2_2
    else:
        prev_flow2_nchw = convert_NHWC_to_NCHW(prev_flow2)
        image2_2_nchw = convert_NHWC_to_NCHW(image2_2)
    image2_2_warped = sops.warp2d(image2_2_nchw,
                                  prev_flow2_nchw,
                                  normalized=True,
                                  border_mode='value')
    if not channels_first:
        image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)

    extra_inputs = [image2_2_warped, prev_flowconf2]
    # the depth-from-flow input is only added when the previous motion and
    # the intrinsics are all available
    if (prev_rotation is not None and prev_translation is not None
            and intrinsics is not None):
        depth_from_flow = sops.flow_to_depth(
            flow=prev_flow2_nchw,
            intrinsics=intrinsics,
            rotation=prev_rotation,
            translation=prev_translation,
            normalized_flow=True,
            inverse_depth=True,
        )
        if not channels_first:
            depth_from_flow = convert_NCHW_to_NHWC(depth_from_flow)

        extra_inputs.append(depth_from_flow)

    # no gradient flows back into the tensors that produced the extra inputs
    concat_extra_inputs = tf.stop_gradient(
        tf.concat(extra_inputs, axis=channel_axis))
    conv_extra_inputs = convrelu2_caffe_padding(name='conv2_extra_inputs',
                                                inputs=concat_extra_inputs,
                                                num_outputs=32,
                                                kernel_size=3,
                                                stride=1,
                                                **conv_params)
    conv2_concat = tf.concat((conv2, conv_extra_inputs), axis=channel_axis)
    conv2_1 = convrelu2_caffe_padding(name='conv2_1',
                                      inputs=conv2_concat,
                                      num_outputs=64,
                                      kernel_size=3,
                                      stride=1,
                                      **conv_params)
    tf.add_to_collection('endpoints', conv2_1)

    conv3 = convrelu2_caffe_padding(name='conv3',
                                    inputs=conv2_1,
                                    num_outputs=128,
                                    kernel_size=5,
                                    stride=2,
                                    **conv_params)
    conv3_1 = convrelu2_caffe_padding(name='conv3_1',
                                      inputs=conv3,
                                      num_outputs=128,
                                      kernel_size=3,
                                      stride=1,
                                      **conv_params)
    tf.add_to_collection('endpoints', conv3_1)

    conv4 = convrelu2_caffe_padding(name='conv4',
                                    inputs=conv3_1,
                                    num_outputs=256,
                                    kernel_size=5,
                                    stride=2,
                                    **conv_params)
    conv4_1 = convrelu2_caffe_padding(name='conv4_1',
                                      inputs=conv4,
                                      num_outputs=256,
                                      kernel_size=3,
                                      stride=1,
                                      **conv_params)
    tf.add_to_collection('endpoints', conv4_1)

    conv5 = convrelu2_caffe_padding(name='conv5',
                                    inputs=conv4_1,
                                    num_outputs=512,
                                    kernel_size=3,
                                    stride=2,
                                    **conv_params)
    conv5_1 = convrelu2_caffe_padding(name='conv5_1',
                                      inputs=conv5,
                                      num_outputs=512,
                                      kernel_size=3,
                                      stride=1,
                                      **conv_params)
    tf.add_to_collection('endpoints', conv5_1)

    # motion prediction part
    motion_conv1 = convrelu_caffe_padding(
        name='motion_conv1',
        inputs=conv5_1,
        num_outputs=128,
        kernel_size=3,
        strides=1,
        **conv_params,
    )
    tf.add_to_collection('endpoints', motion_conv1)

    # the features are brought to NCHW before flattening; presumably this
    # keeps the element order seen by motion_fc1 independent of data_format
    if data_format == 'channels_last':
        motion_conv1 = convert_NHWC_to_NCHW(motion_conv1)
    motion_fc1 = tf.layers.dense(
        name='motion_fc1',
        inputs=tf.contrib.layers.flatten(motion_conv1),
        units=1024,
        activation=myLeakyRelu,
        **fc_params,
    )
    motion_fc2 = tf.layers.dense(
        name='motion_fc2',
        inputs=motion_fc1,
        units=128,
        activation=myLeakyRelu,
        **fc_params,
    )
    predict_motion_scale = tf.layers.dense(
        name='motion_fc3',
        inputs=motion_fc2,
        units=7,
        activation=None,
        **fc_params,
    )

    # the 7 outputs are split into rotation (3), translation (3), scale (1)
    predict_rotation, predict_translation, predict_scale = tf.split(
        value=predict_motion_scale, num_or_size_splits=[3, 3, 1], axis=1)

    # expanding part
    with tf.variable_scope('refine4'):
        concat4 = _refine_caffe_padding(
            inp=conv5_1,
            num_outputs=256,
            features_direct=conv4_1,
            **conv_params,
        )

    with tf.variable_scope('refine3'):
        concat3 = _refine_caffe_padding(
            inp=concat4,
            num_outputs=128,
            features_direct=conv3_1,
            **conv_params,
        )

    with tf.variable_scope('refine2'):
        concat2 = _refine_caffe_padding(
            inp=concat3,
            num_outputs=64,
            features_direct=conv2_1,
            **conv_params,
        )

    with tf.variable_scope('predict_depthnormal2'):
        predict_depth2, predict_normal2 = _predict_depthnormal_caffe_padding(
            concat2, predicted_scale=predict_scale, **conv_params)

    return {
        'predict_depth2': predict_depth2,
        'predict_normal2': predict_normal2,
        'predict_rotation': predict_rotation,
        'predict_translation': predict_translation,
        'predict_scale': predict_scale,
    }
예제 #2
0
def depthmotion_block(image_pair,
                      image2_2,
                      prev_flow2,
                      prev_flowconf2,
                      prev_rotation=None,
                      prev_translation=None,
                      intrinsics=None,
                      data_format='channels_first',
                      kernel_regularizer=None):
    """Creates a depth and motion network

    image_pair: Tensor
        Image pair concatenated along the channel axis.
        The layout follows data_format (NCHW or NHWC) with C == 6.

    image2_2: Tensor
        Second image at resolution level 2

    prev_flow2: Tensor
        The output of the flow network. Contains only the flow (2 channels)

    prev_flowconf2: Tensor
        The output of the flow network. Contains flow and flow confidence (4 channels)

    prev_rotation: Tensor
        The previously predicted rotation.

    prev_translation: Tensor
        The previously predicted translation.

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters
        Only required if prev_rotation and prev_translation is not None.

    data_format: str
        Either 'channels_first' (NCHW) or 'channels_last' (NHWC).

    kernel_regularizer: function
        Regularizer passed to the convolution helpers and the dense layers.

    Returns a dictionary with the predictions for depth, normals and motion.
    The keys are 'predict_depth2', 'predict_normal2', 'predict_rotation',
    'predict_translation' and 'predict_scale'.
    """
    conv_params = {
        'data_format': data_format,
        'kernel_regularizer': kernel_regularizer
    }
    fc_params = {}
    channels_first = data_format == 'channels_first'
    channel_axis = 1 if channels_first else 3

    # contracting part
    conv1 = convrelu2(name='conv1',
                      inputs=image_pair,
                      num_outputs=(24, 32),
                      kernel_size=9,
                      stride=2,
                      **conv_params)

    conv2 = convrelu2(name='conv2',
                      inputs=conv1,
                      num_outputs=32,
                      kernel_size=7,
                      stride=2,
                      **conv_params)

    # create extra inputs
    # the sops ops are always called with NCHW tensors, so channels-last
    # inputs are converted before the call and the result is converted back
    if channels_first:
        prev_flow2_nchw = prev_flow2
        image2_2_nchw = image2_2
    else:
        prev_flow2_nchw = convert_NHWC_to_NCHW(prev_flow2)
        image2_2_nchw = convert_NHWC_to_NCHW(image2_2)
    image2_2_warped = sops.warp2d(image2_2_nchw,
                                  prev_flow2_nchw,
                                  normalized=True,
                                  border_mode='value')
    if not channels_first:
        image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)

    extra_inputs = [image2_2_warped, prev_flowconf2]
    # the depth-from-flow input is only added when the previous motion and
    # the intrinsics are all available
    if (prev_rotation is not None and prev_translation is not None
            and intrinsics is not None):
        depth_from_flow = sops.flow_to_depth2(
            flow=prev_flow2_nchw,
            intrinsics=intrinsics,
            rotation=prev_rotation,
            translation=prev_translation,
            normalized_flow=True,
            inverse_depth=True,
        )
        if not channels_first:
            depth_from_flow = convert_NCHW_to_NHWC(depth_from_flow)
        # keep the (inverse) depth values in a bounded range
        depth_from_flow = tf.clip_by_value(depth_from_flow, 0.0, 50.0)

        extra_inputs.append(depth_from_flow)

    # no gradient flows back into the tensors that produced the extra inputs
    concat_extra_inputs = tf.stop_gradient(
        tf.concat(extra_inputs, axis=channel_axis))
    conv_extra_inputs = convrelu2(name='conv2_extra_inputs',
                                  inputs=concat_extra_inputs,
                                  num_outputs=32,
                                  kernel_size=3,
                                  stride=1,
                                  **conv_params)
    conv2_concat = tf.concat((conv2, conv_extra_inputs), axis=channel_axis)
    conv2_1 = convrelu2(name='conv2_1',
                        inputs=conv2_concat,
                        num_outputs=64,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv3 = convrelu2(name='conv3',
                      inputs=conv2_1,
                      num_outputs=(96, 128),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv3_1 = convrelu2(name='conv3_1',
                        inputs=conv3,
                        num_outputs=128,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv4 = convrelu2(name='conv4',
                      inputs=conv3_1,
                      num_outputs=(192, 256),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv4_1 = convrelu2(name='conv4_1',
                        inputs=conv4,
                        num_outputs=256,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv5 = convrelu2(name='conv5',
                      inputs=conv4_1,
                      num_outputs=384,
                      kernel_size=3,
                      stride=2,
                      **conv_params)
    conv5_1 = convrelu2(name='conv5_1',
                        inputs=conv5,
                        num_outputs=384,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    # dense5 is a fully connected layer on the first 96 channels of conv5_1;
    # its output has as many units as that slice has elements per sample and
    # is reshaped back to the slice shape and appended as extra channels.
    # NOTE(review): unlike motion_fc1 below, the slice is flattened in the
    # layout given by data_format - confirm this is intended for NHWC.
    dense_slice_shape = conv5_1.get_shape().as_list()
    dense_slice_shape[channel_axis if channels_first else -1] = 96
    units = 1
    for dim in dense_slice_shape[1:]:
        units *= dim
    # an unknown batch size is reported as None, which tf.slice and
    # tf.reshape cannot take; -1 means "all remaining elements" for tf.slice
    # and "infer this dimension" for tf.reshape
    if dense_slice_shape[0] is None:
        dense_slice_shape[0] = -1
    dense5 = tf.layers.dense(tf.contrib.layers.flatten(
        tf.slice(conv5_1, [0, 0, 0, 0], dense_slice_shape)),
                             units=units,
                             activation=myLeakyRelu,
                             kernel_initializer=default_weights_initializer(),
                             kernel_regularizer=kernel_regularizer,
                             name='dense5')
    conv5_1_dense5 = tf.concat(
        (conv5_1, tf.reshape(dense5, dense_slice_shape)), axis=channel_axis)

    # motion prediction part
    # branch a: three strided convolutions starting from conv2_1
    motion_conv3 = convrelu2(name='motion_conv3',
                             inputs=conv2_1,
                             num_outputs=64,
                             kernel_size=5,
                             stride=2,
                             **conv_params)
    motion_conv4 = convrelu2(name='motion_conv4',
                             inputs=motion_conv3,
                             num_outputs=64,
                             kernel_size=5,
                             stride=2,
                             **conv_params)
    motion_conv5a = convrelu2(name='motion_conv5a',
                              inputs=motion_conv4,
                              num_outputs=64,
                              kernel_size=3,
                              stride=2,
                              **conv_params)

    # branch b: one convolution on the level 5 features including dense5
    motion_conv5b = convrelu(
        name='motion_conv5b',
        inputs=conv5_1_dense5,
        num_outputs=64,
        kernel_size=3,
        strides=1,
        **conv_params,
    )
    motion_conv5_1 = tf.concat((motion_conv5a, motion_conv5b),
                               axis=channel_axis)

    # the features are brought to NCHW before flattening; presumably this
    # keeps the element order seen by motion_fc1 independent of data_format
    if data_format == 'channels_last':
        motion_conv5_1 = convert_NHWC_to_NCHW(motion_conv5_1)
    motion_fc1 = tf.layers.dense(
        name='motion_fc1',
        inputs=tf.contrib.layers.flatten(motion_conv5_1),
        units=1024,
        activation=myLeakyRelu,
        kernel_regularizer=kernel_regularizer,
        **fc_params,
    )
    motion_fc2 = tf.layers.dense(
        name='motion_fc2',
        inputs=motion_fc1,
        units=128,
        activation=myLeakyRelu,
        kernel_regularizer=kernel_regularizer,
        **fc_params,
    )
    predict_motion_scale = tf.layers.dense(
        name='motion_fc3',
        inputs=motion_fc2,
        units=7,
        activation=None,
        kernel_regularizer=kernel_regularizer,
        **fc_params,
    )

    # the 7 outputs are split into rotation (3), translation (3), scale (1)
    predict_rotation, predict_translation, predict_scale = tf.split(
        value=predict_motion_scale, num_or_size_splits=[3, 3, 1], axis=1)

    # expanding part
    with tf.variable_scope('refine4'):
        concat4 = _refine(
            inp=conv5_1,
            num_outputs=256,
            features_direct=conv4_1,
            **conv_params,
        )

    with tf.variable_scope('refine3'):
        concat3 = _refine(
            inp=concat4,
            num_outputs=128,
            features_direct=conv3_1,
            **conv_params,
        )

    with tf.variable_scope('refine2'):
        concat2 = _refine(
            inp=concat3,
            num_outputs=64,
            features_direct=conv2_1,
            **conv_params,
        )

    with tf.variable_scope('predict_depthnormal2'):
        predict_depth2, predict_normal2 = _predict_depthnormal(
            concat2, predicted_scale=predict_scale, **conv_params)

    return {
        'predict_depth2': predict_depth2,
        'predict_normal2': predict_normal2,
        'predict_rotation': predict_rotation,
        'predict_translation': predict_translation,
        'predict_scale': predict_scale,
    }
예제 #3
0
def flow_block_demon_original(image_pair,
                              image2_2=None,
                              intrinsics=None,
                              prev_predictions=None,
                              data_format='channels_first'):
    """Builds the flow network with the caffe-padding layer helpers

    The network is an encoder (conv1 .. conv5_1) followed by a decoder
    (refine4 .. refine2). Flow with confidence is predicted from the level 5
    features and from the level 2 features.

    image_pair: Tensor
        Image pair concatenated along the channel axis.

    image2_2: Tensor
        Second image at resolution level 2 (downsampled two times).
        Only read when prev_predictions is given.

    intrinsics: Tensor
        The normalized intrinsic parameters.
        Only read when prev_predictions is given.

    prev_predictions: dict of Tensor
        Predictions from the previous depth block. The keys read here are
        'predict_depth2', 'predict_normal2', 'predict_rotation' and
        'predict_translation'. With None only image_pair is used.

    data_format: str
        Either 'channels_first' (NCHW) or 'channels_last' (NHWC).

    Returns a dict with the keys 'predict_flowconf5' and 'predict_flowconf2'
    """
    is_nchw = data_format == 'channels_first'
    is_nhwc = data_format == 'channels_last'
    concat_axis = 1 if is_nchw else 3

    def conv_pair(name, inputs, num_outputs, kernel_size, stride):
        # every encoder layer uses the same helper and data_format
        return convrelu2_caffe_padding(name=name,
                                       inputs=inputs,
                                       num_outputs=num_outputs,
                                       kernel_size=kernel_size,
                                       stride=stride,
                                       data_format=data_format)

    # ---- encoder ----
    conv1 = conv_pair('conv1', image_pair, 32, 9, 2)

    if prev_predictions is not None:
        conv2 = conv_pair('conv2', conv1, 32, 7, 2)

        # flow induced by the previous depth and motion; depth_to_flow is
        # fed with an NCHW depth map
        depth2 = prev_predictions['predict_depth2']
        depth2_nchw = depth2 if is_nchw else convert_NHWC_to_NCHW(depth2)
        raw_flow = sops.depth_to_flow(
            intrinsics=intrinsics,
            depth=depth2_nchw,
            rotation=prev_predictions['predict_rotation'],
            translation=prev_predictions['predict_translation'],
            inverse_depth=True,
            normalize_flow=True,
        )

        # flow vectors with a norm of 1.0 or more are replaced by zeros.
        # nan values (very bad camera parameters) fail the comparison as
        # well and are therefore replaced too
        flow_norm = tf.norm(raw_flow, axis=1, keep_dims=True)
        # duplicate the norm so that the mask covers both flow channels
        flow_norm = tf.concat((flow_norm, flow_norm), axis=1)
        zero_flow = tf.zeros_like(raw_flow, dtype=tf.float32)
        flow_from_depth_motion = tf.where(flow_norm < 1.0, raw_flow,
                                          zero_flow)

        # warp the second image with the masked flow (NCHW for warp2d)
        if is_nchw:
            image2_2_nchw = image2_2
        else:
            image2_2_nchw = convert_NHWC_to_NCHW(image2_2)
        image2_2_warped = sops.warp2d(
            input=image2_2_nchw,
            displacements=flow_from_depth_motion,
            normalized=True,
            border_mode='value',
        )
        if is_nhwc:
            flow_from_depth_motion = convert_NCHW_to_NHWC(
                flow_from_depth_motion)
            image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)

        # gradients are stopped at the concatenated extra inputs
        extra_inputs_concat = tf.stop_gradient(
            tf.concat(
                (image2_2_warped, flow_from_depth_motion, depth2,
                 prev_predictions['predict_normal2']),
                axis=concat_axis))

        conv_extra_inputs = conv_pair('conv2_extra_inputs',
                                      extra_inputs_concat, 32, 3, 1)
        conv2_1 = conv_pair(
            'conv2_1',
            tf.concat((conv2, conv_extra_inputs), axis=concat_axis), 64, 3, 1)
    else:
        # first iteration: there are no previous predictions to feed in
        conv2 = conv_pair('conv2', conv1, 64, 7, 2)
        conv2_1 = conv_pair('conv2_1', conv2, 64, 3, 1)
        tf.add_to_collection('endpoints', conv2_1)

    conv3 = conv_pair('conv3', conv2_1, 128, 5, 2)
    conv3_1 = conv_pair('conv3_1', conv3, 128, 3, 1)
    tf.add_to_collection('endpoints', conv3_1)

    conv4 = conv_pair('conv4', conv3_1, 256, 5, 2)
    conv4_1 = conv_pair('conv4_1', conv4, 256, 3, 1)
    tf.add_to_collection('endpoints', conv4_1)

    conv5 = conv_pair('conv5', conv4_1, 512, 5, 2)
    conv5_1 = conv_pair('conv5_1', conv5, 512, 3, 1)
    tf.add_to_collection('endpoints', conv5_1)

    # ---- decoder ----
    with tf.variable_scope('predict_flow5'):
        predict_flowconf5 = _predict_flow_caffe_padding(
            conv5_1, predict_confidence=True, data_format=data_format)
        tf.add_to_collection('endpoints', predict_flowconf5)

    with tf.variable_scope('upsample_flow5to4'):
        predict_flowconf5to4 = _upsample_prediction(predict_flowconf5,
                                                    2,
                                                    data_format=data_format)

    # only refine4 receives an upsampled prediction
    with tf.variable_scope('refine4'):
        concat4 = _refine_caffe_padding(
            inp=conv5_1,
            num_outputs=256,
            upsampled_prediction=predict_flowconf5to4,
            features_direct=conv4_1,
            data_format=data_format,
        )

    with tf.variable_scope('refine3'):
        concat3 = _refine_caffe_padding(
            inp=concat4,
            num_outputs=128,
            features_direct=conv3_1,
            data_format=data_format,
        )

    with tf.variable_scope('refine2'):
        concat2 = _refine_caffe_padding(
            inp=concat3,
            num_outputs=64,
            features_direct=conv2_1,
            data_format=data_format,
        )

    with tf.variable_scope('predict_flow2'):
        predict_flowconf2 = _predict_flow_caffe_padding(
            concat2, predict_confidence=True, data_format=data_format)
        tf.add_to_collection('endpoints', predict_flowconf2)

    predictions = {}
    predictions['predict_flowconf5'] = predict_flowconf5
    predictions['predict_flowconf2'] = predict_flowconf2
    return predictions
예제 #4
0
def flow_block(image_pair,
               image2_2=None,
               intrinsics=None,
               prev_predictions=None,
               data_format='channels_first',
               kernel_regularizer=None):
    """Creates a flow network

    image_pair: Tensor
        Image pair concatenated along the channel axis.

    image2_2: Tensor
        Second image at resolution level 2 (downsampled two times).
        Only read when prev_predictions is given.

    intrinsics: Tensor
        The normalized intrinsic parameters.
        Only read when prev_predictions is given.

    prev_predictions: dict of Tensor
        Predictions from the previous depth block. The keys read here are
        'predict_depth2', 'predict_normal2', 'predict_rotation' and
        'predict_translation'. With None only image_pair is used.

    data_format: str
        Either 'channels_first' (NCHW) or 'channels_last' (NHWC).

    kernel_regularizer: function
        Regularizer passed to the convolution helpers and the dense layer.

    Returns a dict with the keys 'predict_flowconf5' and 'predict_flowconf2'
    """
    conv_params = {
        'data_format': data_format,
        'kernel_regularizer': kernel_regularizer
    }
    channels_first = data_format == 'channels_first'
    channel_axis = 1 if channels_first else 3

    # contracting part
    conv1 = convrelu2(name='conv1',
                      inputs=image_pair,
                      num_outputs=(24, 32),
                      kernel_size=9,
                      stride=2,
                      **conv_params)

    if prev_predictions is None:
        conv2 = convrelu2(name='conv2',
                          inputs=conv1,
                          num_outputs=(48, 64),
                          kernel_size=7,
                          stride=2,
                          **conv_params)
        conv2_1 = convrelu2(name='conv2_1',
                            inputs=conv2,
                            num_outputs=64,
                            kernel_size=3,
                            stride=1,
                            **conv_params)
    else:
        conv2 = convrelu2(name='conv2',
                          inputs=conv1,
                          num_outputs=32,
                          kernel_size=7,
                          stride=2,
                          **conv_params)

        # create warped input
        # the sops ops are called with NCHW tensors in both data formats
        if channels_first:
            prev_depth_nchw = prev_predictions['predict_depth2']
        else:
            prev_depth_nchw = convert_NHWC_to_NCHW(
                prev_predictions['predict_depth2'])

        _flow_from_depth_motion = sops.depth_to_flow(
            intrinsics=intrinsics,
            depth=prev_depth_nchw,
            rotation=prev_predictions['predict_rotation'],
            translation=prev_predictions['predict_translation'],
            inverse_depth=True,
            normalize_flow=True,
        )
        # set flow vectors to zero if the motion is too large.
        # this also eliminates nan values which can be produced by very bad camera parameters
        flow_from_depth_motion_norm = tf.norm(_flow_from_depth_motion,
                                              axis=1,
                                              keep_dims=True)
        # duplicate the norm so that the mask covers both flow channels
        flow_from_depth_motion_norm = tf.concat(
            (flow_from_depth_motion_norm, flow_from_depth_motion_norm), axis=1)
        tmp_zeros = tf.zeros_like(_flow_from_depth_motion, dtype=tf.float32)
        flow_from_depth_motion = tf.where(flow_from_depth_motion_norm < 1.0,
                                          _flow_from_depth_motion, tmp_zeros)

        image2_2_warped = sops.warp2d(
            input=image2_2
            if channels_first else convert_NHWC_to_NCHW(image2_2),
            displacements=flow_from_depth_motion,
            normalized=True,
            border_mode='value',
        )
        if data_format == 'channels_last':
            flow_from_depth_motion = convert_NCHW_to_NHWC(
                flow_from_depth_motion)
            image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)
        extra_inputs = (image2_2_warped, flow_from_depth_motion,
                        prev_predictions['predict_depth2'],
                        prev_predictions['predict_normal2'])

        # stop gradient here
        extra_inputs_concat = tf.stop_gradient(
            tf.concat(extra_inputs, axis=channel_axis))

        conv_extra_inputs = convrelu2(name='conv2_extra_inputs',
                                      inputs=extra_inputs_concat,
                                      num_outputs=32,
                                      kernel_size=3,
                                      stride=1,
                                      **conv_params)
        conv2_concat = tf.concat((conv2, conv_extra_inputs),
                                 axis=channel_axis)
        conv2_1 = convrelu2(name='conv2_1',
                            inputs=conv2_concat,
                            num_outputs=64,
                            kernel_size=3,
                            stride=1,
                            **conv_params)

    conv3 = convrelu2(name='conv3',
                      inputs=conv2_1,
                      num_outputs=(96, 128),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv3_1 = convrelu2(name='conv3_1',
                        inputs=conv3,
                        num_outputs=128,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv4 = convrelu2(name='conv4',
                      inputs=conv3_1,
                      num_outputs=(192, 256),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv4_1 = convrelu2(name='conv4_1',
                        inputs=conv4,
                        num_outputs=256,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv5 = convrelu2(name='conv5',
                      inputs=conv4_1,
                      num_outputs=384,
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv5_1 = convrelu2(name='conv5_1',
                        inputs=conv5,
                        num_outputs=384,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    # dense5 is a fully connected layer on the first 96 channels of conv5_1;
    # its output has as many units as that slice has elements per sample and
    # is reshaped back to the slice shape and appended as extra channels
    dense_slice_shape = conv5_1.get_shape().as_list()
    if channels_first:
        dense_slice_shape[1] = 96
    else:
        dense_slice_shape[-1] = 96
    units = 1
    for dim in dense_slice_shape[1:]:
        units *= dim
    # an unknown batch size is reported as None, which tf.slice and
    # tf.reshape cannot take; -1 means "all remaining elements" for tf.slice
    # and "infer this dimension" for tf.reshape
    if dense_slice_shape[0] is None:
        dense_slice_shape[0] = -1
    dense5 = tf.layers.dense(tf.contrib.layers.flatten(
        tf.slice(conv5_1, [0, 0, 0, 0], dense_slice_shape)),
                             units=units,
                             activation=myLeakyRelu,
                             kernel_initializer=default_weights_initializer(),
                             kernel_regularizer=kernel_regularizer,
                             name='dense5')
    conv5_1_dense5 = tf.concat(
        (conv5_1, tf.reshape(dense5, dense_slice_shape)), axis=channel_axis)

    # expanding part
    with tf.variable_scope('predict_flow5'):
        predict_flowconf5 = _predict_flow(conv5_1_dense5,
                                          predict_confidence=True,
                                          **conv_params)

    with tf.variable_scope('upsample_flow5to4'):
        predict_flowconf5to4 = _upsample_prediction(predict_flowconf5, 2,
                                                    **conv_params)

    # only refine4 receives an upsampled prediction
    with tf.variable_scope('refine4'):
        concat4 = _refine(
            inp=conv5_1_dense5,
            num_outputs=256,
            upsampled_prediction=predict_flowconf5to4,
            features_direct=conv4_1,
            **conv_params,
        )

    with tf.variable_scope('refine3'):
        concat3 = _refine(
            inp=concat4,
            num_outputs=128,
            features_direct=conv3_1,
            **conv_params,
        )

    with tf.variable_scope('refine2'):
        concat2 = _refine(
            inp=concat3,
            num_outputs=64,
            features_direct=conv2_1,
            **conv_params,
        )

    with tf.variable_scope('predict_flow2'):
        predict_flowconf2 = _predict_flow(concat2,
                                          predict_confidence=True,
                                          **conv_params)

    return {
        'predict_flowconf5': predict_flowconf5,
        'predict_flowconf2': predict_flowconf2
    }