Example 1
def construct_ls_vo_rt_model(inputs,
                             cropping=((0, 0), (0, 0)),
                             output_size=500,
                             regularization=0,
                             kernel_initializer='glorot_normal'):

    inputs = concat(inputs)
    features, bottleneck = construct_encoder(
        inputs, kernel_initializer=kernel_initializer)
    reconstructed_flow = construct_flow_decoder(
        bottleneck, cropping=cropping, output_channels=inputs.shape[-1].value)
    fc_rotation = dense(features,
                        output_size=output_size,
                        layers_num=2,
                        regularization=regularization,
                        kernel_initializer=kernel_initializer,
                        name='rotation')
    fc_translation = dense(features,
                           output_size=output_size,
                           layers_num=2,
                           regularization=regularization,
                           kernel_initializer=kernel_initializer,
                           name='translation')

    outputs = construct_outputs(
        [fc_rotation] * 3 + [fc_translation] * 3,
        regularization=regularization) + [reconstructed_flow]
    return outputs
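
These snippets rely on project-level helpers (concat, construct_encoder, construct_flow_decoder, dense, construct_outputs) defined elsewhere in the repository. A minimal usage sketch, assuming standalone Keras Input tensors; the input names and shapes below are illustrative assumptions, not values from the original code:

from keras.layers import Input
from keras.models import Model

# Hypothetical inputs: an x/y optical flow map and a flow-z channel (shapes assumed).
flow_xy = Input(shape=(96, 320, 2), name='flow_xy')
flow_z = Input(shape=(96, 320, 1), name='flow_z')

# Wrap the returned pose outputs and reconstructed flow into a trainable model.
outputs = construct_ls_vo_rt_model([flow_xy, flow_z])
model = Model(inputs=[flow_xy, flow_z], outputs=outputs)
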
Example 2
def construct_simple_model(inputs,
                           conv_layers=3,
                           conv_filters=64,
                           kernel_sizes=3,
                           strides=1,
                           paddings='same',
                           fc_layers=2,
                           output_sizes=500,
                           activations='elu',
                           regularizations=0,
                           batch_norms=True):
    # broadcast scalar hyperparameters to one value per layer
    if not isinstance(conv_filters, list):
        conv_filters = [conv_filters] * conv_layers
    if not isinstance(kernel_sizes, list):
        kernel_sizes = [kernel_sizes] * conv_layers
    if not isinstance(strides, list):
        strides = [strides] * conv_layers
    if not isinstance(paddings, list):
        paddings = [paddings] * conv_layers
    if not isinstance(output_sizes, list):
        output_sizes = [output_sizes] * fc_layers
    if not isinstance(activations, list):
        activations = [activations] * (conv_layers + fc_layers)
    if not isinstance(regularizations, list):
        regularizations = [regularizations] * (conv_layers + fc_layers)
    if not isinstance(batch_norms, list):
        batch_norms = [batch_norms] * (conv_layers + fc_layers)

    inputs = concat(inputs)

    conv = inputs
    for i in range(conv_layers):
        conv = conv2d(conv,
                      conv_filters[i],
                      kernel_size=kernel_sizes[i],
                      batch_norm=batch_norms[i],
                      padding=paddings[i],
                      kernel_initializer='glorot_normal',
                      strides=strides[i],
                      activation=activations[i],
                      activity_regularizer=l2(regularizations[i]))

    flatten = Flatten()(conv)

    fc = flatten
    for i in range(fc_layers):
        fc = Dense(output_sizes[i],
                   kernel_initializer='glorot_normal',
                   activation=activations[i + conv_layers],
                   activity_regularizer=l2(regularizations[i + conv_layers]))(fc)

    outputs = construct_outputs([fc] * 6)
    return outputs
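
A hedged example of calling construct_simple_model: scalar hyperparameters are broadcast to one value per layer, while explicit lists are used as given. Here image_pair stands for a hypothetical list of Keras tensors:

# Hypothetical call: three conv layers with 32 filters each but
# layer-specific kernel sizes and strides, followed by two dense layers.
outputs = construct_simple_model(image_pair,
                                 conv_layers=3,
                                 conv_filters=32,
                                 kernel_sizes=[7, 5, 3],
                                 strides=[2, 2, 1],
                                 fc_layers=2,
                                 output_sizes=[500, 250])
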
Example 3
def construct_st_vo_model(inputs, kernel_initializer='glorot_normal'):

    inputs = concat(inputs)
    conv1 = Conv2D(64,
                   kernel_size=3,
                   strides=2,
                   kernel_initializer=kernel_initializer,
                   name='conv1')(inputs)
    pool1 = MaxPooling2D(pool_size=4, strides=4, name='pool1')(conv1)
    conv2 = Conv2D(20,
                   kernel_size=3,
                   kernel_initializer=kernel_initializer,
                   name='conv2')(pool1)
    pool2 = MaxPooling2D(pool_size=2, strides=2, name='pool2')(conv2)

    # fuse features from both pooling stages (coarse and fine scales)
    flatten1 = Flatten(name='flatten1')(pool1)
    flatten2 = Flatten(name='flatten2')(pool2)
    merged = concatenate([flatten1, flatten2], axis=1)
    activation = Activation('relu')(merged)
    fc = dense(activation, kernel_initializer=kernel_initializer, name='fc')
    outputs = construct_outputs([fc] * 6)
    return outputs
Example 4
def construct_ls_vo_rt_no_decoder_model(inputs,
                                        output_size=500,
                                        regularization=0,
                                        kernel_initializer='glorot_normal'):
    inputs = concat(inputs)
    features, _ = construct_encoder(inputs,
                                    kernel_initializer=kernel_initializer)
    fc_rotation = dense(features,
                        output_size=output_size,
                        layers_num=2,
                        regularization=regularization,
                        kernel_initializer=kernel_initializer,
                        name='rotation')
    fc_translation = dense(features,
                           output_size=output_size,
                           layers_num=2,
                           regularization=regularization,
                           kernel_initializer=kernel_initializer,
                           name='translation')

    outputs = construct_outputs([fc_rotation] * 3 + [fc_translation] * 3,
                                regularization=regularization)
    return outputs
Example 5
def construct_resnet50_model(inputs,
                             weights='imagenet',
                             kernel_initializer='glorot_normal'):

    inputs = concat(inputs)
    conv0 = Conv2D(3,
                   kernel_size=7,
                   padding='same',
                   activation='relu',
                   kernel_initializer=kernel_initializer,
                   name='conv0')(inputs)

    features = ResNet50(weights=weights, include_top=False,
                        pooling=None)(conv0)
    flatten = Flatten()(features)

    fc = dense(flatten,
               output_size=500,
               layers_num=2,
               activation='relu',
               kernel_initializer=kernel_initializer)

    outputs = construct_outputs([fc] * 6)
    return outputs
Example 6
def construct_encoder(inputs,
                      layers=4,
                      filters=[[16, 16, 32]] * 4,
                      kernel_sizes=[[7, 5, 3]] * 4,
                      strides=2,
                      dilation_rates=None,
                      kernel_initializer='glorot_normal',
                      use_gated_convolutions=False):
    conv = gated_conv2d if use_gated_convolutions else conv2d

    # wrap scalars in a list; copy lists so the in-place repetition below
    # cannot mutate caller-provided (or default) arguments
    makelist = lambda x: [x] if isinstance(x, int) else list(x)

    if isinstance(filters, int):
        filters = [filters] * layers

    if isinstance(kernel_sizes, int):
        kernel_sizes = [kernel_sizes] * layers

    if isinstance(strides, int):
        strides = [strides] * layers

    if dilation_rates is None:
        dilation_rates = [1] * layers

    for i in range(layers):
        layer_filters = makelist(filters[i])
        layer_kernel_sizes = makelist(kernel_sizes[i])
        layer_dilation_rates = makelist(dilation_rates[i])
        layer_stride = strides[i]
        convs = max(len(layer_filters), len(layer_kernel_sizes), len(layer_dilation_rates))

        assert len(layer_filters) in (1, convs)
        assert len(layer_kernel_sizes) in (1, convs)
        assert len(layer_dilation_rates) in (1, convs)

        if len(layer_filters) == 1:
            layer_filters *= convs

        if len(layer_kernel_sizes) == 1:
            layer_kernel_sizes *= convs

        if len(layer_dilation_rates) == 1:
            layer_dilation_rates *= convs

        print(f'Layer {i + 1}: {convs} convolutions')

        outputs = []
        for flt, kernel_size, dilation_rate in zip(layer_filters, layer_kernel_sizes, layer_dilation_rates):
            print(f'\tfilters={flt}, kernel size={kernel_size}, stride={layer_stride}, dilation rate={dilation_rate}')
            outputs.append(
                conv(inputs,
                     flt,
                     kernel_size=kernel_size,
                     strides=layer_stride,
                     dilation_rate=dilation_rate,
                     padding='same',
                     activation='relu',
                     kernel_initializer=kernel_initializer)
            )
        inputs = concat(outputs)

    merged = conv(inputs,
                  64,
                  kernel_size=1,
                  padding='same',
                  activation='relu',
                  kernel_initializer=kernel_initializer)
    flatten = Flatten()(merged)
    return flatten
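
With the defaults above, each of the four layers builds three parallel convolutions (16, 16 and 32 filters with kernel sizes 7, 5 and 3) and concatenates their outputs before the next layer. A hedged sketch of a call that additionally varies the dilation rates of the last layer's branches; inputs is a hypothetical 4-D Keras tensor and the values are illustrative:

# Hypothetical call: same filters/kernels as the defaults, but the
# fourth layer uses per-branch dilation rates 1, 2 and 4.
features = construct_encoder(inputs,
                             layers=4,
                             filters=[[16, 16, 32]] * 4,
                             kernel_sizes=[[7, 5, 3]] * 4,
                             strides=2,
                             dilation_rates=[1, 1, 1, [1, 2, 4]])
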
Example 7
def construct_encoder(inputs,
                      use_depth=True,
                      use_flow=True,
                      use_association_layer=True,
                      use_grid=False,
                      concat_axis=3,
                      filters=256,
                      stride=2,
                      f_x=1,
                      f_y=1,
                      c_x=0.5,
                      c_y=0.5,
                      kernel_initializer='glorot_normal'):
    # at least one convolutional branch must be enabled
    assert use_flow or use_depth

    # flow convolutional branch
    if use_flow:
        flow = concat(inputs[:2])

        if use_grid:
            flow = add_grid(flow, f_x=f_x, f_y=f_y, c_x=c_x, c_y=c_y)

        for i in range(1, 5):
            flow = conv2d(flow,
                          2 ** (i + 5),
                          kernel_size=3,
                          strides=2,
                          kernel_initializer=kernel_initializer,
                          name=f'conv{i}_flow')

    # depth convolutional branch
    if use_depth:
        if use_association_layer: # pass flow_z as input
            depth = depth_flow(concat(inputs))
        else:
            depth = concat(inputs[2:])

        if use_grid:
            depth = add_grid(depth, f_x=f_x, f_y=f_y, c_x=c_x, c_y=c_y)

        for i in range(1, 5):
            depth = conv2d(depth,
                           2 ** (i + 5),
                           kernel_size=3,
                           strides=2,
                           kernel_initializer=kernel_initializer,
                           name=f'conv{i}_depth')

    if use_flow and use_depth:
        concatenated = concat([flow, depth])
    elif use_flow:
        concatenated = flow
    elif use_depth:
        concatenated = depth

    merged = conv2d(concatenated,
                    filters,
                    kernel_size=1,
                    strides=stride,
                    kernel_initializer=kernel_initializer,
                    name='merge')

    flatten = Flatten()(merged)
    return flatten
Example 8
def construct_flexible_model(inputs,
                             kernel_sizes=[7, 5, 3, 3, 3, 3],
                             strides=[2, 1, 4, 1, 2, 1],
                             dilation_rates=None,
                             output_size=500,
                             regularization=0,
                             activation='relu',
                             kernel_initializer='glorot_normal',
                             use_gated_convolutions=False,
                             use_batch_norm=False,
                             split=False,
                             transform=None,
                             agnostic=True,
                             channel_wise=False,
                             concat_scale_to_fc=False,
                             multiply_outputs_by_scale=False,
                             confidence_mode=None):

    inputs, scale = transform_inputs(inputs,
                                     transform=transform,
                                     agnostic=agnostic,
                                     channel_wise=channel_wise)

    features = construct_encoder(inputs,
                                 kernel_sizes=kernel_sizes,
                                 strides=strides,
                                 dilation_rates=dilation_rates,
                                 activation=activation,
                                 kernel_initializer=kernel_initializer,
                                 use_gated_convolutions=use_gated_convolutions,
                                 use_batch_norm=use_batch_norm)

    if concat_scale_to_fc:
        fc_rotation = features
        fc_translation = features

        for i in range(2):
            fc_rotation = concat([fc_rotation, scale])
            fc_translation = concat([fc_translation, scale])

            fc_rotation = dense(fc_rotation,
                                output_size=output_size,
                                layers_num=1,
                                regularization=regularization,
                                activation=activation,
                                kernel_initializer=kernel_initializer)

            fc_translation = dense(fc_translation,
                                   output_size=output_size,
                                   layers_num=1,
                                   regularization=regularization,
                                   activation=activation,
                                   kernel_initializer=kernel_initializer)
    else:
        fc_rotation = dense(features,
                            output_size=output_size,
                            layers_num=2,
                            regularization=regularization,
                            activation=activation,
                            kernel_initializer=kernel_initializer,
                            name='rotation')
        fc_translation = dense(features,
                               output_size=output_size,
                               layers_num=2,
                               regularization=regularization,
                               activation=activation,
                               kernel_initializer=kernel_initializer,
                               name='translation')

    if split:
        fc = chunk(fc_rotation, n=3) + chunk(fc_translation, n=3)
    else:
        fc = [fc_rotation] * 3 + [fc_translation] * 3

    outputs = construct_outputs(
        fc,
        regularization=regularization,
        scale=scale if multiply_outputs_by_scale else None,
        confidence_mode=confidence_mode)
    return outputs
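
A hedged call sketch for construct_flexible_model showing the split option, which chunks each fully connected head into three slices so every output gets its own share of features; flow_inputs is a hypothetical list of Keras tensors:

# Hypothetical call: shared encoder, separate rotation/translation heads,
# each split into three chunks before the six outputs are constructed.
outputs = construct_flexible_model(flow_inputs,
                                   output_size=500,
                                   regularization=1e-4,
                                   split=True)
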
Example 9
def construct_sequential_rt_model(inputs,
                                  intrinsics,
                                  use_input_flow=False,
                                  use_diff_flow=False,
                                  use_rotation_flow=False,
                                  kernel_sizes=[7, 5, 3, 3, 3, 3],
                                  strides=[2, 1, 4, 1, 2, 1],
                                  dilation_rates=None,
                                  hidden_size=500,
                                  regularization=0,
                                  activation='relu',
                                  kernel_initializer='glorot_normal',
                                  use_gated_convolutions=False,
                                  use_batch_norm=False,
                                  return_confidence=False):

    assert use_input_flow or use_diff_flow or use_rotation_flow

    inputs = concat(inputs)
    features_rotation = construct_encoder(
        inputs,
        kernel_sizes=kernel_sizes,
        strides=strides,
        dilation_rates=dilation_rates,
        kernel_initializer=kernel_initializer,
        use_gated_convolutions=use_gated_convolutions,
        use_batch_norm=use_batch_norm)

    fc_rotation = dense(features_rotation,
                        output_size=hidden_size,
                        regularization=regularization,
                        activation=activation,
                        kernel_initializer=kernel_initializer,
                        layers_num=2,
                        name='rotation')

    output_rotation = construct_output(fc_rotation,
                                       name='rotation',
                                       regularization=regularization)

    # optical flow induced by the predicted rotation (via the camera intrinsics)
    rotation_flow = flow_composer(output_rotation, intrinsics=intrinsics)

    inputs_for_translation = []
    if use_input_flow:
        inputs_for_translation.append(inputs)
    if use_diff_flow:
        inputs_for_translation.append(Subtract()([inputs, rotation_flow]))
    if use_rotation_flow:
        inputs_for_translation.append(rotation_flow)

    features_translation = construct_encoder(
        concat(inputs_for_translation),
        kernel_sizes=kernel_sizes,
        strides=strides,
        dilation_rates=dilation_rates,
        kernel_initializer=kernel_initializer,
        use_gated_convolutions=use_gated_convolutions,
        use_batch_norm=use_batch_norm)

    fc_translation = dense(features_translation,
                           output_size=hidden_size,
                           regularization=regularization,
                           activation=activation,
                           kernel_initializer=kernel_initializer,
                           layers_num=2,
                           name='translation')

    output_translation = construct_output(fc_translation,
                                          name='translation',
                                          regularization=regularization)

    return output_rotation + output_translation