Example No. 1
def unet(network_input: NetworkInput) -> KerasModel:
    """
    :return:  model -- a model that has been defined, but not yet compiled.
                      The model is an implementation of the Unet paper
                      (https://arxiv.org/pdf/1505.04597.pdf) and comes
                      from this repo https://github.com/zhixuhao/unet. It has
                      been modified to keep up with API changes in keras 2.
    """
    inputs = Input(network_input.input_shape)

    conv1 = Conv2D(filters=64,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(inputs)
    conv1 = Conv2D(filters=64,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(filters=128,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool1)
    conv2 = Conv2D(filters=128,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(filters=256,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool2)
    conv3 = Conv2D(filters=256,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Conv2D(filters=512,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool3)
    conv4 = Conv2D(filters=512,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    conv5 = Conv2D(filters=1024,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(pool4)
    conv5 = Conv2D(filters=1024,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv5)
    drop5 = Dropout(0.5)(conv5)

    up6 = UpSampling2D(size=(2, 2))(drop5)
    up6 = Conv2D(filters=512,
                 kernel_size=2,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(up6)
    merge6 = Concatenate(axis=3)([drop4, up6])
    conv6 = Conv2D(filters=512,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge6)
    conv6 = Conv2D(filters=512,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv6)

    up7 = UpSampling2D(size=(2, 2))(conv6)
    up7 = Conv2D(filters=256,
                 kernel_size=2,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(up7)
    merge7 = Concatenate(axis=3)([conv3, up7])
    conv7 = Conv2D(filters=256,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge7)
    conv7 = Conv2D(filters=256,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv7)

    up8 = UpSampling2D(size=(2, 2))(conv7)
    up8 = Conv2D(filters=128,
                 kernel_size=2,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(up8)
    merge8 = Concatenate(axis=3)([conv2, up8])
    conv8 = Conv2D(filters=128,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge8)
    conv8 = Conv2D(filters=128,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv8)

    up9 = UpSampling2D(size=(2, 2))(conv8)
    up9 = Conv2D(filters=64,
                 kernel_size=2,
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal')(up9)
    merge9 = Concatenate(axis=3)([conv1, up9])
    conv9 = Conv2D(filters=64,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(merge9)
    conv9 = Conv2D(filters=64,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv9)
    conv9 = Conv2D(filters=2,
                   kernel_size=3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(conv9)
    conv10 = Conv2D(filters=network_input.number_of_classes,
                    kernel_size=1,
                    activation='sigmoid')(conv9)

    model = KerasModel(inputs=inputs, outputs=conv10)
    model.model_name = "unet"

    return model
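
# A minimal usage sketch (not from the original repo): it assumes the Keras layer
# imports used above are in scope, and it treats `NetworkInput` as a simple config
# object exposing `input_shape` and `number_of_classes`. The optimizer and loss are
# illustrative choices, since unet() returns an uncompiled model.
from collections import namedtuple

NetworkInputStub = namedtuple('NetworkInputStub', ['input_shape', 'number_of_classes'])

model = unet(NetworkInputStub(input_shape=(256, 256, 1), number_of_classes=1))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()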
Example No. 2
def insert_layer_nonseq(model,
                        layer_regex,
                        insert_layer_factory,
                        insert_layer_name=None,
                        position='after',
                        model_name=None,
                        only_last_node=False):

    # Auxiliary dictionary to describe the network graph
    network_dict = {'input_layers_of': {}, 'new_output_tensor_of': {}}

    # Set the input layers of each layer
    for layer in model.layers:
        if only_last_node:
            nodes = layer._outbound_nodes[-1:]
        else:
            nodes = layer._outbound_nodes
        for node in nodes:
            layer_name = node.outbound_layer.name
            if layer_name not in network_dict['input_layers_of']:
                network_dict['input_layers_of'].update(
                    {layer_name: [layer.name]})
            else:
                network_dict['input_layers_of'][layer_name].append(layer.name)

    # Set the output tensor of the input layer
    for i, layer in enumerate(model.layers):
        if isinstance(layer, InputLayer) or i == 0:
            network_dict['new_output_tensor_of'].update(
                {layer.name: layer.input})

    # Iterate over all layers after the input
    model_outputs = []
    for layer in model.layers:
        if layer.name in network_dict['new_output_tensor_of']:
            continue
        # Determine input tensors
        layer_input = [
            network_dict['new_output_tensor_of'][layer_aux]
            for layer_aux in network_dict['input_layers_of'][layer.name]
        ]

        # Insert layer if name matches the regular expression
        if re.match(layer_regex, layer.name):
            if position == 'replace':
                x = layer_input
            elif position == 'after':
                try:
                    x = layer(*layer_input)
                except TypeError as t:
                    if 'arguments' in str(t):
                        x = layer(layer_input)
                    else:
                        raise t
            elif position == 'before':
                # Feed the original inputs to the new layer; the matched layer
                # is applied afterwards (below).
                x = layer_input[0] if len(layer_input) == 1 else layer_input
            else:
                raise ValueError('position must be: before, after or replace')

            new_layer_name = '{}_{}'.format(layer.name, insert_layer_name)
            new_layer = insert_layer_factory(new_layer_name)
            x = new_layer(x)
            # print('New layer: {} Old layer: {} Type: {}'.format(new_layer.name,
            #                                                 layer.name, position))
            if position == 'before':
                x = layer(x)
        else:
            try:
                x = layer(*layer_input)
            except TypeError as t:
                if 'arguments' in str(t):
                    x = layer(layer_input)
                else:
                    raise t

        # Set new output tensor (the original one, or the one of the inserted
        # layer)
        network_dict['new_output_tensor_of'].update({layer.name: x})

        # Save tensor in output list if it is output in initial model
        if layer.name in model.output_names:
            model_outputs.append(x)

    kwargs = {}
    if model_name is not None:
        kwargs['name'] = model_name
    return Model(inputs=model.inputs, outputs=model_outputs, **kwargs)
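
# A hedged usage sketch (not from the original source): the factory receives the
# generated layer name and must return a fresh layer; the regex, dropout rate and
# base model below are illustrative only.
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dropout

def dropout_factory(name):
    return Dropout(0.3, name=name)

base_model = ResNet50(weights=None, input_shape=(224, 224, 3), classes=10)
# Insert a Dropout layer after every layer whose name contains 'conv'.
new_model = insert_layer_nonseq(base_model, r'.*conv.*', dropout_factory,
                                insert_layer_name='dropout', position='after')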
Example No. 3
train_image_txt_file = os.path.join(text_file_base_dir,
                                    'Flickr_8k.trainImages.txt')

train_image_paths = get_image_paths(images_dir, train_image_txt_file)

test_image_txt_file = os.path.join(text_file_base_dir,
                                   'Flickr_8k.testImages.txt')

test_image_paths = get_image_paths(images_dir, test_image_txt_file)

# descriptions
train_descriptions = load_clean_descriptions('descriptions.txt', train)
print('Descriptions: train=%d' % len(train_descriptions))

# Load the inception v3 model
model = InceptionV3(weights='imagenet', include_top=True)
model_new = Model(model.input, model.layers[-2].output)
# model_new = InceptionV3(weights='imagenet', include_top=False)
# Create a new model, by removing the last layer (output layer) from the inception v3
# model_new = Model(model.input, model.layers[-2].output)

# Call the function to encode all the training images
# This will take a while on CPU - Execute this only once
start = time.time()
encoding_train = {
    img[len(images_dir):]: encode(model_new, img)
    for img in tqdm(train_image_paths)
}
print("Train feature taken in seconds: ", time.time() - start)

# Save the bottleneck train features to disk
train_feat_path = os.path.join(
Example No. 4
def create(n_classes=1,
           base=4,
           pretrained=False,
           pretrained_model_path='',
           learning_rate=1e-6,
           metrics=[dice]):
    if n_classes == 1:
        loss = 'binary_crossentropy'
        final_act = 'sigmoid'
    elif n_classes > 1:
        loss = 'categorical_crossentropy'
        final_act = 'softmax'

    if pretrained:
        model = load_model(pretrained_model_path,
                           custom_objects={
                               'dice':
                               dice,
                               'preprocess_input':
                               preprocess_input,
                               '_preprocess_symbolic_input':
                               _preprocess_symbolic_input
                           })
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate),
                      loss=loss,
                      metrics=metrics)
        model.summary()
        return model

    b = base
    inputs = Input(shape=INPUT_SHAPE)

    converted_inputs = tf.keras.layers.Lambda(
        lambda x: preprocess_input(x, mode='torch'))(inputs)

    x = Conv2D(2**(b + 1), (3, 3),
               strides=(2, 2),
               name='conv_1_1',
               use_bias=False,
               padding='same')(converted_inputs)
    x = BatchNormalization(name='conv_1_1_batch_normalization')(x)
    x = Activation('relu')(x)

    x = Conv2D(2**(b + 2), (3, 3),
               strides=(1, 1),
               padding='same',
               use_bias=False,
               dilation_rate=(1, 1),
               name='conv_1_2')(x)
    x = BatchNormalization(name='conv_1_2_batch_normalization')(x)
    x = Activation('relu')(x)

    x = xception_block(x, [128, 128, 128],
                       'xception_block_1',
                       skip_type='conv',
                       stride=2,
                       depth_activation=False)
    x, skip1 = xception_block(x, [256, 256, 256],
                              'xception_block_2',
                              skip_type='conv',
                              stride=2,
                              depth_activation=False,
                              return_skip=True)

    x = xception_block(x, [728, 728, 728],
                       'xception_block_3',
                       skip_type='conv',
                       stride=1,
                       depth_activation=False)
    for i in range(16):
        x = xception_block(x, [728, 728, 728],
                           'middle_flow_unit_{}'.format(i + 1),
                           skip_type='sum',
                           stride=1,
                           rate=2,
                           depth_activation=False)

    x = xception_block(x, [728, 1024, 1024],
                       'xception_block_4',
                       skip_type='conv',
                       stride=1,
                       rate=2,
                       depth_activation=False)
    x = xception_block(x, [1536, 1536, 2048],
                       'xception_block_5',
                       skip_type='none',
                       stride=1,
                       rate=4,
                       depth_activation=True)

    b4 = GlobalAveragePooling2D()(x)

    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Conv2D(2**(b + 4), (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_batch_normalization',
                            epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)

    size_before = int_shape(x)
    b4 = Lambda(lambda x: tf.compat.v1.image.resize(
        x, size_before[1:3], method='bilinear', align_corners=True))(b4)

    b0 = Conv2D(2**(b + 4), (1, 1),
                padding='same',
                use_bias=False,
                name='atrous_spatial_pyramid_pooling_base')(x)
    b0 = BatchNormalization(
        name='atrous_spatial_pyramid_pooling_base_batch_normalization',
        epsilon=1e-5)(b0)
    b0 = Activation('relu',
                    name='atrous_spatial_pyramid_pooling_base_activation')(b0)

    b1 = separable_conv_with_batch_normalization(
        x, 2**(b + 4), 'atrous_spatial_pyramid_pooling_1', rate=12)
    b2 = separable_conv_with_batch_normalization(
        x, 2**(b + 4), 'atrous_spatial_pyramid_pooling_2', rate=24)
    b3 = separable_conv_with_batch_normalization(
        x, 2**(b + 4), 'atrous_spatial_pyramid_pooling_3', rate=36)

    x = Concatenate()([b4, b0, b1, b2, b3])

    x = Conv2D(2**(b + 4), (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_batch_normalization',
                           epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    skip_size = int_shape(skip1)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(
        xx, skip_size[1:3], method='bilinear', align_corners=True))(x)

    dec_skip1 = Conv2D(48, (1, 1),
                       padding='same',
                       use_bias=False,
                       name='feature_projection0')(skip1)
    dec_skip1 = BatchNormalization(
        name='feature_projection0_batch_normalization',
        epsilon=1e-5)(dec_skip1)
    dec_skip1 = Activation('relu')(dec_skip1)

    x = Concatenate()([x, dec_skip1])
    x = separable_conv_with_batch_normalization(x, 2**(b + 4),
                                                'decoder_convolution_1')
    x = separable_conv_with_batch_normalization(x, 2**(b + 4),
                                                'decoder_convolution_2')

    x = Conv2D(n_classes, (1, 1), padding='same', name="last_layer")(x)
    size_before3 = int_shape(inputs)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(
        xx, size_before3[1:3], method='bilinear', align_corners=True))(x)

    outputs = tf.keras.layers.Activation(final_act)(x)

    model = Model(inputs, outputs, name='deeplabv3plus')
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics)
    model.summary()

    return model
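
# A minimal usage sketch (illustrative arguments): INPUT_SHAPE, dice,
# preprocess_input, xception_block and the other helpers are assumed to be defined
# elsewhere in the original module.
model = create(n_classes=3, base=4, learning_rate=1e-4)
# model.fit(train_images, train_masks, batch_size=8, epochs=10,
#           validation_data=(val_images, val_masks))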
Example No. 5
def generator_2d(inputs_dim, z_dim):
    down_stack = [
        downsample(32,
                   input_shape=[20, 49, inputs_dim + z_dim],
                   apply_batchnorm=False,
                   layer_type='conv'),
        downsample(64, input_shape=[10, 25, 32], layer_type='conv'),
        downsample(128, input_shape=[5, 13, 64], layer_type='conv'),
        downsample(256, input_shape=[3, 7, 128], layer_type='conv'),
        downsample(256, input_shape=[2, 4, 256], layer_type='conv'),
    ]

    up_stack = [
        upsample(256,
                 input_shape=[1, 2, 256],
                 apply_dropout=False,
                 layer_type='conv',
                 output_padding=(1, 1)),
        upsample(128,
                 input_shape=[2, 4, 512],
                 layer_type='conv',
                 output_padding=(0, 0)),
        upsample(64,
                 input_shape=[3, 7, 256],
                 layer_type='conv',
                 output_padding=(0, 0)),
        upsample(32,
                 input_shape=[5, 13, 128],
                 layer_type='conv',
                 output_padding=(1, 0)),
    ]
    initializer = random_normal_initializer(0., 0.02)
    last = layers.Conv2DTranspose(1,
                                  kernel_size=3,
                                  strides=2,
                                  padding='same',
                                  output_padding=(1, 0),
                                  kernel_initializer=initializer,
                                  activation='tanh')

    inputs = layers.Input(shape=[20, 49, inputs_dim])
    if z_dim:
        z = layers.Input(shape=[20, 49, z_dim])
        x = layers.concatenate([inputs, z])
        inp = [inputs, z]
    else:
        x = inputs
        inp = inputs

    skips = []
    for down in down_stack:
        x = down(x)
        skips.append(x)

    skips = reversed(skips[:-1])

    for up, skip in zip(up_stack, skips):
        x = up(x)
        x = layers.concatenate([x, skip])

    x = last(x)

    return Model(inputs=inp, outputs=x)
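
# A quick smoke test for the generator, assuming the downsample/upsample helpers
# and the `layers`/`Model`/`random_normal_initializer` imports from the original
# module; input shapes follow the (20, 49, inputs_dim) maps used above.
import numpy as np

gen = generator_2d(inputs_dim=1, z_dim=8)
x = np.random.rand(4, 20, 49, 1).astype('float32')
z = np.random.rand(4, 20, 49, 8).astype('float32')
print(gen.predict([x, z]).shape)  # expected to mirror the input spatial map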
def generate_vgg_model_advance_and_density(classes_len: int):
    """
    Function to create a VGG19 model pre-trained with custom FC Layers.
    If the "advanced" command line argument is selected, adds an extra convolutional layer with extra filters to support
    larger images.
    :param classes_len: The number of classes (labels).
    :return: The VGG19 model.
    """
    # Reconfigure single channel input into a greyscale 3 channel input
    img_input = Input(shape=(config.VGG_IMG_SIZE['HEIGHT'], config.VGG_IMG_SIZE['WIDTH'], 1))

    # Add convolution and pooling layers
    model = Sequential()
    model.add(img_input)
    for i in range(config.CONV_CNT):
        model.add(Conv2D(3, (3, 3),
                         activation='relu',
                         padding='same'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))
        
#     model.add(Conv2D(3, (5, 5),
#                      activation='relu',
#                      padding='same'))
#     model.add(MaxPooling2D((2, 2), strides=(2, 2)))

#     model.add(Conv2D(3, (3, 3),
#                      activation='relu',
#                      padding='same'))
#     model.add(MaxPooling2D((2, 2), strides=(2, 2)))
        
    # Generate a VGG19 model with pre-trained ImageNet weights, input as given above, excluded fully connected layers.
    model_base = VGG19(include_top=False, weights='imagenet')
    
    # Start with base model consisting of convolutional layers
    model.add(model_base)

    # Flatten layer to convert each input into a 1D array (no parameters in this layer, just simple pre-processing).
    model.add(Flatten())
    
    # Possible dropout for regularisation can be added later and experimented with:
    if config.DROPOUT != 0:
        model.add(Dropout(config.DROPOUT, name='Dropout_Regularization_1'))

    # Add fully connected hidden layers.
    model.add(Dense(units=512, activation='relu', name='Dense_Intermediate_1'))
    model.add(Dense(units=32, activation='relu', name='Dense_Intermediate_2'))

    model_density = Sequential()
    density_dim = int(config.model.split('-')[1])
    model_density.add(Dense(density_dim, input_shape=(density_dim,), activation='relu'))
    
    model_concat = concatenate([model.output, model_density.output], axis=-1)
    
    # Final output layer: sigmoid for the binary case, softmax otherwise (the classes are exclusive).
    if classes_len == 2:
        model_concat = Dense(1, activation='sigmoid', name='Output')(model_concat)
    else:
        model_concat = Dense(classes_len, activation='softmax', name='Output')(model_concat)
    
    model_combine = Model(inputs=[model.input, model_density.input], outputs=model_concat)

    # Print model details if running in debug mode.
    if config.verbose_mode:
        print(model_combine.summary())

    return model_combine
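
# A hedged call sketch: config.VGG_IMG_SIZE, config.CONV_CNT, config.DROPOUT and
# config.model are assumed to be set by the project's config module; the compile
# arguments below are illustrative for the binary (sigmoid) head.
model = generate_vgg_model_advance_and_density(classes_len=2)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])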
def resnet_v2(input_shape, depth, num_classes=10):
    """ResNet Version 2 Model builder [b]

    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D, also known as
    bottleneck layers.
    The first shortcut connection per layer is a 1 x 1 Conv2D; second and
    onwards shortcut connections are identity.
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filter maps is
    doubled. Within each stage, the layers have the same number of filters and
    the same feature map sizes.
    Feature map sizes:
    conv1  : 32x32,  16
    stage 0: 32x32,  64
    stage 1: 16x16, 128
    stage 2:  8x8,  256

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance
    """
    if (depth - 2) % 9 != 0:
        raise ValueError('depth should be 9n+2 (e.g. 56 or 110 in [b])')
    # Start model definition.
    num_filters_in = 16
    num_res_blocks = int((depth - 2) / 9)

    inputs = Input(shape=input_shape)
    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
    x = resnet_layer(inputs=inputs,
                     num_filters=num_filters_in,
                     conv_first=True)

    # Instantiate the stack of residual units
    for stage in range(3):
        for res_block in range(num_res_blocks):
            activation = 'relu'
            batch_normalization = True
            strides = 1
            if stage == 0:
                num_filters_out = num_filters_in * 4
                if res_block == 0:  # first layer and first stage
                    activation = None
                    batch_normalization = False
            else:
                num_filters_out = num_filters_in * 2
                if res_block == 0:  # first layer but not first stage
                    strides = 2  # downsample

            # bottleneck residual unit
            y = resnet_layer(inputs=x,
                             num_filters=num_filters_in,
                             kernel_size=1,
                             strides=strides,
                             activation=activation,
                             batch_normalization=batch_normalization,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_in,
                             conv_first=False)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters_out,
                             kernel_size=1,
                             conv_first=False)
            if res_block == 0:
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters_out,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])

        num_filters_in = num_filters_out

    # Add classifier on top.
    # v2 has BN-ReLU before Pooling
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model
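
# Usage sketch for CIFAR-10-sized inputs: depth must satisfy depth = 9n + 2,
# e.g. n = 6 gives ResNet-56 v2 (assumes resnet_layer and the Keras imports above
# are in scope; optimizer and loss are illustrative).
model = resnet_v2(input_shape=(32, 32, 3), depth=56, num_classes=10)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()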
def get_temp_view_model(width, height, bs=1, bi_style=False):
    input_o = layers.Input(shape=(height, width, 3), dtype='float32')

    y = InputReflect(width, height, name='output')(input_o)
    total_variation_loss = layers.Lambda(get_tv_loss, output_shape=(1,), name='tv',
                                         arguments={'width': width, 'height': height})([y])

    content_activation = layers.Input(shape=(height // 2, width // 2, 128), dtype='float32')
    style_activation1 = layers.Input(shape=(height, width, 64), dtype='float32')
    style_activation2 = layers.Input(shape=(height // 2, width // 2, 128), dtype='float32')
    style_activation3 = layers.Input(shape=(height // 4, width // 4, 256), dtype='float32')
    style_activation4 = layers.Input(shape=(height // 8, width // 8, 512), dtype='float32')

    if bi_style:
        style_activation1_2 = layers.Input(shape=(height, width, 64), dtype='float32')
        style_activation2_2 = layers.Input(shape=(height // 2, width // 2, 128), dtype='float32')
        style_activation3_2 = layers.Input(shape=(height // 4, width // 4, 256), dtype='float32')
        style_activation4_2 = layers.Input(shape=(height // 8, width // 8, 512), dtype='float32')

    # Block 1
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(y)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    style_loss1 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style1', arguments={'batch_size': bs})([x, style_activation1])
    if bi_style:
        style_loss1_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style1_2', arguments={'batch_size': bs})([x, style_activation1_2])
        style_loss1 = AverageAddTwo(name='style1_out')([style_loss1, style_loss1_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    content_loss = layers.Lambda(get_content_loss, output_shape=(1,), name='content')([x, content_activation])
    style_loss2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style2', arguments={'batch_size': bs})([x, style_activation2])
    if bi_style:
        style_loss2_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style2_2', arguments={'batch_size': bs})([x, style_activation2_2])
        style_loss2 = AverageAddTwo(name='style2_out')([style_loss2, style_loss2_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    style_loss3 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style3', arguments={'batch_size': bs})([x, style_activation3])
    if bi_style:
        style_loss3_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style3_2', arguments={'batch_size': bs})([x, style_activation3_2])
        style_loss3 = AverageAddTwo(name='style3_out')([style_loss3, style_loss3_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    style_loss4 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style4', arguments={'batch_size': bs})([x, style_activation4])
    if bi_style:
        style_loss4_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style4_2', arguments={'batch_size': bs})([x, style_activation4_2])
        style_loss4 = AverageAddTwo(name='style4_out')([style_loss4, style_loss4_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if bi_style:
        model = Model(
            [input_o, content_activation, style_activation1, style_activation2, style_activation3,
             style_activation4,
             style_activation1_2, style_activation2_2, style_activation3_2, style_activation4_2],
            [content_loss, style_loss1, style_loss2, style_loss3, style_loss4, total_variation_loss, y])
    else:
        model = Model(
            [input_o, content_activation, style_activation1, style_activation2, style_activation3,
             style_activation4],
            [content_loss, style_loss1, style_loss2, style_loss3, style_loss4, total_variation_loss, y])
    model_layers = {layer.name: layer for layer in model.layers}
    original_vgg = vgg16.VGG16(weights='imagenet', include_top=False)
    original_vgg_layers = {layer.name: layer for layer in original_vgg.layers}

    # Load the pre-trained ImageNet weights into the matching VGG16 layers and freeze them
    for layer in original_vgg.layers:
        if layer.name in model_layers:
            model_layers[layer.name].set_weights(original_vgg_layers[layer.name].get_weights())
            model_layers[layer.name].trainable = False

    print("temp_view model built successfully!")
    return model
def get_training_model(width, height, bs=1, bi_style=False):
    input_o = layers.Input(shape=(height, width, 3), dtype='float32', name='input_o')

    c1 = layers.Conv2D(32, (9, 9), strides=1, padding='same', name='conv_1')(input_o)
    c1 = layers.BatchNormalization(name='normal_1')(c1)
    c1 = layers.Activation('relu', name='relu_1')(c1)

    c2 = layers.Conv2D(64, (3, 3), strides=2, padding='same', name='conv_2')(c1)
    c2 = layers.BatchNormalization(name='normal_2')(c2)
    c2 = layers.Activation('relu', name='relu_2')(c2)

    c3 = layers.Conv2D(128, (3, 3), strides=2, padding='same', name='conv_3')(c2)
    c3 = layers.BatchNormalization(name='normal_3')(c3)
    c3 = layers.Activation('relu', name='relu_3')(c3)

    r1 = residual_block(c3, 1)
    r2 = residual_block(r1, 2)
    r3 = residual_block(r2, 3)
    r4 = residual_block(r3, 4)
    r5 = residual_block(r4, 5)

    d1 = layers.Conv2DTranspose(64, (3, 3), strides=2, padding='same', name='conv_4')(r5)
    d1 = layers.BatchNormalization(name='normal_4')(d1)
    d1 = layers.Activation('relu', name='relu_4')(d1)

    d2 = layers.Conv2DTranspose(32, (3, 3), strides=2, padding='same', name='conv_5')(d1)
    d2 = layers.BatchNormalization(name='normal_5')(d2)
    d2 = layers.Activation('relu', name='relu_5')(d2)

    c4 = layers.Conv2D(3, (9, 9), strides=1, padding='same', name='conv_6')(d2)
    c4 = layers.BatchNormalization(name='normal_6')(c4)
    c4 = layers.Activation('tanh', name='tanh_1')(c4)
    c4 = OutputScale(name='output')(c4)

    content_activation = layers.Input(shape=(height // 2, width // 2, 128), dtype='float32')
    style_activation1 = layers.Input(shape=(height, width, 64), dtype='float32')
    style_activation2 = layers.Input(shape=(height // 2, width // 2, 128), dtype='float32')
    style_activation3 = layers.Input(shape=(height // 4, width // 4, 256), dtype='float32')
    style_activation4 = layers.Input(shape=(height // 8, width // 8, 512), dtype='float32')

    if bi_style:
        style_activation1_2 = layers.Input(shape=(height, width, 64), dtype='float32')
        style_activation2_2 = layers.Input(shape=(height // 2, width // 2, 128), dtype='float32')
        style_activation3_2 = layers.Input(shape=(height // 4, width // 4, 256), dtype='float32')
        style_activation4_2 = layers.Input(shape=(height // 8, width // 8, 512), dtype='float32')

    total_variation_loss = layers.Lambda(get_tv_loss, output_shape=(1,), name='tv',
                                         arguments={'width': width, 'height': height})([c4])

    # Block 1
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(c4)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    style_loss1 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style1', arguments={'batch_size': bs})([x, style_activation1])
    if bi_style:
        style_loss1_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style1_2', arguments={'batch_size': bs})([x, style_activation1_2])
        style_loss1 = AverageAddTwo(name='style1_out')([style_loss1, style_loss1_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    content_loss = layers.Lambda(get_content_loss, output_shape=(1,), name='content')([x, content_activation])
    style_loss2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style2', arguments={'batch_size': bs})([x, style_activation2])
    if bi_style:
        style_loss2_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style2_2', arguments={'batch_size': bs})([x, style_activation2_2])
        style_loss2 = AverageAddTwo(name='style2_out')([style_loss2, style_loss2_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    style_loss3 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style3', arguments={'batch_size': bs})([x, style_activation3])
    if bi_style:
        style_loss3_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style3_2', arguments={'batch_size': bs})([x, style_activation3_2])
        style_loss3 = AverageAddTwo(name='style3_out')([style_loss3, style_loss3_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    style_loss4 = layers.Lambda(get_style_loss, output_shape=(1,),
                                name='style4', arguments={'batch_size': bs})([x, style_activation4])
    if bi_style:
        style_loss4_2 = layers.Lambda(get_style_loss, output_shape=(1,),
                                      name='style4_2', arguments={'batch_size': bs})([x, style_activation4_2])
        style_loss4 = AverageAddTwo(name='style4_out')([style_loss4, style_loss4_2])
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if bi_style:
        model = Model(
            [input_o, content_activation, style_activation1, style_activation2, style_activation3, style_activation4,
             style_activation1_2, style_activation2_2, style_activation3_2, style_activation4_2],
            [content_loss, style_loss1, style_loss2, style_loss3, style_loss4, total_variation_loss, c4])
    else:
        model = Model(
            [input_o, content_activation, style_activation1, style_activation2, style_activation3, style_activation4],
            [content_loss, style_loss1, style_loss2, style_loss3, style_loss4, total_variation_loss, c4])
    model_layers = {layer.name: layer for layer in model.layers}
    original_vgg = vgg16.VGG16(weights='imagenet', include_top=False)
    original_vgg_layers = {layer.name: layer for layer in original_vgg.layers}

    # Load the pre-trained ImageNet weights into the matching VGG16 layers and freeze them
    for layer in original_vgg.layers:
        if layer.name in model_layers:
            model_layers[layer.name].set_weights(original_vgg_layers[layer.name].get_weights())
            model_layers[layer.name].trainable = False

    print("training model built successfully!")
    return model
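
# A hedged training sketch: every output except the stylized image is already a
# scalar loss, so dummy losses that simply average the prediction are commonly used.
# The optimizer, loss weights and sizes below are illustrative, not from the
# original code.
from tensorflow import keras

def mean_of_prediction(y_true, y_pred):
    # Each loss output already holds the loss value; just reduce it to a scalar.
    return keras.backend.mean(y_pred)

training_model = get_training_model(width=256, height=256, bs=4)
training_model.compile(optimizer='adam',
                       loss=[mean_of_prediction] * 7,
                       loss_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1e-4, 0.0])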
Example No. 10
    # x.add(Dropout(0.3))
    # x.add(Dense(1, activation='sigmoid'))
    # LSTM
    x.add(LSTM(n_hidden))

    shared_model = x

    # The visible layer
    left_input = Input(shape=(max_seq_length, ), dtype='int32')
    right_input = Input(shape=(max_seq_length, ), dtype='int32')

    # Pack it all up into a Manhattan Distance model
    malstm_distance = ManDist()(
        [shared_model(left_input),
         shared_model(right_input)])
    model = Model(inputs=[left_input, right_input], outputs=[malstm_distance])

    if gpus >= 2:
        # `multi_gpu_model()` is quite buggy: it breaks the saved model.
        model = tf.keras.utils.multi_gpu_model(model, gpus=gpus)
    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(),
                  metrics=['accuracy'])
    model.summary()
    shared_model.summary()

    # Start training
    training_start_time = time()
    malstm_trained = model.fit([X_train['left'], X_train['right']],
                               Y_train,
                               batch_size=batch_size,
def InceptionV3(input_shape=None,
                classes=3,
                weights=None):


    img_input = Input(shape=input_shape)

    if image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 3

    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
    x = conv2d_bn(x, 32, 3, 3, padding='valid')
    x = conv2d_bn(x, 64, 3, 3)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = conv2d_bn(x, 80, 1, 1, padding='valid')
    x = conv2d_bn(x, 192, 3, 3, padding='valid')
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # mixed 0: 35 x 35 x 256
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    branch_pool = AveragePooling2D((3, 3),
                                   strides=(1, 1),
                                   padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
    x = concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name='mixed0')

    # mixed 1: 35 x 35 x 288
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    branch_pool = AveragePooling2D((3, 3),
                                   strides=(1, 1),
                                   padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
    x = concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name='mixed1')

    # mixed 2: 35 x 35 x 288
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    branch_pool = AveragePooling2D((3, 3),
                                   strides=(1, 1),
                                   padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
    x = concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name='mixed2')

    # mixed 3: 17 x 17 x 768
    branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(
        branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')

    branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = concatenate(
        [branch3x3, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name='mixed3')

    # mixed 4: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, 192, 1, 1)

    branch7x7 = conv2d_bn(x, 128, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, 128, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

    branch7x7dbl = conv2d_bn(x, 128, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

    branch_pool = AveragePooling2D((3, 3),
                                   strides=(1, 1),
                                   padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
    x = concatenate(
        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
        axis=channel_axis,
        name='mixed4')

    # mixed 5, 6: 17 x 17 x 768
    for i in range(2):
        branch1x1 = conv2d_bn(x, 192, 1, 1)

        branch7x7 = conv2d_bn(x, 160, 1, 1)
        branch7x7 = conv2d_bn(branch7x7, 160, 1, 7)
        branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

        branch7x7dbl = conv2d_bn(x, 160, 1, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

        branch_pool = AveragePooling2D(
            (3, 3), strides=(1, 1), padding='same')(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
        x = concatenate(
            [branch1x1, branch7x7, branch7x7dbl, branch_pool],
            axis=channel_axis,
            name='mixed' + str(5 + i))

    # mixed 7: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, 192, 1, 1)

    branch7x7 = conv2d_bn(x, 192, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, 192, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

    branch7x7dbl = conv2d_bn(x, 192, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

    branch_pool = AveragePooling2D((3, 3),
                                   strides=(1, 1),
                                   padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
    x = concatenate(
        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
        axis=channel_axis,
        name='mixed7')

    # mixed 8: 8 x 8 x 1280
    branch3x3 = conv2d_bn(x, 192, 1, 1)
    branch3x3 = conv2d_bn(branch3x3, 320, 3, 3,
                          strides=(2, 2), padding='valid')

    branch7x7x3 = conv2d_bn(x, 192, 1, 1)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
    branch7x7x3 = conv2d_bn(
        branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')

    branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = concatenate(
        [branch3x3, branch7x7x3, branch_pool],
        axis=channel_axis,
        name='mixed8')

    # mixed 9: 8 x 8 x 2048
    for i in range(2):
        branch1x1 = conv2d_bn(x, 320, 1, 1)

        branch3x3 = conv2d_bn(x, 384, 1, 1)
        branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
        branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
        branch3x3 = concatenate(
            [branch3x3_1, branch3x3_2],
            axis=channel_axis,
            name='mixed9_' + str(i))

        branch3x3dbl = conv2d_bn(x, 448, 1, 1)
        branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
        branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
        branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
        branch3x3dbl = concatenate(
            [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis)

        branch_pool = AveragePooling2D(
            (3, 3), strides=(1, 1), padding='same')(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
        x = concatenate(
            [branch1x1, branch3x3, branch3x3dbl, branch_pool],
            axis=channel_axis,
            name='mixed' + str(9 + i))

    x = GlobalAveragePooling2D(name='avg_pool')(x)
    x = Dropout(hyperparameters.dropout)(x)
    # softmax classifier
    x = Flatten()(x)
    x = Dense(classes)(x)
    x = Activation("softmax")(x)

    inputs = img_input
    # Create model.
    return Model(inputs, x, name='inception_v3')
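
# A hedged usage sketch: conv2d_bn and the `hyperparameters` object (providing
# `dropout`) are assumed to be defined elsewhere in the original module; the shape,
# class count and compile arguments below are illustrative.
model = InceptionV3(input_shape=(299, 299, 3), classes=3)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()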
        )  # normalize on timesteps dimension
        internal = normalized_attention_weights * internal
        print(internal)
        attention_vector = K.sum(internal, axis=1)  # sum on timesteps
        print(attention_vector)
        # recurrent_fusion_model.add(Dense(hidden_state // 2, activation='relu'))
        # recurrent_fusion_model.add(BatchNormalization())
        internal = self.FC_1(attention_vector)
        # internal = self.FC_2(internal)
        final_output = self.classification_layer(internal)

        return final_output


# create the model
recurrent_fusion_model = Model()
recurrent_fusion_model.compile(optimizer=keras.optimizers.Adam(lr=lr),
                               loss=sparse_categorical_cross_entropy_loss,
                               metrics=[acc_top_1, acc_top_5])

# build internal tensors
recurrent_fusion_model.fit(*next(train_generator()),
                           batch_size=1,
                           epochs=1,
                           verbose=0)

# Get the TensorFlow saver ready; it will be used if a checkpoint is found on drive
saver = tf.train.Saver(recurrent_fusion_model.variables)

if checkpoint_found:
    # restore the model from the checkpoint
Example No. 13
def create_model(backborn, features_pixel, input_shape=(512, 512, 3), n_vocab=10):
    image = Input(shape=input_shape, name="image")
    sampled_text_region = Input(shape=(4,), name="sampled_text_region")
    labels = Input(shape=(generator.MAX_LENGTH,), name="labels", dtype=tf.float32)
    label_length = Input(shape=(1,), name="label_length", dtype=tf.int64)

    fmap = backborn(image)
    bbox_output = Conv2D(5, kernel_size=1, name="bbox")(fmap)

    # RoI Pooling and OCR
    roi_horizontal, widths = Lambda(
        lambda args: _roi_pooling_horizontal(args[0], args[1])
    )([fmap, sampled_text_region])
    widths = Lambda(lambda x: x, name="widths")(widths)
    text_recognition_horizontal_model = _text_recognition_horizontal_model(
        roi_horizontal.shape[1:], n_vocab
    )
    smashed_horizontal = text_recognition_horizontal_model(roi_horizontal)
    roi_vertical, heights = Lambda(
        lambda args: _roi_pooling_vertical(args[0], args[1])
    )([fmap, sampled_text_region])
    heights = Lambda(lambda x: x, name="heights")(heights)
    text_recognition_vertical_model = _text_recognition_vertical_model(
        roi_vertical.shape[1:], n_vocab
    )
    smashed_vertical = text_recognition_vertical_model(roi_vertical)

    # pad to merge horizontal and vertical tensors
    smashed_horizontal, smashed_vertical = Lambda(_pad_horizontal_and_vertical)(
        [smashed_horizontal, smashed_vertical]
    )
    length = Lambda(
        lambda args: tf.where(
            tf.greater(tf.squeeze(widths, axis=-1), 0), args[0], args[1]
        )
    )([widths, heights])
    smashed = Lambda(
        lambda args: tf.where(
            tf.greater(tf.squeeze(widths, axis=-1), 0), args[0], args[1]
        )
    )([smashed_horizontal, smashed_vertical])

    ctc_loss = Lambda(_ctc_lambda_func, output_shape=(1,), name="ctc")(
        [smashed, labels, length, label_length]
    )

    training_model = Model(
        [image, sampled_text_region, labels, label_length], [bbox_output, ctc_loss]
    )
    training_model.compile(
        "adam",
        loss={"bbox": _loss, "ctc": lambda y_true, y_pred: y_pred},
        metrics={
            "bbox": [
                _metric_confidence_accuracy,
                _metric_iou,
                _metric_loss_confidence,
                __loss_iou,
            ]
        },
    )

    # prediction model
    confidence = Activation("sigmoid")(
        Lambda(lambda x: x[..., 0], name="confidence")(bbox_output)
    )
    bounding_boxes = Lambda(
        lambda x: _reconstruct_boxes(x[..., 1:5], features_pixel=features_pixel),
        name="box",
    )(bbox_output)
    MAX_BOX = 32

    def nms_fn(args):
        boxes, scores = args

        def mapper(i):
            bbs, ss = boxes[i], scores[i]
            bbs = tf.reshape(bbs, [-1, 4])
            ss = tf.reshape(ss, [-1])
            indices = tf.image.non_max_suppression(bbs, ss, MAX_BOX)
            bbs = tf.gather(bbs, indices)
            ss = tf.gather(ss, indices)
            return tf.where(tf.greater_equal(ss, 0.5), bbs, tf.zeros_like(bbs))

        idx = tf.range(0, tf.shape(boxes)[0])
        return tf.map_fn(mapper, idx, dtype=tf.float32)

    def crop_and_ocr(args):
        images, boxes = args
        boxes = boxes / features_pixel

        ratios = (boxes[..., 2] - boxes[..., 0]) / (boxes[..., 3] - boxes[..., 1])
        vertical_ratios = 1 / ratios
        non_zero_boxes = tf.logical_or(
            tf.greater_equal(boxes[..., 2] - boxes[..., 0], 0.1),
            tf.greater_equal(boxes[..., 3] - boxes[..., 1], 0.1),
        )
        ratios = tf.where(non_zero_boxes, ratios, tf.zeros_like(ratios))
        vertical_ratios = tf.where(
            non_zero_boxes, vertical_ratios, tf.zeros_like(vertical_ratios)
        )
        max_width = tf.to_int32(tf.ceil(tf.reduce_max(ratios * _ROI_HEIGHT)))
        max_height = tf.to_int32(tf.ceil(tf.reduce_max(vertical_ratios * _ROI_WIDTH)))
        max_length = tf.maximum(max_width, max_height)

        def _mapper(i):
            bbs = boxes[:, i, :]
            roi_horizontal, widths = _roi_pooling_horizontal(images, bbs)
            smashed_horizontal = text_recognition_horizontal_model(roi_horizontal)
            roi_vertical, heights = _roi_pooling_vertical(images, bbs)
            smashed_vertical = text_recognition_vertical_model(roi_vertical)
            widths = tf.squeeze(widths, axis=-1)
            heights = tf.squeeze(heights, axis=-1)
            smashed_horizontal, smashed_vertical = _pad_horizontal_and_vertical(
                [smashed_horizontal, smashed_vertical]
            )
            smashed = tf.where(
                tf.not_equal(widths, 0), smashed_horizontal, smashed_vertical
            )
            lengths = tf.where(tf.not_equal(widths, 0), widths, heights)
            cond = tf.not_equal(tf.shape(smashed)[1], 0)

            def then_branch():
                decoded, _probas = tf.keras.backend.ctc_decode(
                    smashed, lengths, greedy=False
                )
                return tf.pad(
                    decoded[0],
                    [[0, 0], [0, max_length - tf.shape(decoded[0])[1]]],
                    constant_values=-1,
                )

            def else_branch():
                return -tf.ones((tf.shape(bbs)[0], max_length), dtype=tf.int64)

            return tf.cond(cond, then_branch, else_branch)

        text_recognition = tf.map_fn(_mapper, tf.range(0, MAX_BOX), dtype=tf.int64)
        return tf.transpose(text_recognition, [1, 0, 2])

    nms_boxes = Lambda(nms_fn, name="nms_boxes")([bounding_boxes, confidence])
    text = Lambda(crop_and_ocr, name="text")([fmap, nms_boxes])
    prediction_model = Model([image], [nms_boxes, text])

    return training_model, prediction_model
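
# A hedged usage sketch: the ROI-pooling, CTC and metric helpers are assumed to come
# from the original module; the toy backbone below is hypothetical and simply shrinks
# the image by a factor of 8 to match features_pixel=8.
from tensorflow.keras import layers

def toy_backbone(image):
    x = image
    for filters in (32, 64, 128):
        x = layers.Conv2D(filters, 3, strides=2, padding='same', activation='relu')(x)
    return x

training_model, prediction_model = create_model(
    toy_backbone, features_pixel=8, input_shape=(512, 512, 3), n_vocab=10)
# training_model.fit(...)            # joint bbox-regression + CTC training
# boxes, texts = prediction_model.predict(images)  # NMS'd boxes and decoded labels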
Example No. 14
    def create_model(self, embedding, types, task='classifier'):
        seed = RandomNormal(mean=0.0, stddev=0.05, seed=42)
        # cora:42
        vi = Input(shape=(), dtype=tf.int32)
        vj = Input(shape=(), dtype=tf.int32)

        # Pre-training output
        walk_emb = Embedding(self.emb_size,
                             self.emb_dim,
                             trainable=False,
                             weights=[embedding['walk']])
        stru_emb = Embedding(self.emb_size,
                             self.emb_dim,
                             trainable=False,
                             weights=[embedding['stru']])
        # link_emb = Embedding(self.emb_size, self.emb_dim, trainable=False, weights=[embedding['link']])
        attr_emb = Embedding(self.emb_size,
                             self.emb_dim,
                             trainable=False,
                             weights=[embedding['attr']])
        walk_stru_emb = Embedding(self.emb_size,
                                  self.emb_dim,
                                  trainable=False,
                                  weights=[embedding['walk_stru']])
        classes_emb = Embedding(self.emb_size,
                                self.emb_dim,
                                trainable=False,
                                weights=[embedding['classes']])

        concat, shape = None, 5
        if task == 'classifier':
            concat = tf.concat(
                [walk_emb(vi),
                 stru_emb(vi),
                 attr_emb(vi),
                 walk_stru_emb(vi)],
                axis=1)
        if task == 'link':
            concat_vi = tf.concat([
                walk_emb(vi),
                stru_emb(vi),
                attr_emb(vi),
                classes_emb(vi),
                walk_stru_emb(vi)
            ],
                                  axis=1)
            concat_vj = tf.concat([
                walk_emb(vj),
                stru_emb(vj),
                attr_emb(vj),
                classes_emb(vi),
                walk_stru_emb(vj)
            ],
                                  axis=1)
            concat = concat_vi * concat_vj
            # concat = tf.concat([walk_emb(vi)], axis=1)

        attention = Dense(concat.shape[1],
                          activation='softmax',
                          kernel_initializer=seed)(concat)
        attention = concat * attention

        reshape = tf.reshape(attention, shape=(-1, shape, self.emb_dim))
        reshape = tf.expand_dims(reshape, -1)
        conv = None
        for i, size in enumerate([[shape, 5], [shape, 3], [shape, 2]]):
            conv2d = Conv2D(filters=5,
                            kernel_size=size,
                            kernel_initializer=seed,
                            padding='same')(reshape)
            pool = AveragePooling2D(pool_size=(1, 2))(conv2d)
            dim = pool.shape[1] * pool.shape[2] * pool.shape[3]
            conv2d = tf.reshape(pool, shape=(-1, dim))
            if i == 0:
                conv = conv2d
            else:
                conv += conv2d  # tf.concat([conv, conv2d], axis=1)

        attention = Dense(concat.shape[1],
                          activation='softmax',
                          kernel_initializer=seed)(concat)
        attention = concat * attention

        res = tf.concat([attention, conv], axis=1)

        output = Dense(types, activation='softmax',
                       kernel_initializer=seed)(res)

        inputs = [vi]
        if task == 'link':
            inputs = [vi, vj]
        model = Model(inputs=inputs, outputs=[output])

        return model
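A minimal usage sketch for the method above. Everything here is illustrative: the enclosing class is not shown in this example, so the method is called unbound with a simple stand-in object that only provides the `emb_size` and `emb_dim` attributes it reads, and the pre-trained embedding matrices are random NumPy arrays keyed by the names the method expects.

import numpy as np
from types import SimpleNamespace

emb_size, emb_dim = 2708, 128  # illustrative sizes (e.g. a Cora-like graph)
embedding = {key: np.random.randn(emb_size, emb_dim).astype("float32")
             for key in ("walk", "stru", "attr", "walk_stru", "classes")}

# Stand-in for `self`; in the source this method lives on a class that defines these.
ctx = SimpleNamespace(emb_size=emb_size, emb_dim=emb_dim)

# Link-prediction variant: the resulting model takes the two node-index inputs [vi, vj].
link_model = create_model(ctx, embedding, types=2, task='link')
link_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
link_model.summary()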
Example No. 15
0
    def Build(self):
        encoder_input, encoder_output = self.layers()
        return Model(inputs=encoder_input, outputs=encoder_output)
Example No. 16
0
    def define_model(self):
        input_images = Input(shape=[
            self.model_parameters.img_height, self.model_parameters.img_width,
            self.model_parameters.num_channels
        ])

        x1 = layers.Conv2D(
            filters=64,
            kernel_size=(7, 7),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(input_images)
        x1 = tfa.layers.InstanceNormalization()(x1)
        x1 = layers.ReLU()(x1)

        x2 = layers.Conv2D(
            filters=128,
            kernel_size=(3, 3),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(x1)
        x2 = tfa.layers.InstanceNormalization()(x2)
        x2 = layers.ReLU()(x2)

        x3 = layers.Conv2D(
            filters=256,
            kernel_size=(3, 3),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(x2)
        x3 = tfa.layers.InstanceNormalization()(x3)
        x3 = layers.ReLU()(x3)

        x4 = layers.Conv2D(
            filters=512,
            kernel_size=(3, 3),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(x3)
        x4 = tfa.layers.InstanceNormalization()(x4)
        x4 = layers.ReLU()(x4)

        x5 = layers.UpSampling2D()(x4)
        x5 = layers.Concatenate()([x5, x3])

        x5 = layers.Conv2D(
            filters=256,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x5)
        x5 = tfa.layers.InstanceNormalization()(x5)
        x5 = layers.LeakyReLU(alpha=0.2)(x5)

        x6 = layers.UpSampling2D()(x5)
        x6 = layers.Concatenate()([x6, x2])

        x6 = layers.Conv2D(
            filters=128,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x6)
        x6 = tfa.layers.InstanceNormalization()(x6)
        x6 = layers.LeakyReLU(alpha=0.2)(x6)

        x7 = layers.UpSampling2D()(x6)
        x7 = layers.Concatenate()([x7, x1])
        x7 = layers.Conv2D(
            filters=64,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x7)
        x7 = tfa.layers.InstanceNormalization()(x7)
        x7 = layers.LeakyReLU(alpha=0.2)(x7)

        x8 = layers.UpSampling2D()(x7)
        x8 = layers.Concatenate()([x8, input_images])
        x8 = layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x8)
        x8 = tfa.layers.InstanceNormalization()(x8)
        x8 = layers.LeakyReLU(alpha=0.2)(x8)

        x9 = layers.Conv2D(
            filters=3,
            kernel_size=(5, 5),
            strides=(1, 1),
            padding='same',
            use_bias=False,
            activation='tanh',
        )(x8)

        model = Model(name=self.model_name, inputs=input_images, outputs=x9)
        return model
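The method above only needs `self.model_parameters` (with `img_height`, `img_width`, `num_channels`) and `self.model_name`, so a quick sketch can exercise it with a `SimpleNamespace` stand-in for the missing class; the 256x256x3 size is illustrative and just has to be divisible by 16 so the four stride-2 stages and the matching upsampling stages line up.

from types import SimpleNamespace

params = SimpleNamespace(img_height=256, img_width=256, num_channels=3)
holder = SimpleNamespace(model_parameters=params, model_name="skip_connection_generator")

# Called unbound for illustration; in the source this is a method on a class.
generator = define_model(holder)
generator.summary()  # 256x256x3 in, 256x256x3 tanh image out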
Example No. 17
0
def caltech_model3(n_classes: int,
                   input_shape=None,
                   input_tensor=None,
                   weights_path: Union[None, str] = None) -> Model:
    """
    Defines a caltech network.

    :param n_classes: the number of classes.
    This is kept as a parameter, even though its value is known in advance,
    so that the model can also be built to predict only a subset of the classes.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras Model.
    """
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-3

    x = Conv2D(64, (3, 3),
               padding='same',
               activation='relu',
               input_shape=input_shape,
               kernel_regularizer=l2(weight_decay))(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)

    x = Conv2D(64, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(64, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    x = Conv2D(128, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(256, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    x = Conv2D(256, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    x = Conv2D(256, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(512, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    x = Conv2D(512, (3, 3),
               padding='same',
               activation='relu',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    x = Flatten()(x)
    x = Dense(1024, kernel_regularizer=l2(weight_decay))(x)
    x = Dense(256, kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(n_classes, activation='softmax', name='softmax_outputs')(x)

    # Create model.
    model = Model(inputs, outputs, name='caltech_model3')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
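A usage sketch for `caltech_model3`, assuming the `create_inputs` and `load_weights` helpers it calls are importable from the same module; the 64x64 RGB input and 102 classes are illustrative values, not taken from the source.

# Build from an explicit input shape; weights_path is None, so no checkpoint is loaded.
model = caltech_model3(n_classes=102, input_shape=(64, 64, 3))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()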
def create_nufft_nn(endcoderArch,
                    lstm_layers,
                    decoderArch,
                    input_shape,
                    CUDA=False):
    '''
    NUFFT neural net constructor.

    endcoderArch: list
        arguments for the encoder's architecture
    lstm_layers: list of ints
        number of units for each stacked LSTM layer
    decoderArch: list
        arguments for the decoder's architecture
    input_shape: list of ints
        shape of the input; each entry is the size of the corresponding dimension
    CUDA: boolean
        if False, CuDNNLSTM is not used; set this to False when CUDA support
        is not available

    '''
    #{
    #super(NUFFT_NN, self).__init__()
    #isLastLSTM = True
    lstm = []
    encoder = None
    numConv = 0
    if len(endcoderArch) != 0:
        numConv = len(endcoderArch[1])
        if len(endcoderArch[1]) != 0:
            encoder = Encoder(*endcoderArch)

    if CUDA:
        #{
        if len(lstm_layers) > 2:
            #{
            if numConv == 0:
                lstm.append(
                    CuDNNLSTM(lstm_layers[0],
                              input_shape=input_shape,
                              return_sequences=True))
            else:
                lstm.append(CuDNNLSTM(lstm_layers[0], return_sequences=True))
            del lstm_layers[0]
            lastLayer = lstm_layers.pop()
            for ix, units in enumerate(lstm_layers):
                #{
                lstm.append(CuDNNLSTM(units, return_sequences=True))
                #}
            lstm.append(CuDNNLSTM(lastLayer, return_sequences=False))
            #}

        elif len(lstm_layers) == 2:
            #{
            if numConv == 0:
                lstm.append(
                    CuDNNLSTM(lstm_layers[0],
                              input_shape=input_shape,
                              return_sequences=True))
            else:
                lstm.append(CuDNNLSTM(lstm_layers[0], return_sequences=True))
            lstm.append(CuDNNLSTM(lstm_layers[1], return_sequences=False))
            #}
        else:
            #{
            if numConv == 0:
                lstm.append(
                    CuDNNLSTM(lstm_layers[0],
                              input_shape=input_shape,
                              return_sequences=False))
            else:
                lstm.append(CuDNNLSTM(lstm_layers[0], return_sequences=False))
            #}
        #}
    else:
        #{
        if len(lstm_layers) > 2:
            #{
            if numConv == 0:
                lstm.append(
                    LSTM(lstm_layers[0],
                         input_shape=input_shape,
                         return_sequences=True))
            else:
                lstm.append(LSTM(lstm_layers[0], return_sequences=True))
            del lstm_layers[0]
            lastLayer = lstm_layers.pop()
            for ix, units in enumerate(lstm_layers):
                #{
                lstm.append(LSTM(units, return_sequences=True))
            #}
            lstm.append(LSTM(lastLayer, return_sequences=False))
            #}
        elif len(lstm_layers) == 2:
            #{
            if numConv == 0:
                lstm.append(
                    LSTM(lstm_layers[0],
                         input_shape=input_shape,
                         return_sequences=True))
            else:
                lstm.append(LSTM(lstm_layers[0], return_sequences=True))
            lstm.append(LSTM(lstm_layers[1], return_sequences=False))
            #}
        else:
            if numConv == 0:
                lstm.append(
                    LSTM(lstm_layers[0],
                         input_shape=input_shape,
                         return_sequences=False))
            else:
                lstm.append(LSTM(lstm_layers[0], return_sequences=False))

        #}
    decoder = Decoder(*decoderArch)
    input_points = Input(shape=input_shape)
    x = input_points
    if encoder is not None:
        x = tf.expand_dims(x, -1)
        x = encoder.call(x)
        x = tf.keras.backend.squeeze(x, -1)
    for ix in range(len(lstm)):
        x = lstm[ix](x)
    x = decoder.call(x)
    nufft = Model(input_points, x)
    return nufft
def resnet_v1(input_shape, depth, num_classes=10):
    """ResNet Version 1 Model builder [a]

    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
    Last ReLU is after the shortcut connection.
    At the beginning of each stage, the feature map size is halved (downsampled)
    by a convolutional layer with strides=2, while the number of filters is
    doubled. Within each stage, the layers have the same number of filters and
    the same feature map sizes.
    Feature map sizes:
    stage 0: 32x32, 16
    stage 1: 16x16, 32
    stage 2:  8x8,  64
    The number of parameters is approx. the same as in Table 6 of [a]:
    ResNet20 0.27M
    ResNet32 0.46M
    ResNet44 0.66M
    ResNet56 0.85M
    ResNet110 1.7M

    # Arguments
        input_shape (tensor): shape of input image tensor
        depth (int): number of core convolutional layers
        num_classes (int): number of classes (CIFAR10 has 10)

    # Returns
        model (Model): Keras model instance
    """
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after last shortcut connection-ReLU
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model
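Following the `depth = 6n + 2` rule from the docstring, a CIFAR-10 ResNet-20 (n = 3) can be built and compiled like this; the `resnet_layer` helper used above is assumed to be in scope.

# depth = 6n + 2 with n = 3 gives ResNet-20 (~0.27M parameters, see the table above).
model = resnet_v1(input_shape=(32, 32, 3), depth=20, num_classes=10)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()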
Example No. 20
0
    def define_model(self):
        input_images = Input(shape=[
            self.model_parameters.img_height, self.model_parameters.img_width,
            self.model_parameters.num_channels
        ])

        x = layers.Conv2D(
            filters=64,
            kernel_size=(7, 7),
            padding='same',
            use_bias=False,
        )(input_images)
        x = tfa.layers.InstanceNormalization()(x)
        x = layers.ReLU()(x)

        x = layers.Conv2D(
            filters=128,
            kernel_size=(3, 3),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(x)
        x = tfa.layers.InstanceNormalization()(x)
        x = layers.ReLU()(x)

        x = layers.Conv2D(
            filters=256,
            kernel_size=(3, 3),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(x)

        x = layers.Conv2D(
            filters=256,
            kernel_size=(3, 3),
            strides=(2, 2),
            padding='same',
            use_bias=False,
        )(x)
        n_resnet = 6
        for _ in range(n_resnet):
            x = advanced_layers.residual_block(256, x)

        x = layers.UpSampling2D()(x)
        x = layers.Conv2D(
            filters=128,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x)
        x = tfa.layers.InstanceNormalization()(x)
        x = layers.ReLU()(x)
        x = layers.UpSampling2D()(x)
        x = layers.Conv2D(
            filters=128,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x)
        x = tfa.layers.InstanceNormalization()(x)
        x = layers.ReLU()(x)
        x = layers.UpSampling2D()(x)

        x = layers.Conv2D(
            filters=64,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x)
        x = tfa.layers.InstanceNormalization()(x)
        x = layers.ReLU()(x)

        x = layers.Conv2D(
            filters=32,
            kernel_size=(5, 5),
            strides=(1, 1),
            padding='same',
            use_bias=False,
        )(x)
        x = tfa.layers.InstanceNormalization()(x)
        x = layers.ReLU()(x)

        x = layers.Conv2D(
            filters=3,
            kernel_size=(7, 7),
            strides=(1, 1),
            padding='same',
            use_bias=False,
            activation='tanh',
        )(x)

        model = Model(name=self.model_name, inputs=input_images, outputs=x)
        return model
Example No. 21
0
    def __fuse__(self):
        concat = [self.vision_subnetwork.output, self.audio_subnetwork.output]
        fusion = reduce(lambda x, f: f(x), self.fusion_subnetwork, concat)
        inputs = [self.vision_subnetwork.input, self.audio_subnetwork.input]
        return Model(inputs, fusion, name=self.name)
Example No. 22
0
class RetroCycleGAN:
    def __init__(self, save_index="0", save_folder="./", generator_size=32,
                 discriminator_size=64, word_vector_dimensions=300,
                 discriminator_lr=0.0001, generator_lr=0.0001,
                 lambda_cycle=1, lambda_id_weight=0.01, one_way_mm=True,
                 cycle_mm=True,
                 cycle_dis=True,
                 id_loss=True,
                 cycle_mm_w=2,
                 cycle_loss=True):
        self.cycle_mm = cycle_mm
        self.cycle_dis = cycle_dis
        self.cycle_mae = cycle_loss
        self.id_loss = id_loss
        self.one_way_mm = one_way_mm
        self.cycle_mm_w = cycle_mm_w if self.cycle_mm else 0
        self.save_folder = save_folder

        # Input shape
        self.word_vector_dimensions = word_vector_dimensions
        self.embeddings_dimensionality = (self.word_vector_dimensions,)  # , self.channels)
        self.save_index = save_index

        # Number of filters in the first layer of G and D
        self.gf = generator_size
        self.df = discriminator_size

        # Loss weights
        self.lambda_cycle = lambda_cycle if self.cycle_mae else 0  # Cycle-consistency loss
        self.lambda_id = lambda_id_weight if self.id_loss else 0  # Identity loss

        d_lr = discriminator_lr
        self.d_lr = d_lr
        g_lr = generator_lr
        self.g_lr = g_lr
        # cv = clip_value
        # cn = cn
        self.d_A = self.build_discriminator(name="word_vector_discriminator")
        self.d_B = self.build_discriminator(name="retrofitted_word_vector_discriminator")
        self.d_ABBA = self.build_c_discriminator(name="cycle_cond_discriminator_unfit")
        self.d_BAAB = self.build_c_discriminator(name="cycle_cond_discriminator_fit")
        # Best combo so far: SGD, gaussian, dropout, 5, 0.5, mml(0, 5, .5), 3x1024 gen, 2x1024, no normalization

        # return Adam(lr,amsgrad=True,decay=1e-8)

        # -------------------------
        # Construct Computational
        #   Graph of Generators
        # -------------------------

        # Build the generators
        self.g_AB = self.build_generator(name="to_retro_generator")
        # for layer in self.g_AB.layers:
        #     a = layer.get_weights()
        # print(a)

        # self.d_A.summary()
        # self.g_AB.summary()
        # plot_model(self.g_AB, show_shapes=True)
        self.g_BA = self.build_generator(name="from_retro_generator")

        # self.d_B.summary()
        # self.g_BA.summary()
        # Input images from both domains
        unfit_wv = Input(shape=self.embeddings_dimensionality, name="plain_word_vector")
        fit_wv = Input(shape=self.embeddings_dimensionality, name="retrofitted_word_vector")
        #

        # Translate images to the other domain
        fake_B = self.g_AB(unfit_wv)
        fake_A = self.g_BA(fit_wv)
        # Translate images back to original domain
        reconstr_A = self.g_BA(fake_B)
        reconstr_B = self.g_AB(fake_A)

        print("Building recon model")
        # self.reconstr = Model(inputs=[unfit_wv,fit_wv],outputs=[reconstr_A,reconstr_B])
        print("Done")
        # Identity mapping of images
        unfit_wv_id = self.g_BA(unfit_wv)
        fit_wv_id = self.g_AB(fit_wv)

        # For the combined model we will only train the generators
        # Discriminators determines validity of translated images
        valid_A = self.d_A(fake_A)
        valid_B = self.d_B(fake_B)

        # Combined model trains generators to fool discriminators
        self.d_A.trainable = False
        self.d_B.trainable = False
        # self.d_ABBA.trainable = False
        # self.d_BAAB.trainable = False

        self.combined = Model(inputs=[unfit_wv, fit_wv],  # Model that does A->B->A (left), B->A->B (right)
                              outputs=[valid_A, valid_B,  # for the bce calculation
                                       reconstr_A, reconstr_B,  # for the mae calculation
                                       reconstr_A, reconstr_B,  # for the max margin calculation
                                       unfit_wv_id, fit_wv_id,
                                       # dAc_r, dBc_r,  # for the conditional discriminator margin calculation
                                       # dAc_fake, dBc_fake  # for the conditional discriminator margin calculation
                                       ],  # for the id loss calculation
                              name="combinedmodel")

        log_path = './logs'
        callback = keras.callbacks.TensorBoard(log_dir=log_path)
        callback.set_model(self.combined)
        self.combined_callback = callback

    def compile_all(self, optimizer="sgd"):

        def max_margin_loss(y_true, y_pred):
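            # Hinge-style max-margin loss on (globally l2-normalised) cosine
            # similarity: for each of `sim_neg` randomly shuffled negative
            # targets, the prediction is pushed to be at least `sim_margin`
            # more similar to the true target than the shuffled negative is,
            # and the clipped margins are averaged over the negatives.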
            cost = 0
            sim_neg = 25
            sim_margin = 1
            for i in range(0, sim_neg):
                new_true = tf.random.shuffle(y_true)
                normalize_a = tf.nn.l2_normalize(y_true)
                normalize_b = tf.nn.l2_normalize(y_pred)
                normalize_c = tf.nn.l2_normalize(new_true)
                minimize = tf.reduce_sum(tf.multiply(normalize_a, normalize_b))
                maximize = tf.reduce_sum(tf.multiply(normalize_a, normalize_c))
                mg = sim_margin - minimize + maximize
                # print(mg)
                cost += tf.keras.backend.clip(mg, 0, 1000)
            return cost / (sim_neg * 1.0)

        def create_opt(lr=0.1):
            if optimizer == "adam":
                opt = tf.optimizers.Adam(lr=lr, epsilon=1e-10)
                return opt
            else:
                raise KeyError("Could not find the optimizer")
        # self.d_A.trainable = True
        # self.d_B.trainable = True

        self.d_A.compile(loss='binary_crossentropy',
                         optimizer=create_opt(self.d_lr),
                         metrics=['accuracy'])
        self.d_ABBA.compile(loss='binary_crossentropy',
                            optimizer=create_opt(self.d_lr),
                            metrics=['accuracy'])
        self.d_BAAB.compile(loss='binary_crossentropy',
                            optimizer=create_opt(self.d_lr),
                            metrics=['accuracy'])

        self.d_B.compile(loss='binary_crossentropy',
                         optimizer=create_opt(self.d_lr),
                         metrics=['accuracy'])
        # self.d_A.trainable = False
        # self.d_B.trainable = False

        self.g_AB.compile(loss=max_margin_loss,
                          optimizer=create_opt(self.g_lr),
                          )
        self.g_BA.compile(loss=max_margin_loss,
                          optimizer=create_opt(self.g_lr),
                          )

        self.combined.compile(loss=['binary_crossentropy', 'binary_crossentropy',
                                    'mae', 'mae',
                                    max_margin_loss, max_margin_loss,
                                    'mae', 'mae',
                                    ],
                              loss_weights=[1, 1,
                                            self.lambda_cycle * 1, self.lambda_cycle * 1,
                                            self.cycle_mm_w, self.cycle_mm_w,
                                            self.lambda_id, self.lambda_id,
                                            # self.lambda_cycle * 1, self.lambda_cycle * 1,
                                            # self.lambda_cycle * 1, self.lambda_cycle * 1
                                            ],
                              optimizer=create_opt(self.g_lr))
        # self.combined.summary()
        self.g_AB.summary()
        self.d_A.summary()
        self.combined.summary()

    def build_generator(self, name, hidden_dim=2048):
        """U-Net Generator"""

        def dense(layer_input, hidden_dim, normalization=True, dropout=True, dropout_percentage=0.2):
            d = Dense(hidden_dim, activation="relu")(layer_input)
            if normalization:
                d = BatchNormalization()(d)
            if dropout:
                d = Dropout(dropout_percentage)(d)
            return d

        # Image input
        inpt = Input(shape=self.embeddings_dimensionality)
        encoder = dense(inpt, hidden_dim, normalization=False, dropout=True, dropout_percentage=0.2)

        decoder = dense(encoder, hidden_dim, normalization=False, dropout=True, dropout_percentage=0.2)  # +encoder
        output = Dense(self.word_vector_dimensions)(decoder)
        return Model(inpt, output, name=name)

    def build_discriminator(self, name, hidden_dim=2048):

        def d_layer(layer_input, hidden_dim, normalization=True, dropout=True, dropout_percentage=0.3):
            """Discriminator layer"""
            d = Dense(hidden_dim, activation="relu")(layer_input)
            if normalization:
                d = BatchNormalization()(d)
            if dropout:
                d = Dropout(dropout_percentage)(d)
            return d

        inpt = Input(shape=self.embeddings_dimensionality)
        d1 = d_layer(inpt, hidden_dim, normalization=False, dropout=True, dropout_percentage=0.3)
        d1 = d_layer(d1, hidden_dim, normalization=True, dropout=True, dropout_percentage=0.3)
        validity = Dense(1, activation="sigmoid", dtype='float32')(d1)
        return Model(inpt, validity, name=name)

    def build_c_discriminator(self, name, hidden_dim=2048):

        def d_layer(layer_input, hidden_dim, normalization=True, dropout=True, dropout_percentage=0.3):
            """Discriminator layer"""
            d = Dense(hidden_dim, activation="relu")(layer_input)
            if normalization:
                d = BatchNormalization()(d)
            if dropout:
                d = Dropout(dropout_percentage)(d)
            return d

        inpt = Input(shape=600)
        d1 = d_layer(inpt, hidden_dim, normalization=False, dropout=True, dropout_percentage=0.3)
        d1 = d_layer(d1, hidden_dim, normalization=True, dropout=True, dropout_percentage=0.3)
        validity = Dense(1, activation="sigmoid", dtype='float32')(d1)
        return Model(inpt, validity, name=name)

    def load_weights(self, preface="", folder=None):
        if folder is None:
            folder = self.save_folder
        try:
            self.g_AB.reset_states()
            self.g_BA.reset_states()
            self.combined.reset_states()
            self.d_B.reset_states()
            self.d_A.reset_states()
            self.d_A.load_weights(os.path.join(folder, preface + "fromretrodis.h5"))
            self.d_B.load_weights(os.path.join(folder, preface + "toretrodis.h5"))
            self.g_AB.load_weights(os.path.join(folder, preface + "toretrogen.h5"))
            self.g_BA.load_weights(os.path.join(folder, preface + "fromretrogen.h5"))
            self.combined.load_weights(os.path.join(folder, preface + "combined_model.h5"))

        except Exception as e:
            print(e)

    def train(self, epochs, dataset, save_folder, name, batch_size=1, cache=False, epochs_per_checkpoint=4,
              dis_train_amount=3):
        wandb.init(project="retrogan", dir=save_folder)
        wandb.run.name = name
        # wandb.watch(self.g_AB,criterion="simlex")
        wandb.run.save()
        self.name = name
        start_time = datetime.datetime.now()
        res = []
        X_train, Y_train = tools.load_all_words_dataset_final(dataset["original"], dataset["retrofitted"],
                                                              save_folder=save_folder, cache=cache)
        print("Shapes of training data:",
              X_train.shape,
              Y_train.shape)
        print(X_train)
        print(Y_train)
        print("*" * 100)

        def load_batch(batch_size=32, always_random=False):
            def _int_load():
                iterable = list(Y_train.index)
                shuffle(iterable)
                batches = []
                print("Prefetching batches")
                for ndx in tqdm(range(0, len(iterable), batch_size)):
                    try:
                        ixs = iterable[ndx:min(ndx + batch_size, len(iterable))]
                        if always_random:
                            ixs = list(np.array(iterable)[random.sample(range(0, len(iterable)), batch_size)])
                        imgs_A = X_train.loc[ixs]
                        imgs_B = Y_train.loc[ixs]
                        if np.isnan(imgs_A).any().any() or np.isnan(imgs_B).any().any():  # np.isnan(imgs_B).any():
                            # print(ixs)
                            continue

                        batches.append((imgs_A, imgs_B))
                    except Exception as e:
                        print("Skipping batch")
                        # print(e)
                return batches

            batches = _int_load()

            print("Beginning iteration")
            for i in tqdm(range(0, len(batches)), ncols=30):
                imgs_A, imgs_B = batches[i]
                yield np.array(imgs_A.values, dtype=np.float32), np.array(imgs_B.values, dtype=np.float32)

        # def load_random_batch(batch_size=32, batch_amount=1000000):
        #     iterable = list(Y_train.index)
        #     # shuffle(iterable)
        #     ixs = list(np.array(iterable)[random.sample(range(0, len(iterable)), batch_size)])
        #     imgs_A = X_train.loc[ixs]
        #     imgs_B = Y_train.loc[ixs]
        #     def test_nan(a,b):
        #         return np.isnan(a).any().any() or np.isnan(b).any().any()
        #     while True:
        #         if(test_nan(imgs_A,imgs_B)):
        #             ixs = list(np.array(iterable)[random.sample(range(0, len(iterable)), batch_size)])
        #             imgs_A = X_train.loc[ixs]
        #             imgs_B = Y_train.loc[ixs]
        #         else:
        #             break
        #     return imgs_A, imgs_B
        #
        # def exp_decay(epoch):
        #     initial_lrate = 0.1
        #     k = 0.1
        #     lrate = initial_lrate * math.exp(-k * epoch)
        #     return lrate

        # noise = np.random.normal(size=(1, dimensionality), scale=0.001)
        # noise = np.tile(noise,(batch_size,1))

        self.compile_all("adam")

        # ds = tf.data.Dataset.from_generator(load_batch,(tf.float32,tf.float32),args=(batch_size,))
        # ds = ds.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

        def train_(training_epochs, always_random=False):
            global_step = 0
            for epoch in range(training_epochs):
                # noise = np.random.normal(size=(batch_size, dimensionality), scale=0.01)
                for batch_i, (imgs_A, imgs_B) in enumerate(load_batch(batch_size, always_random=always_random)):
                    global_step += 1
                    # for batch_i, (imgs_A, imgs_B) in enumerate(ds):
                    # try:
                    # if epoch % 2 == 0:
                    #     # print("Adding noise")
                    #     imgs_A = np.add(noise[0:imgs_A.shape[0], :], imgs_A)
                    #     imgs_B = np.add(noise[0:imgs_B.shape[0], :], imgs_B)
                    # imgs_A = tf.cast(imgs_A, tf.float32)
                    # imgs_B = tf.cast(imgs_B, tf.float32)

                    fake_B = self.g_AB.predict(imgs_A)
                    fake_A = self.g_BA.predict(imgs_B)
                    fake_ABBA = self.g_BA.predict(fake_B)
                    fake_BAAB = self.g_AB.predict(fake_A)
                    # Train the discriminators (original images = real / translated = Fake)
                    dA_loss = None
                    dB_loss = None
                    valid = np.ones((imgs_A.shape[0],))  # *noisy_entries_num,) )
                    fake = np.zeros((imgs_A.shape[0],))  # *noisy_entries_num,) )
                    # self.d_A.trainable = True
                    # self.d_B.trainable = True

                    for _ in range(int(dis_train_amount)):
                        # da = self.d_A.evaluate(imgs_A)
                        dA_loss_real = self.d_A.train_on_batch(imgs_A, valid)
                        # daf = self.d_A(fake_A)
                        dA_loss_fake = self.d_A.train_on_batch(fake_A, fake)
                        if dA_loss is None:
                            dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake)
                        else:
                            dA_loss += 0.5 * np.add(dA_loss_real, dA_loss_fake)
                        dB_loss_real = self.d_B.train_on_batch(imgs_B, valid)
                        dB_loss_fake = self.d_B.train_on_batch(fake_B, fake)
                        if dB_loss is None:
                            dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake)
                        else:
                            dB_loss += 0.5 * np.add(dB_loss_real, dB_loss_fake)
                    d_loss = (1.0 / dis_train_amount) * 0.5 * np.add(dA_loss, dB_loss)
                    # self.d_A.trainable = False
                    # self.d_B.trainable = False

                    def CycleCondLoss(d_ground, d_approx):
                        l = tf.math.log(d_ground) + tf.math.log(1 - d_approx)
                        return -1 * tf.reduce_mean(l)

                    # train cycle discriminators
                    d_cycle_dis = 0
                    g_cycle_dis = 0
                    if self.cycle_dis:
                        with tf.GradientTape() as tape:
                            dA = self.d_ABBA(tf.concat([fake_B, imgs_A], 1))
                            dA_r = self.d_ABBA(tf.concat([fake_B, fake_ABBA], 1))
                            la = CycleCondLoss(dA, dA_r)
                            tga = tape.gradient(la, self.d_ABBA.trainable_variables)
                            self.d_ABBA.optimizer.apply_gradients(zip(tga, self.d_ABBA.trainable_variables))
                            d_cycle_dis += la

                        with tf.GradientTape() as tape:
                            dB = self.d_BAAB(tf.concat([fake_A, imgs_B], 1))
                            dB_r = self.d_BAAB(tf.concat([fake_A, fake_BAAB], 1))
                            lb = CycleCondLoss(dB, dB_r)
                            tgb = tape.gradient(lb, self.d_BAAB.trainable_variables)
                            self.d_BAAB.optimizer.apply_gradients(zip(tgb, self.d_BAAB.trainable_variables))
                            d_cycle_dis += lb
                        with tf.GradientTape() as tape:
                            fake_B = self.g_AB(imgs_A)
                            fake_A = self.g_BA(imgs_B)
                            fake_ABBA = self.g_BA(fake_B)
                            fake_BAAB = self.g_AB(fake_A)
                            dB = self.d_BAAB(tf.concat([fake_A, imgs_B], 1))
                            dB_r = self.d_BAAB(tf.concat([fake_A, fake_BAAB], 1))

                            dA = self.d_ABBA(tf.concat([fake_B, imgs_A], 1))
                            dA_r = self.d_ABBA(tf.concat([fake_B, fake_ABBA], 1))
                            la = CycleCondLoss(dA, dA_r)
                            lb = CycleCondLoss(dB, dB_r)

                            tga = tape.gradient((la + lb) / 2.0, self.combined.trainable_variables)
                            self.combined.optimizer.apply_gradients(zip(tga, self.combined.trainable_variables))
                            g_cycle_dis += (la + lb) / 2.0

                    # Calculate the max margin loss for A->B, B->A
                    mm_b_loss = 0
                    mm_a_loss = 0
                    if self.one_way_mm:
                        mm_a_loss = self.g_AB.train_on_batch(imgs_A, imgs_B)
                        mm_b_loss = self.g_BA.train_on_batch(imgs_B, imgs_A)
                    # Calculate the cycle A->B->A, B->A->B with max margin, and mae
                    # Train cycle dis

                    g_loss = self.combined.train_on_batch([imgs_A, imgs_B],
                                                          [valid, valid,
                                                           imgs_A, imgs_B,
                                                           imgs_A, imgs_B,
                                                           imgs_A, imgs_B,
                                                           # valid,valid,
                                                           # valid,valid
                                                           ])

                    def named_logs(model, logs):
                        result = {}
                        for l in zip(model.metrics_names, logs):
                            result[l[0]] = l[1]
                        return result

                    r = named_logs(self.combined, g_loss)
                    r.update({
                        'mma': mm_a_loss,
                        'mmb': mm_b_loss,
                    })
                    elapsed_time = datetime.datetime.now() - start_time
                    if batch_i % 50 == 0 and batch_i != 0:
                        print(
                            "\n[Epoch %d/%d] [Batch %d] [D loss: %f, acc: %3d%%] "
                            "[G loss: %05f, adv: %05f, recon: %05f, recon_mm: %05f,id: %05f][mma:%05f,mmb:%05f]time: %s " \
                            % (epoch, training_epochs,
                               batch_i,
                               d_loss[0], 100 * d_loss[1],
                               g_loss[0],
                               np.mean(g_loss[1:3]),
                               np.mean(g_loss[3:5]),
                               np.mean(g_loss[5:7]),
                               np.mean(g_loss[7:8]),
                               mm_a_loss,
                               mm_b_loss,
                               elapsed_time))

                        scalars = {
                            "epoch": epoch,
                            # "batch": batch_i,
                            "global_step": global_step,
                            "discriminator_loss": d_loss[0],
                            "discriminator_acc": d_loss[1],
                            "combined_loss": g_loss[0]+g_cycle_dis+d_cycle_dis,
                            "loss": g_loss[0] + d_loss[0],
                            "cycle_da": g_loss[1],
                            "cycle_db": g_loss[2],
                            "cycle_dis": d_cycle_dis,
                            "cycle_gen_condis":g_cycle_dis,
                            "MM_ABBA_CYCLE": g_loss[5],
                            "MM_BAAB_CYCLE": g_loss[6],
                            "abba_mae": g_loss[3],
                            "baab_mae": g_loss[4],
                            "idloss_ab": g_loss[7],
                            "idloss_ba": g_loss[8],
                            "mm_ab_loss": mm_a_loss,
                            "mm_ba_loss": mm_b_loss,
                        }
                        wandb.log(scalars, step=global_step)

                        # wandbcb.on_batch_end(batch_i, r)
                        # wandb.log({"batch_num":batch_i,"epoch_num":epoch})
                        # self.combined_callback.on_batch_end(batch_i, r)

                print("\n")
                sl, sv, c = self.test(dataset)
                if epoch % epochs_per_checkpoint == 0 and epoch != 0:
                    self.save_model(name="checkpoint")

                res.append((sl, sv, c))
                wandb.log({"simlex": sl, "simverb": sv, "card":c,"epoch": epoch})

                # self.combined_callback.on_epoch_end(epoch, {"simlex": sl, "simverb": sv})
                # wandbcb.on_epoch_end(epoch, {"simlex": sl, "simverb": sv})

                print(res)
                print("\n")

        print("Actual training")
        train_(epochs)
        print("Final performance")
        sl, sv, c = self.test(dataset)
        res.append((sl, sv, c))

        self.save_model(name="final")
        return res

    def test(self, dataset, simlex="testing/SimLex-999.txt", simverb="testing/SimVerb-3500.txt",
             card="testing/card660.tsv",
             fasttext="fasttext_model/cc.en.300.bin",
             prefix="en_"):
        sl = tools.test_sem(self.g_AB, dataset, dataset_location=simlex,
                            fast_text_location=fasttext, prefix=prefix, pt=False)[0]
        sv = tools.test_sem(self.g_AB, dataset, dataset_location=simverb,
                            fast_text_location=fasttext, prefix=prefix, pt=False)[0]
        c = tools.test_sem(self.g_AB, dataset, dataset_location=card,
                           fast_text_location=fasttext, prefix=prefix, pt=False)[0]
        return sl, sv, c

    def save_model(self, name=""):
        self.d_A.save(os.path.join(self.save_folder, name + "fromretrodis.h5"), include_optimizer=False)
        self.d_B.save(os.path.join(self.save_folder, name + "toretrodis.h5"), include_optimizer=False)
        self.g_AB.save(os.path.join(self.save_folder, name + "toretrogen.h5"), include_optimizer=False)
        self.g_BA.save(os.path.join(self.save_folder, name + "fromretrogen.h5"), include_optimizer=False)
        self.combined.save(os.path.join(self.save_folder, name + "combined_model.h5"), include_optimizer=False)
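A small construction sketch for the class above. Only the wiring and compilation are exercised; `train()` additionally needs the external `tools`, `wandb` and dataset files it references, so those are not touched here, and the vector size follows the 300-dimensional default.

import numpy as np

gan = RetroCycleGAN(save_folder="./rcgan_checkpoints", word_vector_dimensions=300)
gan.compile_all("adam")

# Push a batch of random "plain" word vectors through the A->B generator.
plain_vectors = np.random.randn(4, 300).astype("float32")
retrofitted = gan.g_AB.predict(plain_vectors)
print(retrofitted.shape)  # (4, 300)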
Example No. 23
0
def siamese_network(input_shape=(105, 105, 1), classes=1):
    """Network Architecture"""
    left_input = layers.Input(shape=input_shape)
    right_input = layers.Input(shape=input_shape)

    # Creating the convnet which shares weights between the left and right legs of Siamese network
    siamese_convnet = Sequential()

    siamese_convnet.add(
        layers.Conv2D(filters=64,
                      kernel_size=10,
                      strides=1,
                      input_shape=input_shape,
                      activation='relu',
                      kernel_initializer=RandomNormal(mean=0, stddev=0.01),
                      kernel_regularizer=l2(1e-2),
                      bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

    siamese_convnet.add(layers.MaxPooling2D(pool_size=(2, 2)))

    siamese_convnet.add(
        layers.Conv2D(filters=128,
                      kernel_size=7,
                      strides=1,
                      activation='relu',
                      kernel_initializer=RandomNormal(mean=0, stddev=0.01),
                      kernel_regularizer=l2(1e-2),
                      bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

    siamese_convnet.add(layers.MaxPooling2D(pool_size=(2, 2)))

    siamese_convnet.add(
        layers.Conv2D(filters=128,
                      kernel_size=4,
                      strides=1,
                      activation='relu',
                      kernel_initializer=RandomNormal(mean=0, stddev=0.01),
                      kernel_regularizer=l2(1e-2),
                      bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

    siamese_convnet.add(layers.MaxPooling2D(pool_size=(2, 2)))

    siamese_convnet.add(
        layers.Conv2D(filters=256,
                      kernel_size=4,
                      strides=1,
                      activation='relu',
                      kernel_initializer=RandomNormal(mean=0, stddev=0.01),
                      kernel_regularizer=l2(1e-2),
                      bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

    siamese_convnet.add(layers.Flatten())

    siamese_convnet.add(
        layers.Dense(4096,
                     activation='sigmoid',
                     kernel_initializer=RandomNormal(mean=0, stddev=0.2),
                     kernel_regularizer=l2(1e-4),
                     bias_initializer=RandomNormal(mean=0.5, stddev=0.01)))

    encoded_left_input = siamese_convnet(left_input)
    encoded_right_input = siamese_convnet(right_input)

    l1_encoded = layers.Lambda(lambda x: tf.abs(x[0] - x[1]))(
        [encoded_left_input, encoded_right_input])

    output = layers.Dense(classes,
                          activation='sigmoid',
                          kernel_initializer=RandomNormal(mean=0, stddev=0.2),
                          bias_initializer=RandomNormal(
                              mean=0.5, stddev=0.01))(l1_encoded)

    return Model(inputs=[left_input, right_input], outputs=output)
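A short usage sketch for the Siamese network with random Omniglot-sized pairs (105x105 grayscale), just to show the two-input contract; real training would use verification pairs drawn from the dataset.

import numpy as np

model = siamese_network(input_shape=(105, 105, 1), classes=1)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Random stand-in data: 8 image pairs with binary same/different labels.
left = np.random.rand(8, 105, 105, 1).astype("float32")
right = np.random.rand(8, 105, 105, 1).astype("float32")
labels = np.random.randint(0, 2, size=(8, 1))
model.fit([left, right], labels, epochs=1, batch_size=4)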
Example No. 24
0
    def __init__(self, save_index="0", save_folder="./", generator_size=32,
                 discriminator_size=64, word_vector_dimensions=300,
                 discriminator_lr=0.0001, generator_lr=0.0001,
                 lambda_cycle=1, lambda_id_weight=0.01, one_way_mm=True,
                 cycle_mm=True,
                 cycle_dis=True,
                 id_loss=True,
                 cycle_mm_w=2,
                 cycle_loss=True):
        self.cycle_mm = cycle_mm
        self.cycle_dis = cycle_dis
        self.cycle_mae = cycle_loss
        self.id_loss = id_loss
        self.one_way_mm = one_way_mm
        self.cycle_mm_w = cycle_mm_w if self.cycle_mm else 0
        self.save_folder = save_folder

        # Input shape
        self.word_vector_dimensions = word_vector_dimensions
        self.embeddings_dimensionality = (self.word_vector_dimensions,)  # , self.channels)
        self.save_index = save_index

        # Number of filters in the first layer of G and D
        self.gf = generator_size
        self.df = discriminator_size

        # Loss weights
        self.lambda_cycle = lambda_cycle if self.cycle_mae else 0  # Cycle-consistency loss
        self.lambda_id = lambda_id_weight if self.id_loss else 0  # Identity loss

        d_lr = discriminator_lr
        self.d_lr = d_lr
        g_lr = generator_lr
        self.g_lr = g_lr
        # cv = clip_value
        # cn = cn
        self.d_A = self.build_discriminator(name="word_vector_discriminator")
        self.d_B = self.build_discriminator(name="retrofitted_word_vector_discriminator")
        self.d_ABBA = self.build_c_discriminator(name="cycle_cond_discriminator_unfit")
        self.d_BAAB = self.build_c_discriminator(name="cycle_cond_discriminator_fit")
        # Best combo so far: SGD, gaussian, dropout, 5, 0.5, mml(0, 5, .5), 3x1024 gen, 2x1024, no normalization

        # return Adam(lr,amsgrad=True,decay=1e-8)

        # -------------------------
        # Construct Computational
        #   Graph of Generators
        # -------------------------

        # Build the generators
        self.g_AB = self.build_generator(name="to_retro_generator")
        # for layer in self.g_AB.layers:
        #     a = layer.get_weights()
        # print(a)

        # self.d_A.summary()
        # self.g_AB.summary()
        # plot_model(self.g_AB, show_shapes=True)
        self.g_BA = self.build_generator(name="from_retro_generator")

        # self.d_B.summary()
        # self.g_BA.summary()
        # Input images from both domains
        unfit_wv = Input(shape=self.embeddings_dimensionality, name="plain_word_vector")
        fit_wv = Input(shape=self.embeddings_dimensionality, name="retrofitted_word_vector")
        #

        # Translate images to the other domain
        fake_B = self.g_AB(unfit_wv)
        fake_A = self.g_BA(fit_wv)
        # Translate images back to original domain
        reconstr_A = self.g_BA(fake_B)
        reconstr_B = self.g_AB(fake_A)

        print("Building recon model")
        # self.reconstr = Model(inputs=[unfit_wv,fit_wv],outputs=[reconstr_A,reconstr_B])
        print("Done")
        # Identity mapping of images
        unfit_wv_id = self.g_BA(unfit_wv)
        fit_wv_id = self.g_AB(fit_wv)

        # For the combined model we will only train the generators
        # Discriminators determines validity of translated images
        valid_A = self.d_A(fake_A)
        valid_B = self.d_B(fake_B)

        # Combined model trains generators to fool discriminators
        self.d_A.trainable = False
        self.d_B.trainable = False
        # self.d_ABBA.trainable = False
        # self.d_BAAB.trainable = False

        self.combined = Model(inputs=[unfit_wv, fit_wv],  # Model that does A->B->A (left), B->A->B (right)
                              outputs=[valid_A, valid_B,  # for the bce calculation
                                       reconstr_A, reconstr_B,  # for the mae calculation
                                       reconstr_A, reconstr_B,  # for the max margin calculation
                                       unfit_wv_id, fit_wv_id,
                                       # dAc_r, dBc_r,  # for the conditional discriminator margin calculation
                                       # dAc_fake, dBc_fake  # for the conditional discriminator margin calculation
                                       ],  # for the id loss calculation
                              name="combinedmodel")

        log_path = './logs'
        callback = keras.callbacks.TensorBoard(log_dir=log_path)
        callback.set_model(self.combined)
        self.combined_callback = callback
Example No. 25
0
def CLRNet(input_shape=None,
           classes=10,
           block='bottleneck',
           residual_unit='v2',
           repetitions=None,
           initial_filters=64,
           activation='softmax',
           include_top=True,
           input_tensor=None,
           dropout=None,
           transition_dilation_rate=(1, 1),
           initial_strides=(2, 2),
           initial_kernel_size=(7, 7),
           initial_pooling='max',
           final_pooling=None,
           top='classification'):
    """Builds a custom ResNet like architecture. Defaults to CLRNet50 v2.

    Args:
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` dim ordering)
            or `(3, 224, 224)` (with `channels_first` dim ordering)).
            It should have exactly 3 dimensions,
            and width and height should be no smaller than 8.
            E.g. `(224, 224, 3)` would be one valid value.
        classes: The number of outputs at final softmax layer
        block: The block function to use. This is either `'basic'` or `'bottleneck'`.
            The original paper used `basic` for layers < 50.
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and the input size
            is halved. Default of None implies the CLRNet50v2 values of [3, 4, 6, 3].
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu
            conv. See [Identity Mappings in
            Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on the [Wide Residual Networks](https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. For semantic
            segmentation of images use a dilation rate of (2, 2).
        initial_strides: Stride of the very first residual unit and MaxPooling2D call,
            with default (2, 2), set to (1, 1) for small images like cifar.
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for
            imagenet and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final
            model layer when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type. Options
            are 'classification' for ImageNet style problems, 'segmentation' for
            problems like the Pascal VOC dataset, and None to exclude these layers
            entirely.

    Returns:
        The keras `Model`.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError(
            'activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError(
            'sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]

    _handle_dim_ordering()
    if len(input_shape) != 4:
        raise Exception(
            "Input shape should be a tuple (frames,nb_channels, nb_rows, nb_cols)"
        )

    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)
    else:
        residual_unit = residual_unit

    # Permute dimension order if necessary
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])

    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters,
                      kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        # x = MaxPooling3D(pool_size=(3, 3, 3), strides=initial_strides, padding="same")(x)
        x = MaxPooling3D(pool_size=(1, 3, 3), strides=None, padding="same")(x)

    block = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        block = _residual_block(
            block_fn,
            filters=filters,
            stage=i,
            blocks=r,
            is_first_layer=(i == 0),
            dropout=dropout,
            transition_dilation_rates=transition_dilation_rates,
            transition_strides=transition_strides,
            residual_unit=residual_unit)(block)
        filters *= 2

    # Last activation

    x = _bn_relu2(block)

    # Classifier block
    if include_top and top == 'classification':
        x = GlobalAveragePooling3D()(x)
        x = Dense(units=classes,
                  activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = ConvLSTM2D(classes, (1, 1),
                       activation='linear',
                       padding='same',
                       return_sequences=True)(x)
        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling3D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling3D()(x)
    model = Model(inputs=img_input, outputs=x)
    return model
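As a rough illustration of how a builder like the one above might be invoked, the sketch below constructs a small 3D ResNet for short video clips and compiles it for classification. The builder name build_resnet3d and all argument values are assumptions made for the example; only the argument names mirror those visible in the function body above.

# Minimal usage sketch (hypothetical builder name and argument values).
# from resnet3d import build_resnet3d   # assumed import location
model = build_resnet3d(
    input_shape=(16, 3, 112, 112),   # (frames, nb_channels, nb_rows, nb_cols), as the shape check expects
    classes=10,
    block='bottleneck',
    residual_unit='v2',
    repetitions=[3, 4, 6, 3],
    activation='softmax',
    include_top=True,
    top='classification')
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])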
Example No. 26
0
def Build(self):
    inputs, outputs = self.layers()
    return Model(inputs=inputs, outputs=outputs)
Example No. 27
0
    def build_model(self):
        print('----------------------------------- Inside build model -----------------------------------')

        if len(self.filter_sizes) != len(self.filter_shapes):
            raise Exception("Please define filter shape and filter sizes of same length")

        # wnorm_input = Input(shape=(self.embedding_dimension, 1), dtype='float32', name=f'{self.name}_word_embedding')
        #
        # raw_batch_x = Input(shape=(313, 1), dtype='float32', name=f'{self.name}_raw_batch')

        # raw_batch_x_s = K.squeeze(raw_batch_x, axis=-1)
        #
        # wnorm_input_s = K.squeeze(wnorm_input, axis=-1)
        # seq_input_1 = Input(shape=(self.max_sent_len, self.embedding_dimension, 1),
        #                     dtype='float32', name=f'{self.name}_embedded_input_1')

        #
        # x_org_input = Input(shape=(self.max_sent_len),
        #                     dtype='float32', name=f'{self.name}_x_org')

        seq_input = Input(shape=(self.max_sent_len, self.embedding_dimension, 1),
                          dtype='float32', name=f'{self.name}_embedded_input')

        print(f'Seq input shape is {seq_input.shape}')

        cnn_1 = Conv2D(filters=self.filter_sizes[0], kernel_size=[self.filter_shapes[0], self.embedding_dimension], strides=[self.strides[0], 1],
                           padding=self.padding, activation=self.activation, name=f'{self.name}_h1_3')(seq_input)

        print(f'CNN_1 shape is {cnn_1.shape}')

        cnn_2 = Conv2D(filters=self.filter_sizes[1], kernel_size=[self.filter_shapes[1], 1], strides=[self.strides[1], 1],
                       padding=self.padding, activation=self.activation, name=f'{self.name}_h2_3')(cnn_1)

        print(f'CNN_2 shape is {cnn_2.shape}')
        print(f'Sent len 3 is {self.sent_len_3}')

        cnn_3 = Conv2D(filters=self.filter_sizes[2], kernel_size=[self.sent_len_3, 1], padding=self.padding,
                       activation=self.activation, name=f'{self.name}_h3_3')(cnn_2)

        print(f'CNN_3 shape is {cnn_3.shape}')

        H = Lambda(lambda w: K.squeeze(w, axis=2))(cnn_3)
        mid = Flatten()(H)
        mid = Dense(300, name='label_dense_1')(mid)
        label_op = Dense(1, name='label_op', activation='sigmoid')(mid)

        dcnn_3 = Conv2DTranspose(filters=self.filter_sizes[1], kernel_size=[self.sent_len_3, 1],
                                 padding=self.padding, activation=self.activation, name=f'{self.name}_h2_t_3')(cnn_3)

        dcnn_2 = Conv2DTranspose(filters=self.filter_sizes[0], kernel_size=[self.filter_shapes[1], 1], strides=[self.strides[1], 1],
                                 padding=self.padding, activation=self.activation, name=f'{self.name}_h2_t_2')(dcnn_3)

        reconstruction_output = Conv2DTranspose(filters=1, kernel_size=[self.filter_shapes[0], self.embedding_dimension],
                                                strides=[self.strides[0], 1],
                                 padding=self.padding, activation=self.activation, name='reconstruction_output')(dcnn_2)

        print(f'Reconstruction op shape is {reconstruction_output.shape}')

        model = Model(inputs=seq_input, outputs=[reconstruction_output, label_op])
        model.compile(optimizer=self.optimizer,
                      loss={'reconstruction_output': 'mse', 'label_op': 'binary_crossentropy'},
                      loss_weights={'reconstruction_output': 0.4, 'label_op': 1.},
                      metrics={'label_op': 'accuracy'})

        return model
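Because the model above has two named outputs, the targets passed at training time must be keyed by those layer names. The snippet below is a minimal sketch with made-up shapes (32 samples, a 50-token sentence length, 300-dimensional embeddings); whether the reconstruction target matches the decoder output shape exactly depends on the padding and stride settings configured above.

import numpy as np

# Hypothetical shapes for illustration only: (batch, max_sent_len, embedding_dimension, 1)
x = np.random.rand(32, 50, 300, 1).astype('float32')
targets = {
    'reconstruction_output': x,                             # autoencoder head reconstructs its input
    'label_op': np.random.randint(0, 2, size=(32, 1)),      # binary labels for the classifier head
}
model.fit(x, targets, batch_size=8, epochs=1)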
Example No. 28
0
def Build(self):
    decoder_input, decoder_output = self.layers()
    return Model(inputs=decoder_input, outputs=decoder_output)
Example No. 29
0
def build_model(self):
    img_input = [Input(shape) for shape in self.get_input_shape()]
    last_layer = self.model_structure(img_input)
    self.model = Model(img_input, last_layer)
    self.model.summary()
def build_model():
    inputs = Input(shape=(OUTPUT_LAYER_SIZE, ))
    output = Dense(NUM_CLASSES, activation='softmax',
                   name='output_layer')(inputs)
    return Model(inputs=[inputs], outputs=output)
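A sketch of how this last single-layer classifier might be compiled and trained on pre-extracted features. The constant values and the choice of sparse integer labels are assumptions for the example, since OUTPUT_LAYER_SIZE and NUM_CLASSES are defined elsewhere in the original module.

import numpy as np

OUTPUT_LAYER_SIZE, NUM_CLASSES = 512, 10   # assumed values for illustration

model = build_model()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

features = np.random.rand(64, OUTPUT_LAYER_SIZE).astype('float32')  # fake feature vectors
labels = np.random.randint(0, NUM_CLASSES, size=(64,))              # fake integer class labels
model.fit(features, labels, batch_size=16, epochs=2)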