# Assumed imports and setup for this snippet (not shown in the original):
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Reshape
import wandb
# WandbKerasCallback comes from an early version of the wandb client
# (newer releases expose wandb.keras.WandbCallback instead).
from wandb.wandb_keras import WandbKerasCallback

run = wandb.init()
config = run.config
config.encoding_dim = 32
config.epochs = 1
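
# add_noise is not defined in this snippet. A minimal sketch (an assumption, not the
# original helper): additive Gaussian noise, clipped back into the [0, 1] pixel range.
import numpy as np

def add_noise(x_train, x_test, noise_factor=0.5):
    x_train_noisy = x_train + noise_factor * np.random.normal(size=x_train.shape)
    x_test_noisy = x_test + noise_factor * np.random.normal(size=x_test.shape)
    return np.clip(x_train_noisy, 0., 1.), np.clip(x_test_noisy, 0., 1.)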

(x_train, _), (x_test, _) = mnist.load_data()

# Scale pixels to [0, 1] before adding noise so that the noisy inputs and the clean
# targets share the same range.
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
(x_train_noisy, x_test_noisy) = add_noise(x_train, x_test)


model = Sequential()
model.add(Flatten(input_shape=(28,28)))
model.add(Dense(config.encoding_dim, activation='relu'))
model.add(Dense(784, activation='sigmoid'))
model.add(Reshape((28,28)))
model.compile(optimizer='adam', loss='mse')


model.fit(x_train_noisy, x_train,
          epochs=config.epochs,
          validation_data=(x_test_noisy, x_test),
          callbacks=[WandbKerasCallback()])


model.save("auto-denoise.h5")




def bidaff(num_words, embeddings_matrix, ce_loader, scope, embedding_dim=64):
    # (batch, input_len) => (batch, input_len, embedding_dim)
    q_input = Input(shape=(QUESTION_LEN,), name="q_input")
    a_input = Input(shape=(ANSWER_LEN,), name="a_input")
    c_input = Input(shape=(CONTEXT_LEN,), name="c_input")

    q_char_input = Input(shape=(QUESTION_LEN * MAX_WORD_LEN,),
                         name="q_char_input")
    a_char_input = Input(shape=(ANSWER_LEN * MAX_WORD_LEN,),
                         name="a_char_input")
    c_char_input = Input(shape=(CONTEXT_LEN * MAX_WORD_LEN,),
                         name="c_char_input")

    # Word embedders.
    q_emb = Embedding(input_dim=num_words + 1,  # word 0 used for padding
                      output_dim=embedding_dim,
                      weights=[embeddings_matrix],
                      input_length=QUESTION_LEN,
                      name="embedding_q2_" + scope,
                      mask_zero=False,
                      trainable=False)
    a_emb = Embedding(input_dim=num_words + 1,  # word 0 used for padding
                      output_dim=embedding_dim,
                      weights=[embeddings_matrix],
                      input_length=ANSWER_LEN,
                      name="embedding_a2_" + scope,
                      mask_zero=False,
                      trainable=False)
    c_emb = Embedding(input_dim=num_words + 1,  # word 0 used for padding
                      output_dim=embedding_dim,
                      weights=[embeddings_matrix],
                      input_length=CONTEXT_LEN,
                      name="embedding_c2_" + scope,
                      mask_zero=False,
                      trainable=False)

    # Char embedders.
    q_char_emb = Embedding(input_dim=ce_loader.get_num_words() + 1,
                           output_dim=ce_loader.get_embedding_len(),
                           weights=[ce_loader.get_embeddings_matrix()],
                           input_length=MAX_WORD_LEN * QUESTION_LEN,
                           name="char_embedding_q2_" + scope,
                           mask_zero=False,
                           trainable=False)

    a_char_emb = Embedding(input_dim=ce_loader.get_num_words() + 1,
                           output_dim=ce_loader.get_embedding_len(),
                           weights=[ce_loader.get_embeddings_matrix()],
                           input_length=MAX_WORD_LEN * ANSWER_LEN,
                           name="char_embedding_a2_" + scope,
                           mask_zero=False,
                           trainable=False)

    c_char_emb = Embedding(input_dim=ce_loader.get_num_words() + 1,
                           output_dim=ce_loader.get_embedding_len(),
                           weights=[ce_loader.get_embeddings_matrix()],
                           input_length=MAX_WORD_LEN * CONTEXT_LEN,
                           name="char_embedding_c2_" + scope,
                           mask_zero=False,
                           trainable=False)

    q = q_emb(q_input)
    a = a_emb(a_input)
    c = c_emb(c_input)

    q_char = q_char_emb(q_char_input)
    a_char = a_char_emb(a_char_input)
    c_char = c_char_emb(c_char_input)

    assert(CHAR_EMBEDDINGS_DIM == ce_loader.get_embedding_len())
    q_char = Reshape((QUESTION_LEN, MAX_WORD_LEN, CHAR_EMBEDDINGS_DIM))(q_char)
    a_char = Reshape((ANSWER_LEN, MAX_WORD_LEN, CHAR_EMBEDDINGS_DIM))(a_char)
    c_char = Reshape((CONTEXT_LEN, MAX_WORD_LEN, CHAR_EMBEDDINGS_DIM))(c_char)

    # CharCNNs for char level embeddings.
    q_char = CharCNN(q_char, name="q_charcnn")
    a_char = CharCNN(a_char, name="a_charcnn")
    c_char = CharCNN(c_char, name="c_charcnn")

    # Concatenate GloVe word embeddings with char-level embeddings.
    q = Concatenate(axis=-1)([q, q_char])
    a = Concatenate(axis=-1)([a, a_char])
    c = Concatenate(axis=-1)([c, c_char])

    q = Dropout(0.2)(q)
    c = Dropout(0.2)(c)
    a = Dropout(0.2)(a)

    # Pass them through a 2-layer highway network.
    for highway_index in range(1, 3):
        q = TimeDistributedHighway(q, 92,
                                   "highway_q_bidaff_{}".format(highway_index))
        a = TimeDistributedHighway(a, 92,
                                   "highway_a_bidaff_{}".format(highway_index))
        c = TimeDistributedHighway(c, 92,
                                   "highway_c_bidaff_{}".format(highway_index))

    # Contextual Embed Layer
    q_lstm = Bidirectional(LSTM(30, recurrent_dropout=0.15,
                                return_sequences=True))(q)
    c_lstm = Bidirectional(LSTM(30, recurrent_dropout=0.15,
                                return_sequences=True))(c)

    sim = Similarity()([c_lstm, q_lstm])

    # *************   Context-to-query attention. ***********************

    # Softmax on each line.
    col_softmax = Lambda(lambda x: K.softmax(x, axis=-1))(sim)

    # Product between softmax prob and each query vector.
    UT = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)),
                output_shape=lambda x: x[0][:2] + x[1][2:])(
                        [col_softmax, q_lstm])

    # *************   Query-to-context attention. ***********************

    # Max per line then softmax.
    line_softmax = Lambda(lambda x: K.max(x, axis=-1),
                          output_shape=lambda x: (x[0], x[1]))(sim)
    line_softmax = Lambda(lambda x: K.softmax(x, axis=-1))(line_softmax)

    # Make @line_softmax a matrix with 1 row.
    line_softmax = Lambda(lambda x: K.expand_dims(x, axis=1),
                          output_shape=lambda x: (x[0], 1, x[1]))(line_softmax)

    # Matrix multiplication.
    HT = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)),
                output_shape=lambda x: x[0][:2] + x[1][2:])(
                        [line_softmax, c_lstm])

    # Remove one extra row.
    HT = Lambda(lambda x: K.squeeze(x, axis=1),
                output_shape=lambda x: (x[0], x[2]))(HT)

    HT = RepeatVector(CONTEXT_LEN)(HT)

    # ************    Combine attention vectors. ***********************

    G = Concatenate(axis=-1)([
        c_lstm,
        UT,
        Multiply()([c_lstm, UT]),
        Multiply()([c_lstm, HT])
    ])

    a_lstm = Bidirectional(LSTM(20, recurrent_dropout=0.15))(a)
    a_lstm = RepeatVector(CONTEXT_LEN)(a_lstm)

    cqa = Concatenate(axis=-1)([G, a_lstm])
    cqa = Dropout(0.2)(cqa)
    cqa = Bidirectional(LSTM(30, recurrent_dropout=0.15))(cqa)

    cqa = Dropout(0.25)(cqa)

    cqa = Dense(100, activation='relu')(cqa)

    cqa = Dropout(0.25)(cqa)

    output = Dense(2, activation='softmax')(cqa)
    model = Model(inputs=[
                    q_input, a_input, c_input,
                    q_char_input, a_char_input, c_char_input,
                  ], outputs=[output])
    model.compile(loss=categorical_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])
    plot_model(model, to_file='2way_model.png', show_shapes=True)
    return model
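
# CharCNN, TimeDistributedHighway and Similarity are project-specific helpers that are
# not shown here (Similarity produces the (CONTEXT_LEN, QUESTION_LEN) similarity matrix
# used for the two attention directions). A minimal sketch (an assumption, not the
# original code) of a position-wise highway layer like TimeDistributedHighway:
from keras.layers import TimeDistributed, Dense, Add, Multiply, Lambda

def TimeDistributedHighway(x, units, name):
    # Transform and gate, applied independently at every timestep.
    h = TimeDistributed(Dense(units, activation='relu'), name=name + '_h')(x)
    t = TimeDistributed(Dense(units, activation='sigmoid'), name=name + '_t')(x)
    # Project the input to the same width so the gated sum is well defined.
    x_proj = TimeDistributed(Dense(units), name=name + '_proj')(x)
    carry = Lambda(lambda g: 1.0 - g)(t)
    return Add(name=name + '_out')([Multiply()([t, h]), Multiply()([carry, x_proj])])
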
def Net(n_label, img_input, drop_rate=0.2):

    # ---------left branch -----
    x = conv_block(img_input, 32, (3, 3), strides=1, name='L_conv1-1')
    x = SpatialDropout2D(drop_rate)(x)
    L1 = conv_block(x, 32, (3, 3), strides=1, name='L_conv1-2')
    x = conv_block(L1, 32, (3, 3), strides=2, name='L_conv1-3')
    #   400 -> 200

    x = conv_block(x, 64, (3, 3), strides=1, name='L_conv2-1')
    x = SpatialDropout2D(drop_rate)(x)
    L2 = conv_block(x, 64, (3, 3), strides=1, name='L_conv2-2')
    x = conv_block(L2, 32, (3, 3), strides=2, name='L_conv2-3')
    #   200 -> 100

    x = conv_block(x, 128, (3, 3), strides=1, name='L_conv3-1')
    x = SpatialDropout2D(drop_rate)(x)
    L3 = conv_block(x, 128, (3, 3), strides=1, name='L_conv3-2')
    x = conv_block(L3, 32, (3, 3), strides=2, name='L_conv3-3')
    #   100 -> 50

    x = conv_block(x, 256, (3, 3), strides=1, name='L_conv4-1')
    x = SpatialDropout2D(drop_rate)(x)
    L4 = conv_block(x, 256, (3, 3), strides=1, name='L_conv4-2')
    x = conv_block(L4, 32, (3, 3), strides=2, name='L_conv4-3')
    #   50 -> 25

    x = conv_block(x, 512, (3, 3), strides=1, name='L_conv5-1')
    x = conv_block(x, 512, (3, 3), strides=1, dila=2, name='L_conv5-2')
    x = SpatialDropout2D(drop_rate)(x)
    x = conv_block(x, 512, (3, 3), strides=1, dila=2, name='L_conv5-3')
    L5 = conv_block(x, 512, (3, 3), strides=1, name='L_conv5-4')

    #    25


    # ---------Right branch -----

    #   25 -> 50
    x = Deconv2D(256, kernel_size=2, strides=2, padding='same',name='R_conv1-1')(L5)
    x = BatchNormalization(axis=bn_axis, name='R_conv1-1_' + 'bn')(x)
    x = conv_block(Concatenate(axis=-1)([x, L4]), 256, (3, 3), strides=1, name='R_conv1-2')
    x = SpatialDropout2D(0.1)(x)
    x = conv_block(x, 256, (3, 3), strides=1, name='R_conv1-3')
    R_out1 = Conv2D(n_label,(1,1),name='R_out1')(x)

    #   50 -> 100
    x = Deconv2D(128, kernel_size=2, strides=2, padding='same', name='R_conv2-1')(x)
    x = BatchNormalization(axis=bn_axis, name='R_conv2-1_' + 'bn')(x)
    x = conv_block(Concatenate(axis=-1)([x, L3]), 128, (3, 3), strides=1, name='R_conv2-2')
    x = SpatialDropout2D(0.1)(x)
    x = conv_block(x, 128, (3, 3), strides=1, name='R_conv2-3')
    R_out2 = Conv2D(n_label, (1, 1), name='R_out2')(x)

    #   100 -> 200
    x = Deconv2D(64, kernel_size=2, strides=2, padding='same', name='R_conv3-1')(x)
    x = BatchNormalization(axis=bn_axis, name='R_conv3-1_' + 'bn')(x)
    x = conv_block(Concatenate(axis=-1)([x, L2]), 64, (3, 3), strides=1, name='R_conv3-2')
    x = SpatialDropout2D(0.1)(x)
    x = conv_block(x, 64, (3, 3), strides=1, name='R_conv3-3')
    R_out3 = Conv2D(n_label, (1, 1), name='R_out3')(x)

    #   200 -> 400
    x = Deconv2D(32, kernel_size=2, strides=2, padding='same', name='R_conv4-1')(x)
    x = BatchNormalization(axis=bn_axis, name='R_conv4-1_' + 'bn')(x)
    x = conv_block(Concatenate(axis=-1)([x, L1]), 32, (3, 3), strides=1, name='R_conv4-2')
    x = SpatialDropout2D(0.1)(x)
    x = conv_block(x, 32, (3, 3), strides=1, name='R_conv4-3')
    R_out4 = Conv2D(n_label, (1, 1), name='R_out4')(x)

    # ---------Recoding branch -----

    x = conv_block(R_out4, 32, (1, 1), strides=1, name='E_conv1-1')
    x = conv_block(x, 32, (3, 3), strides=1, name='E_conv1-2')
    x = SpatialDropout2D(drop_rate)(x)
    x = conv_block(x, 32, (3, 3), strides=2, name='E_conv1-3')
    #   400 -> 200

    x = conv_block(Concatenate(axis=-1)([x, conv_block(R_out3,64, (1, 1), strides=1,name='c1')]), 64, (3, 3), strides=1, name='E_conv2-1')
    x = conv_block(x, 64, (3, 3), strides=1, name='E_conv2-2')
    x = SpatialDropout2D(drop_rate)(x)
    x = conv_block(x, 64, (3, 3), strides=2, name='E_conv2-3')
    #   200 -> 100

    x = conv_block(Concatenate(axis=-1)([x, conv_block(R_out2,128, (1, 1), strides=1,name='c2')]), 128, (3, 3), strides=1, name='E_conv3-1')
    x = conv_block(x, 128, (3, 3), strides=1, name='E_conv3-2')
    x = SpatialDropout2D(drop_rate)(x)
    x = conv_block(x, 128, (3, 3), strides=2, name='E_conv3-3')
    #   100 -> 50

    x = conv_block(Concatenate(axis=-1)([x, conv_block(R_out1,256, (1, 1), strides=1,name='c3')]), 256, (3, 3), strides=1, name='E_conv4-1')
    x = conv_block(x, 256, (3, 3), strides=1, name='E_conv4-2')
    x = SpatialDropout2D(drop_rate)(x)
    x = conv_block(x, 256, (3, 3), strides=1, dila=2, name='E_conv4-3')
    x = conv_block(x, 256, (3, 3), strides=1, dila=2, name='E_conv4-4')
    x = conv_block(x, 256, (3, 3), strides=1, name='E_conv4-5')
    #   50

    x = global_context_block(x, channels=64)
# -----------------------------------------
    final_out = Conv2D(n_label,(1,1), name='final_out')(x)
    final_out = UpSampling2D(size=(8,8))(final_out)

    final_out = Activation('softmax',name='l0')(Reshape((400 * 400, n_label))(final_out))
    out1 = Activation('softmax',name='l1')(Reshape((400 * 400, n_label))(R_out4))
    out2 = Activation('softmax',name='l2')(Reshape((200 * 200, n_label))(R_out3))
    out3 = Activation('softmax',name='l3')(Reshape((100 * 100, n_label))(R_out2))
    out4 = Activation('softmax',name='l4')(Reshape((50 * 50, n_label))(R_out1))

    return [final_out, out1, out2, out3, out4]
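
# conv_block, bn_axis and global_context_block are defined elsewhere in the project.
# A plausible sketch (an assumption, not the original code) of conv_block as
# Conv2D + BatchNorm + ReLU with optional dilation, matching the calls above:
from keras.layers import Conv2D, BatchNormalization, Activation

bn_axis = -1  # assumes channels-last data format

def conv_block(x, filters, kernel, strides=1, dila=1, name='conv'):
    x = Conv2D(filters, kernel, strides=strides, dilation_rate=dila,
               padding='same', name=name)(x)
    x = BatchNormalization(axis=bn_axis, name=name + '_bn')(x)
    return Activation('relu', name=name + '_relu')(x)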
Example #4
File: yad2k.py  Project: fmannan/YOLOv3
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    weights_header = np.ndarray(
        shape=(5, ), dtype='int32', buffer=weights_file.read(20))
    print('Weights Header: ', weights_header)
    # TODO: Check transpose flag when implementing fully connected layers.
    # transpose = (weights_header[0] > 1000) or (weights_header[1] > 1000)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    if args.fully_convolutional:
        image_height, image_width = None, None
    else:
        image_height = int(cfg_parser['net_0']['height'])
        image_width = int(cfg_parser['net_0']['width'])

    prev_layer = Input(shape=(image_height, image_width, 3))
    all_layers = [prev_layer]
    outputs = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0

    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            # TODO: This assumes channel last dim_ordering.
            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.prod(weights_shape)

            print('conv2d', 'bn'
                  if batch_normalize else '  ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                # TODO: Keras BatchNormalization mistakenly refers to var
                # as std.
                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            # TODO: Add check for Theano dim ordering.
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            padding = 'same' if pad == 1 and stride == 1 else 'valid'
            # Adjust padding model for darknet.
            if stride == 2:
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

            # Create Conv2D layer
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)

            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)
        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    padding='same',
                    pool_size=(size, size),
                    strides=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]
        elif section.startswith('avgpool'):
            if cfg_parser.items(section) != []:
                raise ValueError('{} with params unsupported.'.format(section))
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]

            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = concatenate(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('shortcut'):
            ids = [int(i) for i in cfg_parser[section]['from'].split(',')][0]
            activation = cfg_parser[section]['activation']
            shortcut = add([all_layers[ids], prev_layer])
            if activation == 'linear':
                shortcut = Activation('linear')(shortcut)
            all_layers.append(shortcut)
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                UpSampling2D(
                    size=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            classes = int(cfg_parser[section]['classes'])
            # num = int(cfg_parser[section]['num'])
            # mask = int(cfg_parser[section]['mask'])
            n1, n2 = int(prev_layer.shape[1]), int(prev_layer.shape[2])
            n3 = 3
            n4 = (4 + 1 + classes)
            yolo = Reshape((n1, n2, n3, n4))(prev_layer)
            all_layers.append(yolo)
            prev_layer = all_layers[-1]
            outputs.append(len(all_layers) - 1)

        elif (section.startswith('net')):
            pass  # Configs not currently handled during model definition.
        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    model = Model(inputs=all_layers[0],
                  outputs=[all_layers[i] for i in outputs])
    print(model.summary())
    model.save('{}'.format(output_path))
    print('Saved Keras model to {}'.format(output_path))
    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
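
# unique_config_sections is not shown here. A minimal sketch (an assumption that is
# consistent with how it is used above: duplicate Darknet section names such as
# [convolutional] get a numeric suffix, so configparser accepts them and 'net_0' exists):
import io
from collections import defaultdict

def unique_config_sections(config_file):
    section_counters = defaultdict(int)
    output_stream = io.StringIO()
    with open(config_file) as fin:
        for line in fin:
            if line.startswith('['):
                section = line.strip().strip('[]')
                _section = section + '_' + str(section_counters[section])
                section_counters[section] += 1
                line = line.replace(section, _section)
            output_stream.write(line)
    output_stream.seek(0)
    return output_stream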
Example #5
    def __init__(
        self,
        layer_sizes,
        generator=None,
        n_samples=None,
        input_dim=None,
        aggregator=None,
        bias=True,
        dropout=0.,
        normalize="l2",
    ):
        # Set the aggregator layer used in the model
        if aggregator is None:
            self._aggregator = MeanAggregator
        elif issubclass(aggregator, Layer):
            self._aggregator = aggregator
        else:
            raise TypeError("Aggregator should be a subclass of Keras Layer")

        # Set the normalization layer used in the model
        if normalize == "l2":
            self._normalization = Lambda(lambda x: K.l2_normalize(x, axis=2))

        elif normalize is None or normalize == "none":
            self._normalization = Lambda(lambda x: x)

        else:
            raise ValueError(
                "Normalization should be either 'l2' or 'none'; received: {}".format(
                    normalize))

        # Get the input_dim and num_samples from the mapper if it is given
        # Use both the schema and head node type from the mapper
        # TODO: Refactor the horror of generator.generator.graph...
        if generator is not None:
            self.n_samples = generator.generator.num_samples
            feature_sizes = generator.generator.graph.node_feature_sizes()
            if len(feature_sizes) > 1:
                raise RuntimeError(
                    "GraphSAGE called on graph with more than one node type.")

            self.input_feature_size = feature_sizes.popitem()[1]

        elif n_samples is not None and input_dim is not None:
            self.n_samples = n_samples
            self.input_feature_size = input_dim

        else:
            raise RuntimeError(
                "If mapper is not provided, n_samples and input_dim must be specified."
            )

        # Model parameters
        self.n_layers = len(self.n_samples)
        self.bias = bias
        self.dropout = dropout

        # Feature dimensions for each layer
        self.dims = [self.input_feature_size] + layer_sizes

        # Aggregator functions for each layer
        self._aggs = [
            self._aggregator(
                output_dim=self.dims[layer + 1],
                bias=self.bias,
                act="relu" if layer < self.n_layers - 1 else "linear",
            ) for layer in range(self.n_layers)
        ]

        # Sizes of the neighbours for each layer
        self._neigh_reshape = [[
            Reshape((-1, max(1, self.n_samples[i]), self.dims[layer]))
            for i in range(self.n_layers - layer)
        ] for layer in range(self.n_layers)]

        self._normalization = Lambda(lambda x: K.l2_normalize(x, 2))
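
# A usage sketch (an assumption; the class this __init__ belongs to is referred to as
# GraphSAGE in the error message above). Either pass a node `generator`, or give
# `n_samples` and `input_dim` explicitly; the values below are purely illustrative:
model_builder = GraphSAGE(layer_sizes=[32, 32], n_samples=[10, 5], input_dim=1433,
                          bias=True, dropout=0.5, normalize="l2")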
Example #6

# Assumed imports for this snippet (not shown in the original); X and vocabulary_inv
# come from an earlier preprocessing step that is also not shown.
from keras.layers import (Input, Embedding, Reshape, Conv2D, MaxPool2D, Concatenate,
                          Flatten)

sequence_length = X.shape[1]
vocabulary_size = len(vocabulary_inv)
embedding_dim = 300
filter_sizes = [1,2,3,4,5,6]
num_filters = 512
drop = 0.5

epochs = 20
batch_size = 30

print("Creating Model...")
inputs = Input(shape=(sequence_length,), dtype='int32')
embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=sequence_length)(inputs)
reshape = Reshape((sequence_length,embedding_dim,1))(embedding)

conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_3 = Conv2D(num_filters, kernel_size=(filter_sizes[3], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_4 = Conv2D(num_filters, kernel_size=(filter_sizes[4], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_5 = Conv2D(num_filters, kernel_size=(filter_sizes[5], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
maxpool_0 = MaxPool2D(pool_size=(sequence_length - filter_sizes[0] + 1, 1), strides=(1,1), padding='valid')(conv_0)
maxpool_1 = MaxPool2D(pool_size=(sequence_length - filter_sizes[1] + 1, 1), strides=(1,1), padding='valid')(conv_1)
maxpool_2 = MaxPool2D(pool_size=(sequence_length - filter_sizes[2] + 1, 1), strides=(1,1), padding='valid')(conv_2)
maxpool_3 = MaxPool2D(pool_size=(sequence_length - filter_sizes[3] + 1, 1), strides=(1,1), padding='valid')(conv_3)
maxpool_4 = MaxPool2D(pool_size=(sequence_length - filter_sizes[4] + 1, 1), strides=(1,1), padding='valid')(conv_4)
maxpool_5 = MaxPool2D(pool_size=(sequence_length - filter_sizes[5] + 1, 1), strides=(1,1), padding='valid')(conv_5)
concatenated_tensor = Concatenate(axis=1)([maxpool_0, maxpool_1, maxpool_2, maxpool_3, maxpool_4,maxpool_5])
flatten = Flatten()(concatenated_tensor)
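
# The example stops at the flatten layer. A plausible classification head (an
# assumption, following the common Kim-style text-CNN setup; the number of output
# classes is illustrative):
from keras.layers import Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam

dropout = Dropout(drop)(flatten)
output = Dense(units=2, activation='softmax')(dropout)

model = Model(inputs=inputs, outputs=output)
model.compile(optimizer=Adam(lr=1e-4), loss='categorical_crossentropy',
              metrics=['accuracy'])
# model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
#           validation_data=(X_test, y_test))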
Example #7
def ssc_300(image_size,
            n_classes,
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=[2, 1, 0],
            predictors=[
                'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
            ],
            hidden_size=[250, 250, 100],
            output_activation=False,
            lstm=False,
            condense_predictors=False):
    """
    Build a Keras model with SSC300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper. Most of the arguments that this function takes are only
    needed for the anchor box layers, in case you're training the network.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    References: https://arxiv.org/abs/1512.02325v5

    :param tuple image_size: The input image size in the format `(height, width, channels)`.
    :param int n_classes: The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
    :param float l2_regularization: The L2-regularization rate. Applies to all convolutional layers.
        Set to zero to deactivate L2-regularization.
    :param float min_scale: The smallest scaling factor for the size of the anchor boxes as a fraction
        of the shorter side of the input images.
    :param float max_scale: The largest scaling factor for the size of the anchor boxes as a fraction
        of the shorter side of the input images. All scaling factors between the smallest and the
        largest will be linearly interpolated. Note that the second to last of the linearly interpolated
        scaling factors will actually be the scaling factor for the last predictor layer, while the last
        scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
        if `two_boxes_for_ar1` is `True`.
    :param list scales: A list of floats containing scaling factors per convolutional predictor layer.
        This list must be one element longer than the number of predictor layers. The first `k` elements are the
        scaling factors for the `k` predictor layers, while the last element is used for the second box
        for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
        last scaling factor must be passed either way, even if it is not being used. If a list is passed,
        this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
    :param list aspect_ratios_global: The list of aspect ratios for which anchor boxes are to be
        generated. This list is valid for all prediction layers.
    :param list aspect_ratios_per_layer: A list containing one aspect ratio list for each prediction layer.
        This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
        original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`.
    :param bool two_boxes_for_ar1: Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
        If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
        using the scaling factor for the respective layer, the second one will be generated using
        geometric mean of said scaling factor and next bigger scaling factor.
    :param list steps: `None` or a list with as many elements as there are predictor layers. The elements can be
        either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
        pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
        the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
        If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
        If no steps are provided, then they will be computed such that the anchor box center points will form an
        equidistant grid within the image dimensions.
    :param list offsets: `None` or a list with as many elements as there are predictor layers. The elements can be
        either floats or tuples of two floats. These numbers represent for each predictor layer how many
        pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
        as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
        of the step size specified in the `steps` argument. If the list contains floats, then that value will
        be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
        `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
    :param list subtract_mean: `None` or an array-like object of integers or floating point values
        of any shape that is broadcast-compatible with the image shape. The elements of this array will be
        subtracted from the image pixel intensity values. For example, pass a list of three integers
        to perform per-channel mean normalization for color images.
    :param list divide_by_stddev: `None` or an array-like object of non-zero integers or
        floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
        intensity values will be divided by the elements of this array. For example, pass a list
        of three integers to perform per-channel standard deviation normalization for color images.
    :param list swap_channels: Either `False` or a list of integers representing the desired order in which the input
        image channels should be swapped.
    :param list predictors: names of the convolutional layers used as predictors
    :param list hidden_size: number of neurons for the 3 hidden fully-connected layers
    :param bool output_activation: whether to apply a softplus activation after the hidden layers
    :param bool lstm: whether to add an LSTM layer on top of the hidden layers
    :param bool condense_predictors: whether to condense the predictors into a single prediction

    :return model: The Keras SSC300 model.
    """

    n_predictor_layers = len(
        predictors
    )  # The number of predictor conv layers in the network is 6 for the original SSD300.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size  # (height, width, channels)

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)
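        # For example (illustrative values, not from this project): min_scale=0.2,
        # max_scale=0.9 with 6 predictor layers gives np.linspace(0.2, 0.9, 7)
        # = [0.2, 0.317, 0.433, 0.55, 0.667, 0.783, 0.9] (rounded).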

    if len(hidden_size) != 3:
        raise ValueError(
            "3 hidden size values must be passed, but {} values were received."
            .format(len(hidden_size)))
    hidden_size = np.array(hidden_size)
    if np.any(hidden_size <= 0):
        raise ValueError(
            "All hidden sizes must be >0, but the sizes given are {}".format(
                hidden_size))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc6')(pool5)

    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc7')(fc6)

    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_2')(conv6_1)

    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_2')(conv7_1)

    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_2')(conv8_1)

    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_2')(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    conv_features = {
        'conv4_3': conv4_3_norm,
        'fc7': fc7,
        'conv6_2': conv6_2,
        'conv7_2': conv7_2,
        'conv8_2': conv8_2,
        'conv9_2': conv9_2
    }
    predictor_layers = []

    ### Build the predictor layers on top of the base network
    for predictor in predictors:
        flatten = Flatten(name='{}_flat'.format(predictor))(
            conv_features[predictor])
        d1 = Dense(hidden_size[0], name='{}_d1'.format(predictor))(flatten)
        d1bn = BatchNormalization(name='{}_bn1'.format(predictor))(d1)
        r1 = Activation(activation='relu',
                        name='{}_r1'.format(predictor))(d1bn)
        d2 = Dense(hidden_size[1], name='{}_d2'.format(predictor))(r1)
        d2bn = BatchNormalization(name='{}_bn2'.format(predictor))(d2)
        r2 = Activation(activation='relu',
                        name='{}_r2'.format(predictor))(d2bn)
        d3 = Dense(hidden_size[2], name='{}_d3'.format(predictor))(r2)
        d3bn = BatchNormalization(name='{}_bn3'.format(predictor))(d3)
        r3 = Activation(activation='relu',
                        name='{}_r3'.format(predictor))(d3bn)
        pred = Dense(n_classes, name='{}_pred'.format(predictor))(r3)
        predictor_layers.append(pred)

    # Concatenate the output of the different predictors
    # Output shape of `predictions`: (batch, n_predictors, n_classes)
    predictions = Concatenate(axis=1, name='predictions1')(predictor_layers)
    if output_activation:
        predictions = Activation(activation='softplus')(predictions)
    if lstm:
        predictions = Reshape((n_predictor_layers, n_classes),
                              name='lstm_predictions_res')(predictions)
        predictions = Bidirectional(LSTM(20, return_sequences=False),
                                    name='lstm_predictions')(predictions)
    if condense_predictors:
        predictions = Dense(n_classes,
                            name='predictions_condensed')(predictions)

    return Model(inputs=x, outputs=predictions)
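
# A usage sketch with illustrative values (an assumption, not taken from the original
# project):
model = ssc_300(image_size=(300, 300, 3), n_classes=21, min_scale=0.1, max_scale=0.9)
model.summary()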
Example #8
# Assumed imports and GPU setup for this snippet (not shown in the original):
import tensorflow as tf
from keras import backend as K
from keras.layers import (Input, Dense, Reshape, Conv2D, UpSampling2D,
                          BatchNormalization, LeakyReLU)

gpu_options = tf.GPUOptions(allow_growth=True)  # illustrative; the original options are not shown
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(session)

# Suppress warnings about how TensorFlow was compiled.
tf.logging.set_verbosity(tf.logging.ERROR)

noise_size = 100

## G

z = Input(shape=[noise_size])

G = Dense(8 * 4 * 256)(z)
G = BatchNormalization(momentum=0.9)(G)
G = LeakyReLU(alpha=0.2)(G)
G = Reshape((4, 8, 256))(G)

G = UpSampling2D()(G)
G = Conv2D(128, (5, 5), padding='same')(G)
G = BatchNormalization(momentum=0.9)(G)
G = LeakyReLU(alpha=0.2)(G)

G = UpSampling2D()(G)
G = Conv2D(64, (5, 5), padding='same')(G)
G = BatchNormalization(momentum=0.9)(G)
G = LeakyReLU(alpha=0.2)(G)

G = UpSampling2D()(G)
G = Conv2D(32, (5, 5), padding='same')(G)
G = BatchNormalization(momentum=0.9)(G)
G = LeakyReLU(alpha=0.2)(G)
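
# The generator definition is cut off here. A plausible final step (an assumption:
# a 3-channel tanh output image plus the Model wrapper) would be:
from keras.models import Model

G = Conv2D(3, (5, 5), padding='same', activation='tanh')(G)
generator = Model(inputs=z, outputs=G)
generator.summary()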
Example #9
def ssd_300(image_size,
            n_classes,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[0.5, 1.0, 2.0],
                                     [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                                     [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                                     [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                                     [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]],
            two_boxes_for_ar1=True,
            steps=None,
            offsets=None,
            limit_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=False,
            subtract_mean=None,
            divide_by_stddev=None,
            swap_channels=False,
            return_predictor_sizes=False):
    '''
    Build a Keras model with SSD_300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper.

    In case you're wondering why this function has so many arguments: All arguments except
    the first two (`image_size` and `n_classes`) are only needed so that the anchor box
    layers can produce the correct anchor boxes. In case you're training the network, the
    parameters passed here must be the same as the ones used to set up `SSDBoxEncoder`.
    In case you're loading trained weights, the parameters passed here must be the same
    as the ones used to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of categories for classification including
            the background class (i.e. the number of positive classes +1 for
            the background class).
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. Defaults to 0.1.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`. Defaults to 0.9.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used.
            Defaults to `None`. If a list is passed, this argument overrides `min_scale` and
            `max_scale`. All scaling factors must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all prediction layers. Defaults to None.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
            This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
            original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`.
            Defaults to the aspect ratios used in the original SSD300 architecture, i.e.:
                [[0.5, 1.0, 2.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [0.5, 1.0, 2.0],
                 [0.5, 1.0, 2.0]]
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using
            geometric mean of said scaling factor and next bigger scaling factor. Defaults to `True`, following the original
            implementation.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions. Defaults to `None`.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
            Defaults to `None`.
        limit_boxes (bool, optional): If `True`, limits box coordinates to stay within image boundaries.
            This would normally be set to `True`, but here it defaults to `False`, following the original
            implementation.
        variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's not factors but divisors
            to be precise) for the encoded predicted box coordinates. A variance value of 1.0 would apply
            no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater
            than 1.0 downscale the encoded predictions. Defaults to `[0.1, 0.1, 0.2, 0.2]`, following the original implementation.
            The coordinate format must be 'centroids'.
        coords (str, optional): The box coordinate format to be used. Can be either 'centroids' for the format
            `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`. Defaults to 'centroids', following the original implementation.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates. Defaults to `False`.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images. Defaults to `None`.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
            Defaults to `None`.
        swap_channels (bool, optional): If `True` the color channel order of the input images will be reversed,
            i.e. if the input color channel order is RGB, the color channels will be swapped to BGR. Note that the
            original Caffe implementation assumes BGR input. Defaults to `True`.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers); for inference you don't need them.

    Returns:
        model: The Keras SSD model.
        predictor_sizes: A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 6  # The number of predictor conv layers in the network is 6 for the original SSD300

    # Get a few exceptions out of the way first
    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "Either `aspect_ratios_per_layer` must be None or len(aspect_ratios_per_layer) must equal {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "Either `scales` must be None or len(scales) must equal {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)
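        # For example, min_scale=0.2 and max_scale=0.9 with 6 predictor layers yield 7 evenly
        # spaced scales: approximately [0.2, 0.317, 0.433, 0.55, 0.667, 0.783, 0.9].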

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers
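    # For example, with the default SSD300 aspect ratios listed in the docstring and
    # `two_boxes_for_ar1=True`, this yields n_boxes = [4, 6, 6, 6, 4, 4].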

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    # Input image format
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]

    ### Build the actual network.

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(lambda z: z,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(lambda z: z - np.array(subtract_mean),
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(lambda z: z / np.array(divide_by_stddev),
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels and (img_channels == 3):
        x1 = Lambda(lambda z: z[..., ::-1],
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 name='fc6')(pool5)

    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 name='fc7')(fc6)

    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     name='conv6_2')(conv6_1)

    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     name='conv7_2')(conv7_1)

    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     name='conv8_2')(conv8_1)

    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     name='conv9_2')(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    ### Build the convolutional predictor layers on top of the base network

    # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    conv4_3_norm_mbox_conf = Conv2D(
        n_boxes[0] * n_classes, (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        name='conv4_3_norm_mbox_conf')(conv4_3_norm)
    fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3),
                           padding='same',
                           kernel_initializer='he_normal',
                           name='fc7_mbox_conf')(fc7)
    conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               name='conv6_2_mbox_conf')(conv6_2)
    conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               name='conv7_2_mbox_conf')(conv7_2)
    conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               name='conv8_2_mbox_conf')(conv8_2)
    conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               name='conv9_2_mbox_conf')(conv9_2)
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3),
                                   padding='same',
                                   kernel_initializer='he_normal',
                                   name='conv4_3_norm_mbox_loc')(conv4_3_norm)
    fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3),
                          padding='same',
                          kernel_initializer='he_normal',
                          name='fc7_mbox_loc')(fc7)
    conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              name='conv6_2_mbox_loc')(conv6_2)
    conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              name='conv7_2_mbox_loc')(conv7_2)
    conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              name='conv8_2_mbox_loc')(conv8_2)
    conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              name='conv9_2_mbox_loc')(conv9_2)

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    conv4_3_norm_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[0],
        next_scale=scales[1],
        aspect_ratios=aspect_ratios[0],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[0],
        this_offsets=offsets[0],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc)
    fc7_mbox_priorbox = AnchorBoxes(img_height,
                                    img_width,
                                    this_scale=scales[1],
                                    next_scale=scales[2],
                                    aspect_ratios=aspect_ratios[1],
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    this_steps=steps[1],
                                    this_offsets=offsets[1],
                                    limit_boxes=limit_boxes,
                                    variances=variances,
                                    coords=coords,
                                    normalize_coords=normalize_coords,
                                    name='fc7_mbox_priorbox')(fc7_mbox_loc)
    conv6_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[2],
        next_scale=scales[3],
        aspect_ratios=aspect_ratios[2],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[2],
        this_offsets=offsets[2],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc)
    conv7_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[3],
        next_scale=scales[4],
        aspect_ratios=aspect_ratios[3],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[3],
        this_offsets=offsets[3],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc)
    conv8_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[4],
        next_scale=scales[5],
        aspect_ratios=aspect_ratios[4],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[4],
        this_offsets=offsets[4],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc)
    conv9_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[5],
        next_scale=scales[6],
        aspect_ratios=aspect_ratios[5],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[5],
        this_offsets=offsets[5],
        limit_boxes=limit_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc)

    ### Reshape

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    conv4_3_norm_mbox_conf_reshape = Reshape(
        (-1, n_classes),
        name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf)
    fc7_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf)
    conv6_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf)
    conv7_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf)
    conv8_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf)
    conv9_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf)
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    conv4_3_norm_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc)
    fc7_mbox_loc_reshape = Reshape((-1, 4),
                                   name='fc7_mbox_loc_reshape')(fc7_mbox_loc)
    conv6_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc)
    conv7_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc)
    conv8_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc)
    conv9_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8),
        name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')([
        conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape,
        conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape,
        conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape
    ])

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')([
        conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape,
        conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape,
        conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape
    ])

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([
        conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape,
        conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape,
        conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape
    ])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    model = Model(inputs=x, outputs=predictions)

    if return_predictor_sizes:
        # Get the spatial dimensions (height, width) of the predictor conv layers, we need them to
        # be able to generate the default boxes for the matching process outside of the model during training.
        # Note that the original implementation performs anchor box matching inside the loss function. We don't do that.
        # Instead, we'll do it in the batch generator function.
        # The spatial dimensions are the same for the confidence and localization predictors, so we just take those of the conf layers.
        predictor_sizes = np.array([
            conv4_3_norm_mbox_conf._keras_shape[1:3],
            fc7_mbox_conf._keras_shape[1:3],
            conv6_2_mbox_conf._keras_shape[1:3],
            conv7_2_mbox_conf._keras_shape[1:3],
            conv8_2_mbox_conf._keras_shape[1:3],
            conv9_2_mbox_conf._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
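
# Hedged usage sketch for the SSD builder defined above. The builder's exact name and
# positional signature are not shown in this snippet, so the name `ssd_300`, the value
# of `n_classes`, and the image size below are illustrative assumptions only; the
# aspect ratios, variances, and coordinate format mirror the docstring defaults.
ssd_model, ssd_predictor_sizes = ssd_300(
    image_size=(300, 300, 3),   # (img_height, img_width, img_channels)
    n_classes=21,               # assumption: e.g. 20 object classes + 1 background class
    min_scale=0.1,
    max_scale=0.9,
    aspect_ratios_per_layer=[[0.5, 1.0, 2.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                             [0.5, 1.0, 2.0],
                             [0.5, 1.0, 2.0]],
    two_boxes_for_ar1=True,
    variances=[0.1, 0.1, 0.2, 0.2],
    coords='centroids',
    normalize_coords=False,
    return_predictor_sizes=True)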
Example #10
def new_lpcnet_model(frame_size=160,
                     rnn_units1=384,
                     rnn_units2=16,
                     nb_used_features=38,
                     training=False,
                     use_gpu=True):
    pcm = Input(shape=(None, 3))
    feat = Input(shape=(None, nb_used_features))
    pitch = Input(shape=(None, 1))
    dec_feat = Input(shape=(None, 128))
    dec_state1 = Input(shape=(rnn_units1, ))
    dec_state2 = Input(shape=(rnn_units2, ))

    padding = 'valid' if training else 'same'
    fconv1 = Conv1D(128,
                    3,
                    padding=padding,
                    activation='tanh',
                    name='feature_conv1')
    fconv2 = Conv1D(128,
                    3,
                    padding=padding,
                    activation='tanh',
                    name='feature_conv2')

    embed = Embedding(256,
                      embed_size,
                      embeddings_initializer=PCMInit(),
                      name='embed_sig')
    cpcm = Reshape((-1, embed_size * 3))(embed(pcm))

    pembed = Embedding(256, 64, name='embed_pitch')
    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])

    cfeat = fconv2(fconv1(cat_feat))

    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
    fdense2 = Dense(128, activation='tanh', name='feature_dense2')

    cfeat = fdense2(fdense1(cfeat))

    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

    if use_gpu:
        rnn = CuDNNGRU(rnn_units1,
                       return_sequences=True,
                       return_state=True,
                       name='gru_a')
        rnn2 = CuDNNGRU(rnn_units2,
                        return_sequences=True,
                        return_state=True,
                        name='gru_b')
    else:
        rnn = GRU(rnn_units1,
                  return_sequences=True,
                  return_state=True,
                  recurrent_activation="sigmoid",
                  reset_after=True,
                  name='gru_a')
        rnn2 = GRU(rnn_units2,
                   return_sequences=True,
                   return_state=True,
                   recurrent_activation="sigmoid",
                   reset_after=True,
                   name='gru_b')

    rnn_in = Concatenate()([cpcm, rep(cfeat)])
    md = MDense(pcm_levels, activation='softmax', name='dual_fc')
    gru_out1, _ = rnn(rnn_in)
    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
    ulaw_prob = md(gru_out2)

    model = Model([pcm, feat, pitch], ulaw_prob)
    model.rnn_units1 = rnn_units1
    model.rnn_units2 = rnn_units2
    model.nb_used_features = nb_used_features
    model.frame_size = frame_size

    encoder = Model([feat, pitch], cfeat)

    dec_rnn_in = Concatenate()([cpcm, dec_feat])
    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]),
                                initial_state=dec_state2)
    dec_ulaw_prob = md(dec_gru_out2)

    decoder = Model([pcm, dec_feat, dec_state1, dec_state2],
                    [dec_ulaw_prob, state1, state2])
    return model, encoder, decoder
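
# Hedged usage sketch for new_lpcnet_model(): unpack the three returned models and
# compile the training model. The optimizer and loss below are illustrative
# assumptions; module-level names used inside the builder (embed_size, pcm_levels,
# MDense, PCMInit) are assumed to be defined elsewhere in the original project.
lpcnet, lpcnet_encoder, lpcnet_decoder = new_lpcnet_model(training=True, use_gpu=False)
lpcnet.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
lpcnet.summary()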
Example #11
File: twdcpm.py, Project: aascode/MEx
def build_late_fusion():
    input_dc = Input(shape=(12, 16 * window * dc_frames_per_second, 1))
    input_t = Input(shape=(window, feature_length, 1))
    input_w = Input(shape=(window, feature_length, 1))
    input_pm = Input(shape=(window, pm_frames_per_second * pm_frame_size, 1))

    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(input_dc)
    x = MaxPooling2D(pool_size=2, data_format='channels_last')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=2, data_format='channels_last')(x)
    x = BatchNormalization()(x)
    x = Flatten()(x)
    x = Dense(1200, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(600, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(100, activation='relu')(x)
    x = BatchNormalization()(x)

    y = TimeDistributed(Conv1D(32, kernel_size=5, activation='relu'))(input_t)
    y = TimeDistributed(MaxPooling1D(pool_size=2))(y)
    y = TimeDistributed(BatchNormalization())(y)
    y = TimeDistributed(Conv1D(64, kernel_size=5, activation='relu'))(y)
    y = TimeDistributed(MaxPooling1D(pool_size=2))(y)
    y = TimeDistributed(BatchNormalization())(y)
    y = Reshape((K.int_shape(y)[1], K.int_shape(y)[2] * K.int_shape(y)[3]))(y)
    y = LSTM(1200)(y)
    y = BatchNormalization()(y)
    y = Dense(600, activation='relu')(y)
    y = BatchNormalization()(y)
    y = Dense(100, activation='relu')(y)
    y = BatchNormalization()(y)

    z = TimeDistributed(Conv1D(32, kernel_size=5, activation='relu'))(input_pm)
    z = TimeDistributed(MaxPooling1D(pool_size=2))(z)
    z = TimeDistributed(BatchNormalization())(z)
    z = TimeDistributed(Conv1D(64, kernel_size=5, activation='relu'))(z)
    z = TimeDistributed(MaxPooling1D(pool_size=2))(z)
    z = TimeDistributed(BatchNormalization())(z)
    z = Reshape((K.int_shape(z)[1], K.int_shape(z)[2] * K.int_shape(z)[3]))(z)
    z = LSTM(1200)(z)
    z = BatchNormalization()(z)
    z = Dense(600, activation='relu')(z)
    z = BatchNormalization()(z)
    z = Dense(100, activation='relu')(z)
    z = BatchNormalization()(z)

    w = TimeDistributed(Conv1D(32, kernel_size=5, activation='relu'))(input_w)
    w = TimeDistributed(MaxPooling1D(pool_size=2))(w)
    w = TimeDistributed(BatchNormalization())(w)
    w = TimeDistributed(Conv1D(64, kernel_size=5, activation='relu'))(w)
    w = TimeDistributed(MaxPooling1D(pool_size=2))(w)
    w = TimeDistributed(BatchNormalization())(w)
    w = Reshape((K.int_shape(w)[1], K.int_shape(w)[2] * K.int_shape(w)[3]))(w)
    w = LSTM(1200)(w)
    w = BatchNormalization()(w)
    w = Dense(600, activation='relu')(w)
    w = BatchNormalization()(w)
    w = Dense(100, activation='relu')(w)
    w = BatchNormalization()(w)

    c = concatenate([x, y, z, w])
    c = Dense(len(activity_list), activation='softmax')(c)

    model = Model(inputs=[input_t, input_w, input_dc, input_pm], outputs=c)
    model.summary()
    return model
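
# Hedged usage sketch for build_late_fusion(): the module-level constants it relies on
# (window, feature_length, dc_frames_per_second, pm_frames_per_second, pm_frame_size,
# activity_list) are assumed to be defined elsewhere in the project; the optimizer and
# loss below are illustrative choices only.
late_fusion_model = build_late_fusion()
late_fusion_model.compile(optimizer='adam',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])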
Example #12
main_input = Input(shape=(maxlen, ), dtype='int32')
embedding_map = Embedding(output_dim=embedding_dims,
                          input_dim=max_features,
                          input_length=maxlen,
                          W_regularizer=l2(reg_conf[0]))(main_input)

##
convs = []
for index in range(embedding_dims):
    #print ("i:",index)
    t = Lambda(slice,
               output_shape=(maxlen, 1),
               arguments={'index': index},
               name='slice_' + str(index + 1))(embedding_map)
    x = Reshape((maxlen, 1, 1))(t)  #(batch, height, width, channels)

    # First convolution and pooling layer
    x = Convolution2D(m1,
                      w1,
                      1,
                      border_mode='valid',
                      subsample=(1, 1),
                      activation='linear',
                      dim_ordering='tf',
                      W_regularizer=l2(reg_conf[1]),
                      b_regularizer=l2(reg_conf[1]))(x)

    x = MaxPooling2D(pool_size=(2, 1),
                     strides=(2, 1),
                     border_mode='valid',
Example #13
def build_keras_trainer(game, config):
    """Build neural network model in Keras.

    Args:
        game (Game): Perfect information dynamics/game. Used to get information
                     like action/state space sizes etc.
        config (Config): Configuration loaded from a JSON file.

    Returns:
        KerasTrainer: Keras Sequential model wrapped in trainer object.
    """

    conv_filters = config.nn["conv_filters"]
    conv_kernel = config.nn["conv_kernel"]
    conv_stride = config.nn["conv_stride"]
    residual_bottleneck = config.nn["residual_bottleneck"]
    residual_filters = config.nn["residual_filters"]
    residual_kernel = config.nn["residual_kernel"]
    residual_num = config.nn["residual_num"]
    feature_extractor = config.nn["feature_extractor"]
    dense_size = config.nn["dense_size"]

    loss = config.nn['loss']
    l2_reg = config.nn["l2_regularizer"]
    lr = config.nn['lr']
    momentum = config.nn['momentum']

    DATA_FORMAT = image_data_format()
    BOARD_HEIGHT, BOARD_WIDTH = game.getBoardSize()
    ACTION_SIZE = game.getActionSize()

    def conv2d_n_batchnorm(x, filters, kernel_size, strides=1, shortcut=None):
        conv = Conv2D(filters,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding="same",
                      kernel_regularizer=l2(l2_reg),
                      data_format=DATA_FORMAT)(x)

        if DATA_FORMAT == 'channels_first':
            bn = BatchNormalization(axis=1)(conv)
        else:
            bn = BatchNormalization(axis=3)(conv)

        if shortcut is not None:
            out = add([bn, shortcut])
        else:
            out = bn

        return Activation(activation='relu')(out)

    def residual_block(x, filters, bottleneck, kernel_size):
        y = conv2d_n_batchnorm(x, bottleneck, kernel_size=1, strides=1)
        y = conv2d_n_batchnorm(y, bottleneck, kernel_size, strides=1)
        return conv2d_n_batchnorm(y,
                                  filters,
                                  kernel_size=1,
                                  strides=1,
                                  shortcut=x)

    # Add a channel dimension to the board input
    boards_input = Input(shape=(BOARD_HEIGHT, BOARD_WIDTH))
    if DATA_FORMAT == 'channels_first':
        x = Reshape((1, BOARD_HEIGHT, BOARD_WIDTH))(boards_input)
    else:
        x = Reshape((BOARD_HEIGHT, BOARD_WIDTH, 1))(boards_input)

    # Input convolution
    if conv_filters > 0:
        x = conv2d_n_batchnorm(x,
                               filters=conv_filters,
                               kernel_size=conv_kernel,
                               strides=conv_stride)

    # Tower of residual blocks
    if residual_filters > 0:
        if conv_filters != residual_filters:
            # Add additional layer to even out the number of filters between input CNN
            # and residual blocks, so that residual shortcut connection works properly
            x = conv2d_n_batchnorm(x,
                                   filters=residual_filters,
                                   kernel_size=residual_kernel,
                                   strides=1)
        for _ in range(residual_num):
            x = residual_block(x, residual_filters, residual_bottleneck,
                               residual_kernel)

    # Final feature extractors
    if feature_extractor == "agz":
        pi = Flatten()(conv2d_n_batchnorm(x,
                                          filters=2,
                                          kernel_size=1,
                                          strides=1))
        value = Flatten()(conv2d_n_batchnorm(x,
                                             filters=1,
                                             kernel_size=1,
                                             strides=1))
        value = Dense(dense_size,
                      activation='relu',
                      kernel_regularizer=l2(l2_reg))(value)
    elif feature_extractor == "avgpool":
        x = GlobalAveragePooling2D(data_format=DATA_FORMAT)(x)
        pi = value = Dense(dense_size,
                           activation='relu',
                           kernel_regularizer=l2(l2_reg))(x)
    elif feature_extractor == "flatten":
        x = Flatten()(x)
        pi = value = Dense(dense_size,
                           activation='relu',
                           kernel_regularizer=l2(l2_reg))(x)
    else:
        raise ValueError(
            "Unknown feature extractor! Possible values: 'agz', 'avgpool', 'flatten'"
        )

    # Heads
    pi = Dense(ACTION_SIZE,
               activation='softmax',
               kernel_regularizer=l2(l2_reg),
               name='pi')(pi)
    value = Dense(1,
                  activation='tanh',
                  kernel_regularizer=l2(l2_reg),
                  name='value')(value)

    # Create model
    model = Model(inputs=boards_input, outputs=[pi, value])

    # Compile model
    model.compile(loss=loss,
                  optimizer=SGD(lr=lr, momentum=momentum, nesterov=True),
                  metrics=['accuracy'])

    # Log model architecture
    model.summary(print_fn=lambda x: log.debug("%s", x))
    return KerasTrainer(model, config.training)
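
# Hedged usage sketch for build_keras_trainer(): the `game` and `config` objects are
# project-specific and their construction is not shown in this snippet; the names
# below are placeholders, not the project's real API.
# game = Game()                   # hypothetical: any object exposing getBoardSize()/getActionSize()
# config = Config('config.json')  # hypothetical: any object exposing .nn and .training
# trainer = build_keras_trainer(game, config)
# trainer.model.summary()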
Example #14
x = Conv2D(32, (3, 3), activation='tanh', padding='same',
           name='conv_tanh_1')(input_img)
x = Conv2D(32, (3, 3), activation='tanh', padding='same',
           name='conv_tanh_2')(x)
x = MaxPooling2D((2, 2), padding='same', name='maxpool_1')(x)
x = Dropout((0.25), name='dropout_1')(x)
x = Flatten(name='flat_1')(x)
x = Dense(units=512, activation='tanh', name='dense_tanh_1')(x)
x = Dropout((0.5), name='dropout_2')(x)

encoded = Dense(units=latent_dim, activation='softmax',
                name='dens_softmax_1')(x)
# at this point the representation is 101-dimensional

x = Dense(units=512, activation='tanh')(encoded)
x = Dense(units=131072)(x)
x = Reshape((64, 64, 32), input_shape=(131072, ))(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='tanh', padding='same')(x)
x = Conv2D(32, (3, 3), activation='tanh', padding='same')(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='mse')
print(autoencoder.summary())

autoencoder.fit_generator(
    tuple_generator(train_generator),
    steps_per_epoch=train_samples / batch_size,
    epochs=epochs,
    validation_data=tuple_generator(validation_generator),
    validation_steps=nb_validation_samples)
# Shape info needed to build Decoder Model
shape = K.int_shape(x)

# Generate a 16-dim latent vector
x = Flatten()(x)
latent = Dense(latent_dim, name='latent_vector')(x)

# Instantiate Encoder Model
encoder = Model(inputs, latent, name='encoder')
encoder.summary()
plot_model(encoder, to_file='encoder.png', show_shapes=True)

# Build the Decoder Model
latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# Stack of BN-ReLU-Transposed Conv2D-UpSampling2D blocks
for i in range(2):
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        padding='same')(x)
    x = UpSampling2D()(x)
    filters //= 2

x = Conv2DTranspose(filters=1,
                    kernel_size=kernel_size,
                    padding='same')(x)
Example #16
# Layer 22
x = Conv2D(1024, (3, 3),
           strides=(1, 1),
           padding='same',
           name='conv_22',
           use_bias=False)(x)
x = BatchNormalization(name='norm_22')(x)
x = LeakyReLU(alpha=0.1)(x)

# Layer 23
x = Conv2D(BOX * (4 + 1 + CLASS), (1, 1),
           strides=(1, 1),
           padding='same',
           name='conv_23')(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras build the model
# for more information: https://github.com/fchollet/keras/issues/2790
output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)

# In[ ]:

model.summary()

# # Load pretrained weights

# **Load the weights originally provided by YOLO**
Example #17
def Inception_Inflated3d(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=None,
                         dropout_prob=0.0,
                         endpoint_logit=True,
                         classes=400):
	"""Instantiates the Inflated 3D Inception v1 architecture.
    Optionally loads weights pre-trained
    on Kinetics. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format='channels_last'` in your Keras config
    at ~/.keras/keras.json.
    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.
    Note that the default input frame(image) size for this model is 224x224.
    # Arguments
        include_top: whether to include the classification
            layer at the top of the network.
        weights: one of `None` (random initialization)
            or 'kinetics_only' (pre-training on Kinetics dataset only).
            or 'imagenet_and_kinetics' (pre-training on ImageNet and Kinetics datasets).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(NUM_FRAMES, 224, 224, 3)` with `channels_last` data format
            or `(NUM_FRAMES, 3, 224, 224)` with `channels_first` data format).
            It should have exactly 3 input channels.
            NUM_FRAMES should be no smaller than 8. The authors used 64
            frames per example for training and testing on the Kinetics dataset.
            Also, width and height should be no smaller than 32.
            E.g. `(64, 150, 150, 3)` would be one valid value.
        dropout_prob: optional, dropout probability applied in dropout layer
            after global average pooling layer. 
            0.0 means no dropout is applied, 1.0 means dropout is applied to all features.
            Note: Since Dropout is applied just before the classification
            layer, it is only useful when `include_top` is set to True.
        endpoint_logit: (boolean) optional. If True, the model's forward pass
            will end at producing logits. Otherwise, softmax is applied after producing
            the logits to produce the class probabilities prediction. Setting this parameter 
            to True is particularly useful when you want to combine results of rgb model
            and optical flow model.
            - `True` end model forward pass at logit output
            - `False` go further after logit to produce softmax predictions
            Note: This parameter is only useful when `include_top` is set to True.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
	if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)):
		raise ValueError('The `weights` argument should be either '
		                 '`None` (random initialization) or %s' %
		                 str(WEIGHTS_NAME) + ' '
		                                     'or a valid path to a file containing `weights` values')

	if weights in WEIGHTS_NAME and include_top and classes != 400:
		raise ValueError('If using `weights` as one of these %s, with `include_top`'
		                 ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

	# Determine proper input shape
	input_shape = _obtain_input_shape(
		input_shape,
		default_frame_size=224,
		min_frame_size=32,
		default_num_frames=64,
		min_num_frames=8,
		data_format=K.image_data_format(),
		require_flatten=include_top,
		weights=weights)

	if input_tensor is None:
		img_input = Input(shape=input_shape)
	else:
		if not K.is_keras_tensor(input_tensor):
			img_input = Input(tensor=input_tensor, shape=input_shape)
		else:
			img_input = input_tensor

	if K.image_data_format() == 'channels_first':
		channel_axis = 1
	else:
		channel_axis = 4

	# Downsampling via convolution (spatial and temporal)
	x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same', name='Conv3d_1a_7x7')

	# Downsampling (spatial only)
	x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_2a_3x3')(x)
	x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same', name='Conv3d_2b_1x1')
	x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same', name='Conv3d_2c_3x3')

	# Downsampling (spatial only)
	x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_3a_3x3')(x)

	# Mixed 3b
	branch_0 = conv3d_bn(x, 64, 1, 1, 1, padding='same', name='Conv3d_3b_0a_1x1')

	branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_3b_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 128, 3, 3, 3, padding='same', name='Conv3d_3b_1b_3x3')

	branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_3b_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 32, 3, 3, 3, padding='same', name='Conv3d_3b_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3b_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 32, 1, 1, 1, padding='same', name='Conv3d_3b_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_3b')

	# Mixed 3c
	branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_0a_1x1')

	branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_3c_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 192, 3, 3, 3, padding='same', name='Conv3d_3c_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_3c_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 96, 3, 3, 3, padding='same', name='Conv3d_3c_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_3c_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_3c_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_3c')

	# Downsampling (spatial and temporal)
	x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same', name='MaxPool2d_4a_3x3')(x)

	# Mixed 4b
	branch_0 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_4b_0a_1x1')

	branch_1 = conv3d_bn(x, 96, 1, 1, 1, padding='same', name='Conv3d_4b_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 208, 3, 3, 3, padding='same', name='Conv3d_4b_1b_3x3')

	branch_2 = conv3d_bn(x, 16, 1, 1, 1, padding='same', name='Conv3d_4b_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 48, 3, 3, 3, padding='same', name='Conv3d_4b_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4b_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4b_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4b')

	# Mixed 4c
	branch_0 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4c_0a_1x1')

	branch_1 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4c_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 224, 3, 3, 3, padding='same', name='Conv3d_4c_1b_3x3')

	branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4c_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4c_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4c_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4c_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4c')

	# Mixed 4d
	branch_0 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_0a_1x1')

	branch_1 = conv3d_bn(x, 128, 1, 1, 1, padding='same', name='Conv3d_4d_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 256, 3, 3, 3, padding='same', name='Conv3d_4d_1b_3x3')

	branch_2 = conv3d_bn(x, 24, 1, 1, 1, padding='same', name='Conv3d_4d_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4d_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4d_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4d_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4d')

	# Mixed 4e
	branch_0 = conv3d_bn(x, 112, 1, 1, 1, padding='same', name='Conv3d_4e_0a_1x1')

	branch_1 = conv3d_bn(x, 144, 1, 1, 1, padding='same', name='Conv3d_4e_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 288, 3, 3, 3, padding='same', name='Conv3d_4e_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4e_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 64, 3, 3, 3, padding='same', name='Conv3d_4e_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4e_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 64, 1, 1, 1, padding='same', name='Conv3d_4e_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4e')

	# Mixed 4f
	branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_4f_0a_1x1')

	branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_4f_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_4f_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_4f_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_4f_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_4f_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_4f_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_4f')

	# Downsampling (spatial and temporal)
	x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same', name='MaxPool2d_5a_2x2')(x)

	# Mixed 5b
	branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='Conv3d_5b_0a_1x1')

	branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3')

	branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_5b')

	# Mixed 5c
	branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1')

	branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1')
	branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3')

	branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1')
	branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3')

	branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3')(x)
	branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1')

	x = layers.concatenate(
		[branch_0, branch_1, branch_2, branch_3],
		axis=channel_axis,
		name='Mixed_5c')

	if include_top:
		# Classification block
		x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)
		x = Dropout(dropout_prob)(x)

		x = conv3d_bn(x, classes, 1, 1, 1, padding='same',
		              use_bias=True, use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1')

		num_frames_remaining = int(x.shape[1])
		x = Reshape((num_frames_remaining, classes))(x)

		# logits (raw scores for each class)
		x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
		           output_shape=lambda s: (s[0], s[2]))(x)

		if not endpoint_logit:
			x = Activation('softmax', name='prediction')(x)
	else:
		h = int(x.shape[2])
		w = int(x.shape[3])
		x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)

	inputs = img_input
	# create model
	model = Model(inputs, x, name='i3d_inception')

	# load weights
	if weights in WEIGHTS_NAME:
		if weights == WEIGHTS_NAME[0]:  # rgb_kinetics_only
			if include_top:
				weights_url = WEIGHTS_PATH['rgb_kinetics_only']
				model_name = 'i3d_inception_rgb_kinetics_only.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['rgb_kinetics_only']
				model_name = 'i3d_inception_rgb_kinetics_only_no_top.h5'

		elif weights == WEIGHTS_NAME[1]:  # flow_kinetics_only
			if include_top:
				weights_url = WEIGHTS_PATH['flow_kinetics_only']
				model_name = 'i3d_inception_flow_kinetics_only.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['flow_kinetics_only']
				model_name = 'i3d_inception_flow_kinetics_only_no_top.h5'

		elif weights == WEIGHTS_NAME[2]:  # rgb_imagenet_and_kinetics
			if include_top:
				weights_url = WEIGHTS_PATH['rgb_imagenet_and_kinetics']
				model_name = 'i3d_inception_rgb_imagenet_and_kinetics.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['rgb_imagenet_and_kinetics']
				model_name = 'i3d_inception_rgb_imagenet_and_kinetics_no_top.h5'

		elif weights == WEIGHTS_NAME[3]:  # flow_imagenet_and_kinetics
			if include_top:
				weights_url = WEIGHTS_PATH['flow_imagenet_and_kinetics']
				model_name = 'i3d_inception_flow_imagenet_and_kinetics.h5'
			else:
				weights_url = WEIGHTS_PATH_NO_TOP['flow_imagenet_and_kinetics']
				model_name = 'i3d_inception_flow_imagenet_and_kinetics_no_top.h5'

		downloaded_weights_path = get_file(model_name, weights_url, cache_subdir='models')
		model.load_weights(downloaded_weights_path)

		if K.backend() == 'theano':
			layer_utils.convert_all_kernels_in_model(model)

		if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
			warnings.warn('You are using the TensorFlow backend, yet you '
			              'are using the Theano '
			              'image data format convention '
			              '(`image_data_format="channels_first"`). '
			              'For best performance, set '
			              '`image_data_format="channels_last"` in '
			              'your keras config '
			              'at ~/.keras/keras.json.')

	elif weights is not None:
		model.load_weights(weights)

	return model
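
# Hedged usage sketch for Inception_Inflated3d(): 'rgb_imagenet_and_kinetics' is one of
# the weight names handled above; include_top=True with the default 400 Kinetics classes
# keeps the default (64, 224, 224, 3) input shape. Illustrative only.
i3d_rgb = Inception_Inflated3d(include_top=True,
                               weights='rgb_imagenet_and_kinetics',
                               classes=400)
i3d_rgb.summary()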
Example #18
	y_train = sequence.pad_sequences(y_trj, maxlen = maxLen, dtype='float', padding = 'post', value=0.)
else:
	maxLen = inputSize

#MODEL SETUP
#Setup 1D PCA-like time-lagged autoencoder

input_shape = Input(shape=(maxLen,3))

#Encoder:
inputLayer = Flatten()(input_shape)
encoded = Dense(bottleneck)(inputLayer)

#Decoder:
decoded = Dense(maxLen*3)(encoded)
reshaped = Reshape((maxLen,3))(decoded)

#Mask the padded data.
if(pad):
	decoded = Masking(0.)(decoded)

#Compile model
autoencoder = Model(input_shape,reshaped)
autoencoder.compile(optimizer='adadelta', loss='mean_squared_error')

#Train!
training_start = time.time()
autoencoder.fit(x_train,y_train,epochs=nEpochs, batch_size=batchSize)
training_end = time.time()

#See performance on training data
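# A hedged, illustrative continuation (not the original author's code): report the
# elapsed training time and score the fitted autoencoder on the training data;
# x_train, y_train, autoencoder and batchSize all come from the snippet above.
print('Training took %.1f seconds' % (training_end - training_start))
train_mse = autoencoder.evaluate(x_train, y_train, batch_size=batchSize, verbose=0)
print('MSE on training data: %f' % train_mse)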
Example #19
        processed_batch = batch.astype('bool')
        return processed_batch

    def process_reward(self, reward):
     #  return np.clip(reward, -1., 1.)
        return reward

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
input_shape = (num_zones, MAP_X, MAP_Y)
print(input_shape)
print(env.micro.map.getMapState().shape)
assert input_shape == env.micro.map.getMapState().shape


model = Sequential()
model.add(Reshape((input_shape), input_shape=(WINDOW_LENGTH,) + input_shape))
if K.image_dim_ordering() == 'tf':
    print('tensorflow ordering')
    # (width, height, channels)
    model.add(Permute((2, 3, 1), input_shape=input_shape))
    permute_shape = (MAP_X, MAP_Y, num_zones)
elif K.image_dim_ordering() == 'th':
    # (channels, width, height)
    model.add(Permute((1, 2, 3), input_shape=input_shape))
    permute_shape = (num_zones, MAP_X, MAP_Y)
else:
    raise RuntimeError('Unknown image_dim_ordering.')

model.add(Convolution2D(32, (8, 8), strides=(2, 2), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, (4, 4), strides=(2, 2), padding='same'))
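# The excerpt above stops after the second convolution. A hedged sketch of how a
# Mnih et al. (2015)-style head is typically completed (third conv block, flatten,
# dense layers and a linear output over the actions); nb_actions is assumed to be
# defined elsewhere (e.g. from the environment's action space), and Flatten/Dense
# are assumed to be imported in the original file.
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1), padding='same'))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions, activation='linear'))
model.summary()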
Example #20
def kimCNN(embedding_output_size, imput_size, vocab_size, num_labels=5,loss='categorical_crossentropy'):
    """
    Convolution neural network model for sentence classification.
    Parameters
    ----------
    embedding_output_size: Dimension of the embedding space.
    vocab_size: size of the vocabulary
    imput_size: number of features of the imput.
    num_labels: number of output labels
    Returns
    -------
    compiled keras model
    """
    print('Preparing embedding matrix.')

    embedding_layer = Embedding(input_dim=vocab_size,
                                output_dim=embedding_output_size,
                                input_length=imput_size,
                                trainable=True)

    print('Training model.')

    sequence_input = Input(shape=(imput_size,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    print(embedded_sequences.shape)

    # add first conv filter
    embedded_sequences = Reshape((imput_size, embedding_output_size, 1))(embedded_sequences)
    x = Conv2D(100, (5, embedding_output_size), activation='relu')(embedded_sequences)
    x = MaxPooling2D((imput_size - 5 + 1, 1))(x)

    # add second conv filter.
    y = Conv2D(100, (4, embedding_output_size), activation='relu')(embedded_sequences)
    y = MaxPooling2D((imput_size - 4 + 1, 1))(y)

    # add third conv filter.
    z = Conv2D(100, (3, embedding_output_size), activation='relu')(embedded_sequences)
    z = MaxPooling2D((imput_size - 3 + 1, 1))(z)

    # concatenate the three convolution branches
    alpha = concatenate([x, y, z])

    # flatten the pooled features.
    alpha = Flatten()(alpha)

    # dropout
    alpha = Dropout(0.5)(alpha)

    # predictions
    preds = Dense(num_labels, activation='softmax')(alpha)

    # build model
    model = Model(sequence_input, preds)
    adadelta = optimizers.Adadelta()

    model.compile(loss=loss,
                  optimizer=adadelta,
                  metrics=['acc'])
    model.summary()

    return model
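# Hedged usage sketch for kimCNN above; the data is random and the shapes
# (1000 samples, sequences of 50 tokens, vocabulary of 5000) are illustrative
# assumptions, not values from the original code.
import numpy as np
from keras.utils import to_categorical

vocab_size, seq_len, n_labels = 5000, 50, 5
X = np.random.randint(1, vocab_size, size=(1000, seq_len))
y = to_categorical(np.random.randint(0, n_labels, size=(1000,)), num_classes=n_labels)

cnn = kimCNN(embedding_output_size=128, imput_size=seq_len,
             vocab_size=vocab_size, num_labels=n_labels)
cnn.fit(X, y, batch_size=32, epochs=1, validation_split=0.1)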
    input_data_test = np.load('vectorized_883_400_test.npy')
else:
    input_data = build_input('train', 400)
    input_data_test = build_input('test', 400)

output_data = build_output('train')
output_data_test = build_output('test')

##  CNN classifier ######
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dropout

##Building CNN
model = Sequential()
model.add(
    Embedding(int(np.max(input_data)) + 1, 200, input_length=input_data.shape[1]))  # +1 so the largest token index is a valid embedding row
model.add(Reshape((200, input_data.shape[1])))
model.add(Conv1D(50, 10))
model.add(MaxPooling1D(5))
model.add(Dropout(0.3))
model.add(Conv1D(50, 10))
model.add(MaxPooling1D(5))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.3))
model.add((Dense(output_data.shape[1], activation='sigmoid')))

##Compilation and training
model.compile(loss='binary_crossentropy', optimizer='nadam')
Example #22
def myCrossLayer(nb_flow=2,
                 map_height=16,
                 map_width=8,
                 nb_layers=3,
                 window_len=12,
                 nb_filter=64,
                 external_dim=None,
                 filter_size=3):
    """
    the final model
    :param nb_flow: number of measurements, also number of channels of each picture sample
    :param map_height: grid map height, here is 16
    :param map_width: grid map width, here is 8
    :param nb_layers: number of cnn layers
    :return:
    """
    window_len_pic_fea = []
    main_inputs = []
    if external_dim is None:
        for i in range(window_len):
            inputs = Input(shape=(nb_flow, map_height, map_width))
            main_inputs.append(inputs)
            cnn_fea = dense_conv3D(nb_filter=nb_filter,
                                   nb_col=filter_size,
                                   nb_row=filter_size,
                                   padding='same',
                                   nb_layers=nb_layers,
                                   dense_units=1024,
                                   dropout_rate=0.5)(inputs)
            # cnn_fea_flatten = Reshape(([nb_layers * 1024]))(cnn_fea)
            cnn_fea_flatten = Reshape(([1024]))(cnn_fea)
            # cnn_fea_flatten = Dropout(rate=0.3)(cnn_fea_flatten)
            # cnn_fea_flatten = expand_dims(cnn_fea_flatten, axis=1)
            cnn_fea_flatten = Lambda(expand_dim_backend)(cnn_fea_flatten)
            window_len_pic_fea.append(cnn_fea_flatten)
    # add external feature here
    if external_dim is not None and external_dim > 0:
        for i in range(window_len):
            # todo : use two tensor to represent the data and meta_data respectively
            inputs = Input(shape=((nb_flow, map_height, map_width),
                                  external_dim))
            main_inputs.append(inputs)
            inputs_0 = inputs
            inputs_1 = inputs
            cnn_fea = dense_conv3D(nb_filter=nb_filter,
                                   nb_col=filter_size,
                                   nb_row=filter_size,
                                   padding='same',
                                   nb_layers=nb_layers,
                                   dense_units=1024,
                                   dropout_rate=0.5)(inputs_0)
            # cnn_fea_flatten = Reshape(([nb_layers * 1024]))(cnn_fea)
            cnn_fea_flatten = Reshape(([1024]))(cnn_fea)
            # cnn_fea_flatten = Dropout(rate=0.3)(cnn_fea_flatten)
            # cnn_fea_flatten = expand_dims(cnn_fea_flatten, axis=1)
            cnn_fea_flatten = Lambda(expand_dim_backend)(cnn_fea_flatten)
            window_len_pic_fea.append(cnn_fea_flatten)

        external_input = inputs_1
        # external_input = Input(shape=(external_dim,))
        main_inputs.append(external_input)
        # todo: change the code here
        embedding = Dense(nb_layers * 1024, activation='relu')(external_input)
        external_out = Lambda(expand_dim_backend)(embedding)
        new_concatenate_fea = []
        for pic_fea in window_len_pic_fea:
            tmp_con = Concatenate(axis=-1)([pic_fea, external_out])
            new_concatenate_fea.append(tmp_con)
        window_len_pic_fea = new_concatenate_fea

    outputs = add_densenet(nb_flow=nb_flow,
                           map_height=map_height,
                           map_width=map_width)(window_len_pic_fea)
    # outputs = add_lstm(nb_flow=nb_flow, map_height=map_height, map_width=map_width)(window_len_pic_fea)
    # outputs = attention_after_LSTM(nb_flow=nb_flow, map_height=map_height,
    #                                map_width=map_width, window_len=window_len)(window_len_pic_fea)
    model = Model(inputs=main_inputs, outputs=outputs)
    return model
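# expand_dim_backend is used by the Lambda layers above but is not defined in this
# excerpt. A plausible one-liner (an assumption, not the original code): insert a
# length-1 time axis so each frame's feature vector can later be concatenated
# along that axis.
from keras import backend as K

def expand_dim_backend(x):
    return K.expand_dims(x, axis=1)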
Example #23
    PReLU(alpha_initializer='zeros',
          alpha_regularizer=None,
          alpha_constraint=None,
          shared_axes=[1, 2]))
# model.add(BatchNormalization())
model.add(Conv2D(c, (5, 5), padding='same', strides=1))
model.add(
    PReLU(alpha_initializer='zeros',
          alpha_regularizer=None,
          alpha_constraint=None,
          shared_axes=[1, 2]))
# model.add(BatchNormalization(name='last'))
model.add(Flatten(name='last'))
model.add(ChannelNormalizer(sqrtk, name='normal'))
model.add(ChannelNoise(std, name='noise'))
model.add(Reshape([8, 8, c]))

# Decoder
model.add(Conv2DTranspose(32, (5, 5), padding='same', strides=1))
model.add(
    PReLU(alpha_initializer='zeros',
          alpha_regularizer=None,
          alpha_constraint=None,
          shared_axes=[1, 2]))
# model.add(BatchNormalization())
model.add(Conv2DTranspose(32, (5, 5), padding='same', strides=1))
model.add(
    PReLU(alpha_initializer='zeros',
          alpha_regularizer=None,
          alpha_constraint=None,
          shared_axes=[1, 2]))
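# ChannelNormalizer and ChannelNoise above are custom layers that are not part of
# Keras and are not defined in this excerpt. A hedged sketch of what they plausibly
# do in this autoencoder-over-a-noisy-channel setup: fix the average power of the
# encoded vector, then add Gaussian channel noise with the given standard deviation.
from keras import backend as K
from keras.layers import Layer

class ChannelNormalizer(Layer):
    def __init__(self, norm_factor, **kwargs):
        super(ChannelNormalizer, self).__init__(**kwargs)
        self.norm_factor = norm_factor  # e.g. sqrt(k) for a k-symbol channel

    def call(self, inputs):
        # Unit-normalize each encoding, then scale to the target power.
        return self.norm_factor * K.l2_normalize(inputs, axis=-1)

class ChannelNoise(Layer):
    def __init__(self, std, **kwargs):
        super(ChannelNoise, self).__init__(**kwargs)
        self.std = std

    def call(self, inputs):
        # Additive white Gaussian noise simulating the channel.
        return inputs + K.random_normal(K.shape(inputs), mean=0.0, stddev=self.std)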
Example #24
    def __init__(self):
        super(CNNLSTMModel, self).__init__()
        _NUM_CLASSES = 4
        input_shape = (
            128,
            128,
            1,
        )
        self.input_layer = Input(shape=input_shape)
        bn_axis = 2
        # Block 1
        self.bn1 = BatchNormalization(axis=bn_axis)
        self.conv1 = Conv2D(64, (3, 3),
                            strides=(1, 1),
                            activation='relu',
                            padding='same',
                            name='conv1')
        self.pool1 = MaxPooling2D((2, 2),
                                  strides=(2, 2),
                                  padding='same',
                                  name='pool1')

        # Block 2
        self.bn2 = BatchNormalization(axis=bn_axis)
        self.conv2 = Conv2D(128, (3, 3),
                            strides=(1, 1),
                            activation='relu',
                            padding='same',
                            name='conv2')
        self.pool2 = MaxPooling2D((2, 2),
                                  strides=(2, 2),
                                  padding='same',
                                  name='pool2')

        # Block 3
        self.bn3 = BatchNormalization(axis=bn_axis)
        self.conv3_1 = Conv2D(256, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              name='conv3/conv3_1')
        self.conv3_2 = Conv2D(256, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              name='conv3/conv3_2')
        self.pool3 = MaxPooling2D((2, 2),
                                  strides=(2, 2),
                                  padding='same',
                                  name='pool3')

        # Block 4
        self.bn4 = BatchNormalization(axis=bn_axis)
        self.conv4_1 = Conv2D(512, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              name='conv4/conv4_1')
        self.conv4_2 = Conv2D(512, (3, 3),
                              strides=(1, 1),
                              activation='relu',
                              padding='same',
                              name='conv4/conv4_2')
        self.pool4 = MaxPooling2D((2, 2),
                                  strides=(2, 2),
                                  padding='same',
                                  name='pool4')

        self.reshape1 = Reshape((8, -1))
        self.lstm1 = LSTM(128,
                          return_sequences=True,
                          return_state=False,
                          kernel_regularizer=regularizers.l2(0.01),
                          stateful=False)
        self.lstm2 = LSTM(128,
                          return_sequences=True,
                          return_state=False,
                          kernel_regularizer=regularizers.l2(0.01),
                          stateful=False)
        self.dropout1 = Dropout(0.5)
        self.dense1 = Dense(_NUM_CLASSES, activation='softmax')
        outputs = self.create_model()
        self.inputs = self.input_layer
        self.outputs = outputs
        self.build(input_shape)
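    # create_model() is called in __init__ above but is not shown in this excerpt.
    # A hedged sketch of the forward wiring it presumably performs: chain the four
    # conv/pool blocks, reshape to a (time, features) sequence, run both LSTMs,
    # then dropout and the softmax classifier. How the LSTM sequence output is
    # reduced before the softmax is not visible in the excerpt, so this is a guess.
    def create_model(self):
        x = self.bn1(self.input_layer)
        x = self.pool1(self.conv1(x))
        x = self.bn2(x)
        x = self.pool2(self.conv2(x))
        x = self.bn3(x)
        x = self.pool3(self.conv3_2(self.conv3_1(x)))
        x = self.bn4(x)
        x = self.pool4(self.conv4_2(self.conv4_1(x)))
        x = self.reshape1(x)
        x = self.lstm2(self.lstm1(x))
        x = self.dropout1(x)
        return self.dense1(x)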
Example #25
	def CreateModel(self):
		'''
		Defines the CNN/LSTM/CTC model using the Keras functional API.
		Input layer: a sequence of 200-dimensional feature vectors; the maximum length of one utterance is set to 1600 frames (about 16 s).
		Hidden layers: convolution + pooling layers, 3x3 convolution kernels, pooling window of size 2.
		Hidden layer: fully connected layer.
		Output layer: fully connected layer with self.MS_OUTPUT_SIZE units and a softmax activation.
		CTC layer: uses the CTC loss as the loss function for connectionist temporal classification with multiple outputs.
		'''
		
		input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))
		
		layer_h1 = Conv2D(32, (3,3), use_bias=False, activation='relu', padding='same', kernel_initializer='he_normal')(input_data) # convolution layer
		layer_h1 = Dropout(0.05)(layer_h1)
		layer_h2 = Conv2D(32, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h1) # convolution layer
		layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2) # pooling layer
		#layer_h3 = Dropout(0.2)(layer_h2) # randomly drop some connections to prevent overfitting
		layer_h3 = Dropout(0.05)(layer_h3)
		layer_h4 = Conv2D(64, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h3) # convolution layer
		layer_h4 = Dropout(0.1)(layer_h4)
		layer_h5 = Conv2D(64, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h4) # convolution layer
		layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5) # pooling layer
		
		layer_h6 = Dropout(0.1)(layer_h6)
		layer_h7 = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h6) # convolution layer
		layer_h7 = Dropout(0.15)(layer_h7)
		layer_h8 = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h7) # convolution layer
		layer_h9 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h8) # pooling layer
		
		layer_h9 = Dropout(0.15)(layer_h9)
		layer_h10 = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h9) # convolution layer
		layer_h10 = Dropout(0.2)(layer_h10)
		layer_h11 = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h10) # convolution layer
		layer_h12 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h11) # pooling layer
		
		layer_h12 = Dropout(0.2)(layer_h12)
		layer_h13 = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h12) # convolution layer
		layer_h13 = Dropout(0.2)(layer_h13)
		layer_h14 = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h13) # convolution layer
		layer_h15 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h14) # pooling layer
		
		#test=Model(inputs = input_data, outputs = layer_h12)
		#test.summary()
		
		layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape layer
		#layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer
		#layer_h6 = Dropout(0.2)(layer_h5) # randomly drop some connections to prevent overfitting
		layer_h16 = Dropout(0.3)(layer_h16)
		layer_h17 = Dense(128, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h16) # fully connected layer
		layer_h17 = Dropout(0.3)(layer_h17)
		layer_h18 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, kernel_initializer='he_normal')(layer_h17) # fully connected layer
		
		y_pred = Activation('softmax', name='Activation0')(layer_h18)
		model_data = Model(inputs = input_data, outputs = y_pred)
		#model_data.summary()
		
		labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32')
		input_length = Input(name='input_length', shape=[1], dtype='int64')
		label_length = Input(name='label_length', shape=[1], dtype='int64')
		# Keras doesn't currently support loss funcs with extra parameters
		# so CTC loss is implemented in a lambda layer
		
		#layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
		loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
		
		
		
		model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
		model = ParallelModel(model, NUM_GPU)
		
		model.summary()
		
		# clipnorm seems to speed up convergence
		#sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
		#opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06)
		opt = Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999, decay = 0.0, epsilon = 10e-8)
		#model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
		model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = opt)
		
		
		# captures output of softmax so we can decode the output during visualization
		test_func = K.function([input_data], [y_pred])
		
		#print('[*Tip] Model created successfully, model compiled successfully')
		print('[*Info] Create Model Successful, Compiles Model Successful. ')
		return model, model_data
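	# self.ctc_lambda_func is referenced in CreateModel above but is not shown in
	# this excerpt. A hedged sketch of the usual Keras pattern: wrap K.ctc_batch_cost
	# in a method so the CTC loss can be computed inside the Lambda layer, since
	# Keras loss functions cannot take the extra length tensors directly.
	def ctc_lambda_func(self, args):
		y_pred, labels, input_length, label_length = args
		return K.ctc_batch_cost(labels, y_pred, input_length, label_length)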
Example #26
def Inception_Inflated3d(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=None,
                         dropout_prob=0.0,
                         endpoint_logit=True,
                         classes=400):

    if not (weights in WEIGHTS_NAME or weights is None
            or os.path.exists(weights)):
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization) or %s' % str(WEIGHTS_NAME) + ' '
            'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError(
            'If using `weights` as one of these %s, with `include_top`'
            ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_frame_size=224,
                                      min_frame_size=32,
                                      default_num_frames=64,
                                      min_num_frames=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = 4

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input,
                  64,
                  7,
                  7,
                  7,
                  strides=(2, 2, 2),
                  padding='same',
                  name='Conv3d_1a_7x7')
    #print(x)
    now = datetime.datetime.now()
    timestamp = str(now)
    #np.save('/home/mech/btech/me1130654/keras-kinetics-i3d/'+timestamp+'.npy', x)

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3),
                     strides=(1, 2, 2),
                     padding='same',
                     name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x,
                  64,
                  1,
                  1,
                  1,
                  strides=(1, 1, 1),
                  padding='same',
                  name='Conv3d_2b_1x1')
    x = conv3d_bn(x,
                  192,
                  3,
                  3,
                  3,
                  strides=(1, 1, 1),
                  padding='same',
                  name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3),
                     strides=(1, 2, 2),
                     padding='same',
                     name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b
    branch_0 = conv3d_bn(x,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_0a_1x1')

    branch_1 = conv3d_bn(x,
                         96,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3b_1b_3x3')

    branch_2 = conv3d_bn(x,
                         16,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         32,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_3b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_3b')

    # Mixed 3c
    branch_0 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_0a_1x1')

    branch_1 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         192,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3c_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         96,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_3c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_3c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_3c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_3c')

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3),
                     strides=(2, 2, 2),
                     padding='same',
                     name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b
    branch_0 = conv3d_bn(x,
                         192,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_0a_1x1')

    branch_1 = conv3d_bn(x,
                         96,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         208,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4b_1b_3x3')

    branch_2 = conv3d_bn(x,
                         16,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         48,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4b')

    # Mixed 4c
    branch_0 = conv3d_bn(x,
                         160,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_0a_1x1')

    branch_1 = conv3d_bn(x,
                         112,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         224,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4c_1b_3x3')

    branch_2 = conv3d_bn(x,
                         24,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         64,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4c')

    # Mixed 4d
    branch_0 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_0a_1x1')

    branch_1 = conv3d_bn(x,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         256,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4d_1b_3x3')

    branch_2 = conv3d_bn(x,
                         24,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         64,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4d_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4d_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4d_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4d')

    # Mixed 4e
    branch_0 = conv3d_bn(x,
                         112,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_0a_1x1')

    branch_1 = conv3d_bn(x,
                         144,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         288,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4e_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         64,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4e_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4e_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         64,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4e_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4e')

    # Mixed 4f
    branch_0 = conv3d_bn(x,
                         256,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_0a_1x1')

    branch_1 = conv3d_bn(x,
                         160,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         320,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4f_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_4f_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_4f_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_4f_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_4f')

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2),
                     strides=(2, 2, 2),
                     padding='same',
                     name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b
    branch_0 = conv3d_bn(x,
                         256,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_0a_1x1')

    branch_1 = conv3d_bn(x,
                         160,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         320,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5b_1b_3x3')

    branch_2 = conv3d_bn(x,
                         32,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5b_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_5b_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5b_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_5b')

    # Mixed 5c
    branch_0 = conv3d_bn(x,
                         384,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_0a_1x1')

    branch_1 = conv3d_bn(x,
                         192,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_1a_1x1')
    branch_1 = conv3d_bn(branch_1,
                         384,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5c_1b_3x3')

    branch_2 = conv3d_bn(x,
                         48,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_2a_1x1')
    branch_2 = conv3d_bn(branch_2,
                         128,
                         3,
                         3,
                         3,
                         padding='same',
                         name='Conv3d_5c_2b_3x3')

    branch_3 = MaxPooling3D((3, 3, 3),
                            strides=(1, 1, 1),
                            padding='same',
                            name='MaxPool2d_5c_3a_3x3')(x)
    branch_3 = conv3d_bn(branch_3,
                         128,
                         1,
                         1,
                         1,
                         padding='same',
                         name='Conv3d_5c_3b_1x1')

    x = layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                           axis=channel_axis,
                           name='Mixed_5c')

    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7),
                             strides=(1, 1, 1),
                             padding='valid',
                             name='global_avg_pool')(x)
        print(x.shape)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x,
                      classes,
                      1,
                      1,
                      1,
                      padding='same',
                      use_bias=True,
                      use_activation_fn=False,
                      use_bn=False,
                      name='Conv3d_6a_1x1')
        print(x.shape)

        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)
        print(x.shape, num_frames_remaining)

        # logits (raw scores for each class)
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        h = int(x.shape[2])
        w = int(x.shape[3])
        #   print("h and w", h, w)
        x = AveragePooling3D((2, h, w),
                             strides=(1, 1, 1),
                             padding='valid',
                             name='global_avg_pool')(x)
        print('dropout used')
        x = Dropout(dropout_prob)(x)
        x = conv3d_bn(x,
                      classes,
                      1,
                      1,
                      1,
                      padding='same',
                      use_bias=True,
                      use_activation_fn=False,
                      use_bn=False,
                      name='Conv3d_6a_1x1')

        x = Reshape((-1, classes))(x)

        # logits (raw scores for each class)
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (None, classes))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)

    inputs = img_input
    # create model
    model = Model(inputs, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        if weights == WEIGHTS_NAME[0]:  # rgb_kinetics_only
            if include_top:
                model_weights_path = WEIGHTS_PATH['rgb_kinetics_only']
                # model_name = 'i3d_inception_rgb_kinetics_only.h5'
            else:
                model_weights_path = WEIGHTS_PATH_NO_TOP['rgb_kinetics_only']
                # model_name = 'i3d_inception_rgb_kinetics_only_no_top.h5'

        elif weights == WEIGHTS_NAME[1]:  # flow_kinetics_only
            if include_top:
                model_weights_path = WEIGHTS_PATH['flow_kinetics_only']
                # model_name = 'i3d_inception_flow_kinetics_only.h5'
            else:
                model_weights_path = WEIGHTS_PATH_NO_TOP['flow_kinetics_only']
                # model_name = 'i3d_inception_flow_kinetics_only_no_top.h5'

        elif weights == WEIGHTS_NAME[2]:  # rgb_imagenet_and_kinetics
            if include_top:
                model_weights_path = WEIGHTS_PATH['rgb_imagenet_and_kinetics']
                # model_name = 'i3d_inception_rgb_imagenet_and_kinetics.h5'
            else:
                model_weights_path = WEIGHTS_PATH_NO_TOP[
                    'rgb_imagenet_and_kinetics']
                # model_name = 'i3d_inception_rgb_imagenet_and_kinetics_no_top.h5'

        elif weights == WEIGHTS_NAME[3]:  # flow_imagenet_and_kinetics
            if include_top:
                model_weights_path = WEIGHTS_PATH['flow_imagenet_and_kinetics']
                # model_name = 'i3d_inception_flow_imagenet_and_kinetics.h5'
            else:
                model_weights_path = WEIGHTS_PATH_NO_TOP[
                    'flow_imagenet_and_kinetics']
                # model_name = 'i3d_inception_flow_imagenet_and_kinetics_no_top.h5'

        # downloaded_weights_path = get_file(model_name, weights_url, cache_subdir='models')
        model.load_weights(model_weights_path, by_name=True)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend(
        ) == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')

    elif weights is not None:
        model.load_weights(weights, by_name=True)

    return model
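# Hedged usage sketch for Inception_Inflated3d above: build a randomly initialised
# RGB model on 64-frame 224x224 clips (weights=None avoids any weight download);
# the input shape assumes the channels_last data format.
i3d = Inception_Inflated3d(include_top=True, weights=None,
                           input_shape=(64, 224, 224, 3), classes=400)
print(i3d.output_shape)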
Example #27
def attentive_reader(num_words, embeddings_matrix, ce_loader, scope,
                     embedding_dim=64):
    # (batch, input_len) => (batch, input_len, embedding_dim)
    q_input = Input(shape=(QUESTION_LEN,), name="q_input")
    a_input = Input(shape=(ANSWER_LEN,), name="a_input")
    c_input = Input(shape=(CONTEXT_LEN,), name="c_input")

    q_emb = Embedding(input_dim=num_words + 1,  # word 0 used for padding
                      output_dim=embedding_dim,
                      weights=[embeddings_matrix],
                      input_length=QUESTION_LEN,
                      name="embedding_q_" + scope,
                      mask_zero=False,
                      trainable=False)
    a_emb = Embedding(input_dim=num_words + 1,  # word 0 used for padding
                      output_dim=embedding_dim,
                      weights=[embeddings_matrix],
                      input_length=ANSWER_LEN,
                      name="embedding_a_" + scope,
                      mask_zero=False,
                      trainable=False)
    c_emb = Embedding(input_dim=num_words + 1,  # word 0 used for padding
                      output_dim=embedding_dim,
                      weights=[embeddings_matrix],
                      input_length=CONTEXT_LEN,
                      name="embedding_c_" + scope,
                      mask_zero=False,
                      trainable=False)

    q = q_emb(q_input)
    a = a_emb(a_input)
    c = c_emb(c_input)

    q = TimeDistributed(Dense(300, activation='tanh'))(q)
    a = TimeDistributed(Dense(300, activation='tanh'))(a)
    c = TimeDistributed(Dense(300, activation='tanh'))(c)

    # q = Dropout(0.25)(q)
    # a = Dropout(0.25)(a)
    # c = Dropout(0.25)(c)

    q_lstm = Bidirectional(LSTM(50, recurrent_dropout=0.35))(q)
    c_lstm = Bidirectional(LSTM(50, recurrent_dropout=0.35,
                                return_sequences=True))(c)

    aux1 = TimeDistributed(Dense(200, activation=None,
                                 use_bias=False))(c_lstm)

    aux2 = Dense(200, activation=None, use_bias=False)(q_lstm)
    aux2 = RepeatVector(CONTEXT_LEN)(aux2)

    mt = Add()([aux1, aux2])
    mt = TimeDistributed(Activation('tanh'))(mt)

    st = TimeDistributed(Dense(1, activation=None, use_bias=False))(mt)
    st = Reshape((CONTEXT_LEN,))(st)
    st = Activation('softmax')(st)
    st = Reshape((CONTEXT_LEN, 1))(st)

    c_lstm = Permute((2, 1))(c_lstm)
    r = Lambda(lambda x: K.batch_dot(x[0], x[1]))([c_lstm, st])
    r = Reshape((-1,))(r)

    # Combine document attention and query (question).
    aux1 = Dense(450, activation=None, use_bias=False)(q_lstm)
    r = Dense(450, activation=None, use_bias=False)(r)

    gAS = Add()([r, aux1])
    gAS = Activation('tanh')(gAS)

    a_lstm = Bidirectional(LSTM(50, recurrent_dropout=0.25))(a)
    cqa = concatenate([gAS, a_lstm], axis=1)

    cqa = Dense(250, activation='relu')(cqa)

    cqa = Dropout(0.40)(cqa)

    cqa = Dense(250, activation='relu')(cqa)

    cqa = Dropout(0.15)(cqa)

    output = Dense(2, activation='softmax')(cqa)
    model = Model(inputs=[q_input,
                          a_input, c_input], outputs=[output])
    model.compile(loss=categorical_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])
    plot_model(model, to_file='2way_model.png', show_shapes=True)
    return model
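# Hedged usage sketch for attentive_reader above: shapes and data are illustrative
# assumptions; embeddings_matrix, QUESTION_LEN, ANSWER_LEN and CONTEXT_LEN are
# expected to be defined elsewhere in the original file, and plot_model inside the
# function needs pydot/graphviz installed.
import numpy as np

num_words = embeddings_matrix.shape[0] - 1
model = attentive_reader(num_words, embeddings_matrix, ce_loader=None, scope="demo")
q = np.random.randint(1, num_words + 1, size=(32, QUESTION_LEN))
a = np.random.randint(1, num_words + 1, size=(32, ANSWER_LEN))
c = np.random.randint(1, num_words + 1, size=(32, CONTEXT_LEN))
y = np.eye(2)[np.random.randint(0, 2, size=32)]
model.fit([q, a, c], y, batch_size=8, epochs=1)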
Example #28
    pass
print('directory_name: ' + directory_name)
print('path_to_dir: ' + path_to_dir)

# Model
## ======================================================================================================
print("Creating Model...")

inputs = Input(shape=(sequence_length, ), dtype='int32')
embedding = Embedding(input_dim=len(word_index) + 1,
                      weights=[embedding_matrix],
                      output_dim=embedding_dim,
                      input_length=sequence_length,
                      trainable=True)(inputs)
dropout_1 = Dropout(drop)(embedding)
reshape1 = Reshape((sequence_length, embedding_dim, 1))(dropout_1)
# Conv1
conv_1 = Conv2D(num_filters,
                kernel_size=(filter_sizes[0], embedding_dim),
                use_bias=True,
                strides=1,
                padding='valid',
                activation=activation1,
                name='conv_1')(reshape1)
maxpool_1 = MaxPool2D(pool_size=pool_size,
                      strides=stride_size,
                      padding=padding,
                      name='pool_1')(conv_1)
maxpool_1_reshape = Reshape(
    (int(maxpool_1.shape[1]), int(maxpool_1.shape[3]), 1),
    name='pool_1_reshaped')(maxpool_1)
Example #29
            def YOLOMODEL(path):
                input_image = Input(shape=(input_size, input_size, 3))
                true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

                # Layer 1
                x = Conv2D(32, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_1',
                           use_bias=False)(input_image)
                x = BatchNormalization(name='norm_1')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 2
                x = Conv2D(64, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_2',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_2')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 3
                x = Conv2D(128, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_3',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_3')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 4
                x = Conv2D(64, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_4',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_4')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 5
                x = Conv2D(128, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_5',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_5')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 6
                x = Conv2D(256, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_6',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_6')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 7
                x = Conv2D(128, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_7',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_7')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 8
                x = Conv2D(256, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_8',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_8')(x)
                x = LeakyReLU(alpha=0.1)(x)
                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 9
                x = Conv2D(512, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_9',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_9')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 10
                x = Conv2D(256, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_10',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_10')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 11
                x = Conv2D(512, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_11',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_11')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 12
                x = Conv2D(256, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_12',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_12')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 13
                x = Conv2D(512, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_13',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_13')(x)
                x = LeakyReLU(alpha=0.1)(x)

                skip_connection = x

                x = MaxPooling2D(pool_size=(2, 2))(x)

                # Layer 14
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_14',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_14')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 15
                x = Conv2D(512, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_15',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_15')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 16
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_16',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_16')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 17
                x = Conv2D(512, (1, 1),
                           strides=(1, 1),
                           padding='same',
                           name='conv_17',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_17')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 18
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_18',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_18')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 19
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_19',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_19')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 20
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_20',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_20')(x)
                x = LeakyReLU(alpha=0.1)(x)

                # Layer 21
                skip_connection = Conv2D(64, (1, 1),
                                         strides=(1, 1),
                                         padding='same',
                                         name='conv_21',
                                         use_bias=False)(skip_connection)
                skip_connection = BatchNormalization(
                    name='norm_21')(skip_connection)
                skip_connection = LeakyReLU(alpha=0.1)(skip_connection)
                skip_connection = Lambda(space_to_depth_x2)(skip_connection)

                x = concatenate([skip_connection, x])

                # Layer 22
                x = Conv2D(1024, (3, 3),
                           strides=(1, 1),
                           padding='same',
                           name='conv_22',
                           use_bias=False)(x)
                x = BatchNormalization(name='norm_22')(x)
                x = LeakyReLU(alpha=0.1)(x)
                feature_extractor = Model(input_image, x, name='FULLYOLO')
                features = feature_extractor(input_image)

                grid_h, grid_w = feature_extractor.get_output_shape_at(-1)[1:3]

                # make the object detection layer
                output = Conv2D(nb_box * (4 + 1 + nb_class), (1, 1),
                                strides=(1, 1),
                                padding='same',
                                name='conv_23',
                                kernel_initializer='lecun_normal')(features)
                output = Reshape(
                    (grid_h, grid_w, nb_box, 4 + 1 + nb_class))(output)
                output = Lambda(lambda args: args[0])([output, true_boxes])

                model = Model([input_image, true_boxes], output)

                # initialize the weights of the detection layer
                layer = model.layers[-4]
                weights = layer.get_weights()

                new_kernel = np.random.normal(
                    size=weights[0].shape) / (grid_h * grid_w)
                new_bias = np.random.normal(size=weights[1].shape) / (grid_h *
                                                                      grid_w)

                layer.set_weights([new_kernel, new_bias])

                model.load_weights(path)

                #print(model.summary())
                return model
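# space_to_depth_x2 is referenced in Layer 21 above but is not defined in this
# excerpt; in the original file it presumably lives at module scope. A hedged
# sketch of the common definition used by Keras YOLOv2 re-implementations:
import tensorflow as tf

def space_to_depth_x2(x):
    # Rearrange 2x2 spatial blocks into channels so the skip connection matches
    # the resolution of the downsampled feature map.
    return tf.nn.space_to_depth(x, block_size=2)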
Example #30
def make_generator(dense=True, labels_size=10):
    """Creates a generator model that takes a 100-dimensional noise vector as a "seed", and outputs images
    of size 28x28x1."""
    model = Sequential()

    # ------------------------------ Layer 1: Dense + LeakyReLu ---------------------------------------
    if dense:
        model.add(Dense(1024, input_dim=100 + labels_size))
        model.add(LeakyReLU())
        model.add(Dense(128 * 7 * 7))
    else:
        model.add(Dense(128 * 7 * 7, input_dim=100 + labels_size))

    # ------------------------------ Layer 2: Dense + LeakyReLu ---------------------------------------

    model.add(BatchNormalization())
    model.add(LeakyReLU())

    # - - - - - - - - - - - - - - - - - - - Reshape  - - - - - - - - - - - - - - - -
    if K.image_data_format() == 'channels_first':
        # size: 128 x 7 x 7
        model.add(Reshape((128, 7, 7), input_shape=(128 * 7 * 7, )))
        bn_axis = 1  # first
    else:
        # size: 7 x 7 x 128
        model.add(Reshape((7, 7, 128), input_shape=(128 * 7 * 7, )))
        bn_axis = -1  # last

    # ------------------------------ Layer 3: DeConv2D + LeakyReLu ---------------------------------------
    model.add(
        Conv2DTranspose(filters=128,
                        kernel_size=(5, 5),
                        strides=2,
                        padding='same'))
    model.add(BatchNormalization(axis=bn_axis))
    model.add(LeakyReLU())

    # ------------------------------ Layer 4: Conv2D + LeakyReLu ---------------------------------------
    model.add(Convolution2D(64, (5, 5), padding='same'))
    model.add(BatchNormalization(axis=bn_axis))
    model.add(LeakyReLU())

    # ------------------------------ Layer 5: DeConv2D + LeakyReLu ---------------------------------------
    model.add(Conv2DTranspose(64, (5, 5), strides=2, padding='same'))
    model.add(BatchNormalization(axis=bn_axis))
    model.add(LeakyReLU())

    # ------------------------------ Layer 6: Conv2D + Tanh ---------------------------------------
    # Because we normalized training inputs to lie in the range [-1, 1],
    # the tanh function should be used for the output of the generator to ensure its output
    # also lies in this range.
    model.add(Convolution2D(1, (5, 5), padding='same', activation='tanh'))

    # our idea:
    # seed 100
    # layer1: dense 1024
    # layer2: dense 7*7*128
    # reshape 7 x 7 x 128
    # layer3: Deconv 14 x 14 x 128
    # layer4: Conv   14 x 14 x 64
    # layer5: Deconv 28 x 28 x 64
    # layer6: Conv   28 x 28 x 1

    return model
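# Hedged usage sketch for make_generator above: draw random noise seeds,
# concatenate one-hot labels (10 classes, matching the default labels_size),
# and generate a batch of 28x28x1 images; shapes assume channels_last.
import numpy as np

generator = make_generator(dense=True, labels_size=10)
noise = np.random.normal(size=(16, 100))
labels = np.eye(10)[np.random.randint(0, 10, size=16)]
fake_images = generator.predict(np.concatenate([noise, labels], axis=1))
print(fake_images.shape)  # expected (16, 28, 28, 1)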