Example #1
def MixSymbolDemodulator(input_shape,
                         filter_num,
                         label_size,
                         signal_type_shape,
                         dropout=0.0):
    input = Input(shape=input_shape)
    signal_type = Input(shape=signal_type_shape)
    kernel_sizes = [(3, 1), (16, 1)]
    kernel2_sizes = [4, 3]
    all_strides = [1, 2]
    outputs = list()
    for kernel_size, kernel2_size, strides in zip(kernel_sizes, kernel2_sizes,
                                                  all_strides):
        conv1Output = Conv2D(filters=filter_num,
                             kernel_size=kernel_size,
                             strides=strides)(input)
        conv1Output = Reshape([
            conv1Output.get_shape().as_list()[1],
            conv1Output.get_shape().as_list()[3]
        ])(conv1Output)
        # conv1Output = BatchNormalization()(conv1Output)
        conv1Output = Activation('selu')(conv1Output)
        conv1Output = Dropout(dropout)(conv1Output)
        conv1Output = Concatenate(axis=-1)([
            conv1Output,
            RepeatVector(conv1Output.get_shape().as_list()[1])(signal_type)
        ])
        conv1Output = Reshape([
            conv1Output.get_shape().as_list()[1],
            conv1Output.get_shape().as_list()[2], 1
        ])(conv1Output)
        conv2Output = Conv2D(
            filters=filter_num // 2,
            kernel_size=[kernel2_size,
                         conv1Output.get_shape().as_list()[2]],
            strides=[2, 1])(conv1Output)
        conv2Output = Reshape([
            conv2Output.get_shape().as_list()[1],
            conv2Output.get_shape().as_list()[3], 1
        ])(conv2Output)
        # conv2Output = BatchNormalization()(conv2Output)
        conv2Output = Activation('selu')(conv2Output)
        pooling1Output = MaxPool2D((2, 1))(conv2Output)
        pooling1Output = Flatten()(pooling1Output)
        dropout2Output = Dropout(dropout)(pooling1Output)
        output = Dense(16, activation='relu')(dropout2Output)
        outputs.append(output)

    outputs.append(signal_type)
    output = Concatenate(axis=-1)(outputs)
    output = Dense(label_size, activation='softmax')(output)
    return Model([input, signal_type], output)
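
A hypothetical instantiation of MixSymbolDemodulator (the shapes and hyperparameters below are assumptions, and the usual Keras functional-API layer imports used inside the function are assumed to be in scope):

model = MixSymbolDemodulator(input_shape=(128, 1, 2),   # assumed (samples, 1, I/Q channels)
                             filter_num=64,
                             label_size=4,               # assumed number of symbol classes
                             signal_type_shape=(3,),     # assumed one-hot signal-type vector
                             dropout=0.1)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()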
Example #2
def create_convlstm_model(fingerprint_input, model_settings, is_training):

    tf.logging.info("conv lstm is used")
    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')

    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    label_count = model_settings['label_count']

    x = Reshape((14, int(input_time_size / 14), input_frequency_size,
                 1))(fingerprint_input)
    x = ConvLSTM2D(64, (8, 20))(x)
    x = MaxPooling2D(pool_size=(3, 3), padding='same')(x)
    if is_training:
        x = tf.nn.dropout(x, dropout_prob)

    shape = x.get_shape().as_list()  #5d
    tf.logging.info("second layer shape:" + str(shape))

    x = Conv2D(64, (2, 10), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
    if is_training:
        x = tf.nn.dropout(x, dropout_prob)

    x = Flatten()(x)
    x = Dense(label_count, activation='tanh')(x)

    if is_training:
        return x, dropout_prob
    else:
        return x
Example #3
def create_lstm_model(fingerprint_input, model_settings, is_training):

    tf.logging.info("lstm is used")
    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')

    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    label_count = model_settings['label_count']

    x = Reshape((input_time_size, input_frequency_size))(fingerprint_input)
    x = LSTM(1024)(x)

    if is_training:
        x = tf.nn.dropout(x, dropout_prob)

    shape = x.get_shape().as_list()  #5d
    tf.logging.info("second layer shape:" + str(shape))

    x = Dense(label_count, activation='tanh')(x)

    if is_training:
        return x, dropout_prob
    else:
        return x
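
Both speech builders above follow the TF1 style of the TensorFlow speech-commands tutorial: the caller supplies a flattened fingerprint placeholder plus a settings dict. A minimal wiring sketch for the LSTM variant (the placeholder shape and settings values are assumptions, and the Keras layers inside the builder are assumed to come from tf.keras so they accept a raw TF1 placeholder):

import tensorflow as tf

model_settings = {'dct_coefficient_count': 40,   # assumed MFCCs per frame
                  'spectrogram_length': 98,      # assumed number of frames
                  'label_count': 12}             # assumed number of keyword classes
fingerprint_input = tf.placeholder(
    tf.float32, [None, 98 * 40], name='fingerprint_input')
logits, dropout_prob = create_lstm_model(fingerprint_input, model_settings,
                                         is_training=True)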
Example #4
def renet_module(X, hidden_size, receptive_filter_size=2, batch_size=1):

    _, X_height, X_width, X_channel = X.get_shape()
    print(X_height)
    (vertical_rnn_inputs_fw, vertical_rnn_inputs_rev,
     horizontal_rnn_inputs_fw,
     horizontal_rnn_inputs_rev) = rnn_input_layer(receptive_filter_size)(X)
    renet1 = CuDNNLSTM(hidden_size, return_sequences=True)(vertical_rnn_inputs_fw)
    print(renet1.shape)
    renet2 = CuDNNLSTM(hidden_size, return_sequences=True)(vertical_rnn_inputs_rev)
    print(renet2.shape)
    renet3 = CuDNNLSTM(hidden_size, return_sequences=True)(horizontal_rnn_inputs_fw)
    renet4 = CuDNNLSTM(hidden_size, return_sequences=True)(horizontal_rnn_inputs_rev)

    renet_concat = Concatenate(axis=2)([renet1, renet2, renet3, renet4])
    print(renet_concat.shape)
    renet = Reshape((int(X_height) // receptive_filter_size,
                     int(X_width) // receptive_filter_size, -1))(renet_concat)
    print(renet.get_shape())

    return renet
Example #5
def getModel(C, H, W, classes, h=None, mode="train"):
    inp = Input(shape=(C, H, W))
    out = conv_bn_relu(inp, 64, 3)
    out = conv_bn_relu(out, 64, 3)
    out = MaxPooling2D(pool_size=(2, 2))(out)

    out = conv_bn_relu(out, 128, 3)
    out = conv_bn_relu(out, 128, 3)
    out = MaxPooling2D(pool_size=(2, 2))(out)

    out = conv_bn_relu(out, 256, 3)
    out = conv_bn_relu(out, 256, 3)
    out = conv_bn_relu(out, 256, 3)
    out = MaxPooling2D(pool_size=(2, 2))(out)

    out = conv_bn_relu(out, 512, 3)
    out = conv_bn_relu(out, 512, 3)
    out = conv_bn_relu(out, 512, 3)
    if mode != "dense":
        if h is None: _, c, h, w = [d.value for d in out.get_shape()]
        out = conv_bn_relu(out, 512, h, padding='valid')
        out = conv_bn_relu(out, classes, 1, activation=None, padding='valid')
    if mode == "train":
        out = Reshape((classes, -1))(out)
        out = Permute((2, 1))(out)
        out = Activation("softmax")(out)
        out = Flatten()(out)
    if mode == "test":
        _, c, h, w = [d.value for d in out.get_shape()]
        out = Reshape((classes, -1))(out)
        out = Permute((2, 1))(out)
        out = Activation("softmax")(out)
        out = Permute((2, 1))(out)
        out = Reshape((c, h, w))(out)

    if mode == "dense":
        out = MaxPooling2D(pool_size=(2, 2))(out)
        out = Flatten()(out)
        out = Dense(512, activation='relu')(out)
        out = Dense(classes, activation='softmax')(out)
        #out = Permute((2,1))(out)
        #out = Activation("softmax")(out)
        ##out = Permute((2,1))(out)
        #out = Reshape((c,h,w))(out)
    #out = Flatten()(out)
    #out = Dense(512,activation="relu")(out)
    #out = Dense(classes, activation="softmax")(out)
    return Model(inp, out)
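
getModel unpacks out.get_shape() as (_, c, h, w), so it expects channels-first (C, H, W) inputs; it also relies on a conv_bn_relu helper defined elsewhere in that codebase. A hypothetical instantiation (the dimensions are assumptions, and conv_bn_relu plus the Keras layer imports are assumed to be in scope):

from keras import backend as K

K.set_image_data_format('channels_first')   # the (C, H, W) input assumes this
model = getModel(C=3, H=64, W=64, classes=10, mode="dense")
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])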
Example #6
def MixSignalDecoder(input_shape,
                     filter_num,
                     kernel_size,
                     strides,
                     label_size,
                     signal_type_shape,
                     dropout=0.0):
    input = Input(shape=input_shape)
    signal_type = Input(shape=signal_type_shape)
    conv1Output = Conv2D(filters=filter_num,
                         kernel_size=kernel_size,
                         strides=strides)(input)
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[3]
    ])(conv1Output)
    # conv1Output = BatchNormalization()(reshapeOutput)
    conv1Output = Activation('selu')(conv1Output)
    conv1Output = Dropout(dropout)(conv1Output)
    conv1Output = Concatenate(axis=-1)([
        conv1Output,
        RepeatVector(conv1Output.get_shape().as_list()[1])(signal_type)
    ])
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[2], 1
    ])(conv1Output)
    conv2Output = ZeroPadding2D((1, 0))(conv1Output)
    conv2Output = Conv2D(filters=filter_num // 2,
                         kernel_size=[3,
                                      conv1Output.get_shape().as_list()[2]],
                         strides=[2, 1])(conv2Output)
    conv2Output = Reshape([
        conv2Output.get_shape().as_list()[1],
        conv2Output.get_shape().as_list()[3]
    ])(conv2Output)
    # conv2Output = BatchNormalization()(conv2Output)
    conv2Output = Activation('selu')(conv2Output)
    conv2Output = Dropout(dropout)(conv2Output)
    conv2Output = Concatenate(axis=-1)([
        conv2Output,
        RepeatVector(conv2Output.get_shape().as_list()[1])(signal_type)
    ])
    conv2Output = TimeDistributed(Dense(16, activation='relu'))(conv2Output)
    conv2Output = TimeDistributed(Dense(label_size,
                                        activation='softmax'))(conv2Output)
    return Model([input, signal_type], conv2Output)
Example #7
def DeepCNN(input_shape,
            filter_num,
            kernel_size,
            strides,
            label_size,
            dropout=0.0):
    input = Input(shape=input_shape)
    conv1Output = Conv2D(filters=filter_num,
                         kernel_size=kernel_size,
                         strides=strides)(input)
    reshapeOutput = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[3]
    ])(conv1Output)
    conv1Output = BatchNormalization()(reshapeOutput)
    conv1Output = Activation('relu')(conv1Output)
    conv1Output = Dropout(dropout)(conv1Output)
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[2], 1
    ])(conv1Output)
    conv2Output = ZeroPadding2D((1, 0))(conv1Output)
    conv2Output = Conv2D(filters=filter_num // 2,
                         kernel_size=[3, filter_num],
                         strides=[2, 1])(conv2Output)
    conv2Output = Reshape([
        conv2Output.get_shape().as_list()[1],
        conv2Output.get_shape().as_list()[3]
    ])(conv2Output)
    conv2Output = BatchNormalization()(conv2Output)
    conv2Output = Activation('relu')(conv2Output)
    conv2Output = Dropout(dropout)(conv2Output)
    conv2Output = TimeDistributed(Dense(16, activation='relu'))(conv2Output)
    conv2Output = TimeDistributed(Dense(label_size,
                                        activation='softmax'))(conv2Output)
    return Model(input, conv2Output)
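
A hypothetical instantiation of DeepCNN (all shapes below are assumptions): the kernel width has to equal the input width so that the first Reshape can drop the collapsed spatial axis, and the output is a per-timestep softmax of shape (timesteps, label_size).

model = DeepCNN(input_shape=(128, 2, 1),   # assumed (samples, I/Q, 1)
                filter_num=64,
                kernel_size=(3, 2),        # kernel width matches the input width
                strides=(1, 1),
                label_size=4,
                dropout=0.1)
model.compile(optimizer='adam', loss='categorical_crossentropy')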
Example #8
def Attentive_BconvLSTM_2d(x, g, inter_channel, data_format='channels_last'):
    print(x.shape)
    print(g.shape)

    g = Conv2D(inter_channel*2, [1, 1], strides=[1, 1], data_format=data_format)(g)
    # f(?,g_height,g_width,inter_channel)
    x = Reshape(target_shape=(1, x.get_shape().as_list()[1], x.get_shape().as_list()[2], inter_channel*2))(x)
    g = Reshape(target_shape=(1, g.get_shape().as_list()[1], g.get_shape().as_list()[2], inter_channel*2))(g)
    merge = concatenate([x, g], axis=1)

    f = ConvLSTM2D(filters=inter_channel, kernel_size=(3, 3), padding='same', return_sequences=False, go_backwards=True,
                        kernel_initializer='he_normal')(merge)
    f = Activation('relu')(f)

    psi_f = Conv2D(1, [1, 1], strides=[1, 1], data_format=data_format)(f)
    rate = Activation('sigmoid')(psi_f)

    x = multiply([x, rate])
    att_x = Reshape(target_shape=(x.get_shape().as_list()[2], x.get_shape().as_list()[3], x.get_shape().as_list()[4]))(x)
    return att_x
Example #9
def build_model():
    input_seq = Input(shape=(MAXLEN * 21, ))
    x = Reshape((MAXLEN, 21))(input_seq)
    x = Conv1D(320, 7, padding='same')(x)
    print(x.get_shape())
    x = MaxPooling1D(4, padding='same')(x)
    print(x.get_shape())
    x = Conv1D(160, 7, padding='same')(x)
    print(x.get_shape())
    x = Conv1D(320, 7, padding='same')(x)
    print(x.get_shape())
    x = UpSampling1D(4)(x)
    print(x.get_shape())
    x = Conv1D(21, 7, activation='sigmoid', padding='same')(x)
    print(x.get_shape())

    decoded = Reshape((MAXLEN * 21, ))(x)

    autoencoder = Model(input_seq, decoded)
    autoencoder.compile(optimizer='sgd', loss='mean_squared_error')
    autoencoder.summary()
    return autoencoder
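
build_model relies on a module-level MAXLEN and builds a 1D convolutional autoencoder over flattened one-hot sequences of 21 symbols. A usage sketch (MAXLEN and the dummy batch are assumptions; MAXLEN should be divisible by 4 so the pool/upsample round-trip preserves the length):

import numpy as np

MAXLEN = 1000                                             # assumed sequence length
autoencoder = build_model()
x = np.random.rand(32, MAXLEN * 21).astype('float32')     # dummy flattened one-hot batch
autoencoder.fit(x, x, epochs=1, batch_size=8)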
Example #10
input_tensor = Input((height, width, 3))
x = input_tensor

for i in range(4):
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = BatchNormalization(axis=-1)(x)

    if i < 3:
        x = MaxPooling2D(pool_size=(2, 2))(x)
    else:
        x = MaxPooling2D(pool_size=(2, 1))(x)

conv_shape = x.get_shape()
x = Reshape(target_shape=(int(conv_shape[2]),
                          int(conv_shape[1] * conv_shape[3])))(x)

x = Dense(32, activation='relu')(x)

gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal',
            name='gru1')(x)
gru_1b = GRU(rnn_size,
             return_sequences=True,
             go_backwards=True,
             kernel_initializer='he_normal',
             name='gru1_b')(x)
gru1_merged = keras.layers.add([gru_1, gru_1b])

gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal',
            name='gru2')(gru1_merged)
Example #11
def perframe_sequence_trainer_noattn(
    conditioning_input_shapes,
    conditioning_input_names,
    input_gt_frames_shape,
    perframe_painter_model,
    seq_len,
    is_done_model=None,
    n_const_frames=1,
    do_output_disc_stack=False,
    n_prev_frames=None,
    n_prev_disc_frames=1,
    n_painter_frame_outputs=2,
):
    if n_prev_frames is None:
        n_prev_frames = seq_len - 1

    # collect conditioning inputs, which should include last frame, prev frames, prev attns
    if not isinstance(conditioning_input_shapes, list):
        conditioning_input_shapes = [conditioning_input_shapes]
    if conditioning_input_names is None:
        conditioning_input_names = [
            'cond_input_{}'.format(ii)
            for ii in range(len(conditioning_input_shapes))
        ]

    conditioning_inputs = []
    for ii, input_shape in enumerate(conditioning_input_shapes):
        conditioning_inputs.append(
            Input(input_shape, name=conditioning_input_names[ii]))

    input_gt_frames = Input(input_gt_frames_shape, name='input_gt_frames')

    inputs = conditioning_inputs + [input_gt_frames]

    prev_frames = conditioning_inputs[1:]
    gt_frames = Reshape(input_gt_frames_shape,
                        name='reshape_gt')(input_gt_frames)

    # first two frames of the input stack will always be first and last frame
    const_frames = conditioning_inputs[:n_const_frames]
    curr_prev_frames = conditioning_inputs[n_const_frames:n_const_frames +
                                           n_prev_frames]
    curr_prev_attn_maps = conditioning_inputs[n_const_frames + n_prev_frames:]
    # first two frames of the input stack will always be first and last frame
    last_frame_seq = Lambda(
        lambda x: tf.tile(K.expand_dims(x, axis=-1), [1, 1, 1, 1, seq_len]),
        name='lambda_tile_slice_last_frame_seq')(const_frames[0])

    director_preds_seqs = []
    painter_preds_seqs = []

    for t in range(seq_len):

        # cvae
        painter_cond_inputs = const_frames + curr_prev_frames

        # provide the true frame as input to the autoencoding branch of the painter
        gt_frame = Lambda(
            lambda x: tf.gather(x, t, axis=-1),
            name='lambda_slice_gt_frames_t{}'.format(t))(gt_frames)
        painter_ae_inputs = painter_cond_inputs + [gt_frame]
        # TODO: painter network currently expects cond input first, then ae input
        painter_preds = perframe_painter_model(painter_ae_inputs +
                                               painter_cond_inputs)

        if not isinstance(painter_preds, list):
            painter_preds = [painter_preds]

        clipped_painter_frames = []
        for ppi in range(n_painter_frame_outputs):
            pred_frame = painter_preds[ppi]
            # TODO: get rid of hardcoding of range (e.g. if we are not normalizing)
            pred_frame = Lambda(
                lambda x: tf.clip_by_value(x, -1., 1.),
                name=F'lambda_clip_frame_{ppi}_t{t}')(pred_frame)
            clipped_painter_frames.append(pred_frame)
        painter_preds = clipped_painter_frames + painter_preds[
            n_painter_frame_outputs:]

        if n_prev_disc_frames > 0:
            prev_frames = curr_prev_frames[-n_prev_disc_frames:]

        curr_prev_frames = curr_prev_frames[1:] + [painter_preds[0]]

        for ppi, pp in enumerate(painter_preds):
            # give every prediction a time dimension, and concatenate it
            pp = Reshape(pp.get_shape().as_list()[1:] + [1],
                         name='reshape_t{}_pp{}'.format(t, ppi))(pp)
            if t == 0:
                painter_preds_seqs.append(pp)
            else:
                painter_preds_seqs[ppi] = Concatenate(
                    name='concat_t{}_pp{}'.format(t, ppi))(
                        [painter_preds_seqs[ppi], pp])

        ####### compile information needed for conditional discriminators #######
        if (do_output_disc_stack):
            if n_prev_disc_frames > 0:
                prev_frames = Reshape(
                    prev_frames.get_shape().as_list()[1:] + [1],
                    name='exptdim_t{}_prevframes'.format(t))(prev_frames)
                if t == 0:
                    prev_frames_seq = prev_frames
                else:
                    prev_frames_seq = Concatenate(
                        name='concat_t{}_prevframe'.format(t))(
                            [prev_frames_seq, prev_frames])

        ####### compile information needed for our "is done" classifier
        if is_done_model is not None:
            # completed painting, and current prediction
            is_done_inputs = Concatenate(axis=-1)(
                [const_frames[0], pred_frame])

            # run the classifier
            is_done_pred = is_done_model(is_done_inputs)

            # add a time dimension
            is_done_pred = Reshape(
                is_done_pred.get_shape().as_list()[1:] + [1],
                name='exptdim_t{}_isdone'.format(t))(is_done_pred)
            if t == 0:
                is_done_preds_seq = is_done_pred
            else:
                is_done_preds_seq = Concatenate(
                    name='concat_t{}_isdone'.format(t),
                    axis=-1)([is_done_preds_seq, is_done_pred])

    outputs = director_preds_seqs + painter_preds_seqs

    # if we are using a discriminator, output the discriminator input stacks at the end so we can evaluate the scores
    if do_output_disc_stack:
        disc_inputs = [last_frame_seq]
        if n_prev_disc_frames > 0:
            disc_inputs.append(prev_frames_seq)
        # discriminator on attention map
        director_disc_stack = Concatenate(
            axis=-2,
            name='concat_director_disc_stack')(disc_inputs +
                                               [director_preds_seqs[0]])
        outputs += [director_disc_stack]

    return Model(inputs=inputs,
                 outputs=outputs,
                 name='seqlen{}_perframe_trainer_model'.format(seq_len))
Example #12
def DeepCNNSeq2Seq(filter_num,
                   kernel_size,
                   strides,
                   output_dim,
                   output_length,
                   label_size,
                   hidden_dim=None,
                   input_shape=None,
                   batch_size=None,
                   batch_input_shape=None,
                   input_dim=None,
                   input_length=None,
                   depth=1,
                   dropout=0.0,
                   unroll=False,
                   stateful=False,
                   model_type='simple'):
    input = Input(shape=input_shape)
    conv1Output = Conv2D(filters=filter_num,
                         kernel_size=kernel_size,
                         strides=strides)(input)
    reshapeOutput = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[3]
    ])(conv1Output)
    conv1Output = BatchNormalization()(reshapeOutput)
    conv1Output = Activation('relu')(conv1Output)
    conv1Output = Dropout(dropout)(conv1Output)
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[2], 1
    ])(conv1Output)
    conv2Output = Conv2D(filters=filter_num // 2,
                         kernel_size=[3, filter_num],
                         strides=[2, 1])(conv1Output)
    conv2Output = Reshape([
        conv2Output.get_shape().as_list()[1],
        conv2Output.get_shape().as_list()[3]
    ])(conv2Output)
    conv2Output = BatchNormalization()(conv2Output)
    conv2Output = Activation('relu')(conv2Output)
    conv2Output = Dropout(dropout)(conv2Output)
    # pool1Output = MaxPool2D((conv1Output.get_shape().as_list()[1], 1))(conv1Output)
    # reshapeOutput = Reshape([pool1Output.get_shape().as_list()[3], 1])(pool1Output)
    if model_type == 'attention':
        encoder, decoder = AttentionSeq2Seq(
            output_dim,
            output_length,
            batch_size=batch_size,
            input_shape=tuple(conv2Output.get_shape().as_list()[1:]),
            input_length=input_length,
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            depth=depth,
            unroll=unroll,
            stateful=stateful,
            dropout=dropout)
    else:
        encoder, decoder = SimpleSeq2Seq(
            output_dim, output_length, hidden_dim,
            tuple(conv2Output.get_shape().as_list()[1:]), batch_size,
            batch_input_shape, input_dim, input_length, depth, dropout, unroll,
            stateful)
    seq2seqEncoderOutput = encoder(conv2Output)
    seq2seqDecoderOutput = decoder(seq2seqEncoderOutput)
    output = TimeDistributed(Dense(label_size,
                                   activation='softmax'))(seq2seqDecoderOutput)
    return Model(input, output)
Example #13
def perframe_sequence_tester(
        perframe_tester_model,
        img_shape=(50, 50, 3),
        seq_len=40,
        latent_shape=(5, ),
        n_const_frames=1,
        n_prev_frames=1,
):
    inputs = [
        Input(img_shape, name='input_last_frame'),
        Input(img_shape, name='input_prev_frame')
    ]

    # first two frames of the input stack will always be first and last frame
    const_frames = inputs[:n_const_frames]
    curr_prev_frames = inputs[n_const_frames:n_const_frames + n_prev_frames]

    # a dummy input that enables us to do sampling of the latent input in the network
    z_dummy = Input(latent_shape, name='input_z_dummy')
    inputs += [z_dummy]

    painter_preds_seqs = []

    for t in range(seq_len):
        # assumes frame prediction is always the first pred, others might be KL
        painter_cond_inputs = const_frames + curr_prev_frames
        z_samp = Lambda(sampling_sigma1,
                        name=f'lambda_z_sampling_frame{t}')(z_dummy)
        painter_preds = perframe_tester_model(painter_cond_inputs + [z_samp])

        if not isinstance(painter_preds, list):
            painter_preds = [painter_preds]
        else:
            # the cvae painter might return (transformed, delta); keep only the frame
            painter_preds = [painter_preds[0]]

        clipped_painter_frames = []
        for ppi in range(len(painter_preds)):
            pred_frame = painter_preds[ppi]
            pred_frame = Lambda(
                lambda x: tf.clip_by_value(x, -1., 1.),
                name=F'lambda_clip_frame_{ppi}_t{t}')(pred_frame)
            clipped_painter_frames.append(pred_frame)
        painter_preds = clipped_painter_frames

        # shift previous frames to make room for our new painter prediction
        curr_prev_frames = curr_prev_frames[1:] + [painter_preds[0]]

        for ppi, pp in enumerate(painter_preds):
            # give every prediction a time dimension, and concatenate it

            pp = Reshape(pp.get_shape().as_list()[1:] + [1],
                         name='reshape_t{}_pp{}'.format(t, ppi))(pp)
            if t == 0:
                painter_preds_seqs.append(pp)
            else:
                painter_preds_seqs[ppi] = Concatenate(
                    name='concat_t{}_pp{}'.format(t, ppi))(
                        [painter_preds_seqs[ppi], pp])

    return Model(inputs=inputs,
                 outputs=painter_preds_seqs,
                 name='seqlen{}_model'.format(seq_len))
Example #14
def perframe_sampling_sequence_trainer_noattn(
    conditioning_input_shapes,
    conditioning_input_names,
    perframe_painter_model,
    seq_len,
    n_prev_frames=1,
    n_const_frames=1,
    n_prev_disc_frames=1,
    n_const_disc_frames=1,
    n_painter_frame_outputs=2,
    painter_latent_shape=None,
    make_painter_disc_stack=False,
):
    if n_prev_frames is None:
        n_prev_frames = seq_len - 1

    if not isinstance(conditioning_input_shapes, list):
        conditioning_input_shapes = [conditioning_input_shapes]
    if conditioning_input_names is None:
        conditioning_input_names = [
            'cond_input_{}'.format(ii)
            for ii in range(len(conditioning_input_shapes))
        ]

    conditioning_inputs = []
    for ii, input_shape in enumerate(conditioning_input_shapes):
        conditioning_inputs.append(
            Input(input_shape, name=conditioning_input_names[ii]))
    inputs = [ci for ci in conditioning_inputs]

    # these inputs are required so we can use keras sampling..still havent
    # figured out how to do it without an initial input first

    if painter_latent_shape is not None:
        # if the painter uses a CVAE
        dummy_z_p_input = Input(painter_latent_shape, name='input_z_p_dummy')
        inputs += [dummy_z_p_input]

    # first two frames of the input stack will always be first and last frame
    const_frames = conditioning_inputs[:n_const_frames]
    curr_prev_frames = conditioning_inputs[n_const_frames:n_const_frames +
                                           n_prev_frames]
    curr_prev_attn_maps = conditioning_inputs[n_const_frames + n_prev_frames:]

    # first two frames of the input stack will always be first and last frame
    last_frame_seq = Lambda(
        lambda x: tf.tile(K.expand_dims(x, axis=-1), [1, 1, 1, 1, seq_len]),
        name='lambda_tile_slice_last_frame_seq')(const_frames[0])

    painter_preds_seqs = []
    painter_deltas_seq = []
    for t in range(seq_len):
        ####### compile information needed for conditional discriminators #######
        if (make_painter_disc_stack) and n_prev_disc_frames > 0:
            # both discriminators will probably require prev frames
            # TODO: remove hardcoding that assumes 1 const frame
            prev_frames = Reshape(
                const_frames[0].get_shape().as_list()[1:] + [1],
                name='exptdim_t{}_prevframes'.format(t))(curr_prev_frames[-1])
            if t == 0:
                prev_frames_seq = prev_frames
            else:
                prev_frames_seq = Concatenate(
                    name='concat_t{}_prevframe'.format(t))(
                        [prev_frames_seq, prev_frames])

        # cvae
        painter_cond_inputs = const_frames + curr_prev_frames

        # sample from the painter's prior instead
        painter_preds = perframe_painter_model(painter_cond_inputs +
                                               [dummy_z_p_input])

        if not isinstance(painter_preds, list):
            painter_preds = [painter_preds]

        # assumes frame prediction is always the first pred, others might be KL
        clipped_painter_frames = []
        for ppi in range(n_painter_frame_outputs):
            curr_pred_frame = painter_preds[ppi]
            curr_pred_name = os.path.basename(
                os.path.dirname(curr_pred_frame.name))
            # TODO: get rid of hardcoding of range (e.g. if we are not normalizing)
            curr_pred_frame = Lambda(
                lambda x: tf.clip_by_value(x, -1., 1.),
                name=F'clip_pp{ppi}_{curr_pred_name}_t{t}')(curr_pred_frame)
            clipped_painter_frames.append(curr_pred_frame)
        painter_preds = clipped_painter_frames + painter_preds[
            n_painter_frame_outputs:]
        curr_pred_frame = painter_preds[0]

        curr_prev_frames = curr_prev_frames[1:] + [curr_pred_frame]

        ########### compile predictions into sequences in time ######################
        # hacky, but if the painter predicts a delta, we only want the first few recon outputs (the transformed frame)
        # and we can ignore the following output (the transform/delta)
        for ppi, pp in enumerate(painter_preds[:n_painter_frame_outputs]):
            # give every prediction a time dimension, and concatenate it
            ppn = os.path.basename(os.path.dirname(pp.name))

            pp = Reshape(pp.get_shape().as_list()[1:] + [1],
                         name='reshape_t{}_pp{}'.format(t, ppi))(pp)
            if t == 0:
                painter_preds_seqs.append(pp)
            else:
                painter_preds_seqs[ppi] = Concatenate(
                    name='concat_t{}_pp-{}'.format(t, ppn))(
                        [painter_preds_seqs[ppi], pp])

    outputs = painter_preds_seqs

    # if we are using a discriminator, output the discriminator input stacks at the end so we can evaluate the scores
    if make_painter_disc_stack:
        disc_inputs = []
        if n_const_disc_frames > 0:
            disc_inputs.append(last_frame_seq)

        if n_prev_disc_frames > 0:
            disc_inputs.append(prev_frames_seq)

        painter_disc_stack = Concatenate(
            axis=-2, name='concat_painter_disc_stack')(disc_inputs +
                                                       [painter_preds_seqs[0]])
        outputs += [painter_disc_stack]

    return Model(
        inputs=inputs,
        outputs=outputs,
        name='seqlen{}_perframe_sampling_trainer_model'.format(seq_len))
Example #15
def ssd_300(image_size,
            n_classes,
            mode='training',
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=True,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=[2, 1, 0],
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):
    '''
    Build an SSD300 model with Keras.
    The base network used is VGG16.

    Note: Requires Keras >= v2.0 and the TensorFlow backend >= v1.0.

    Arguments:
        image_size (tuple): The input image size `(height, width, channels)`.
        n_classes (int): The number of classes, e.g. 20 for the Pascal VOC dataset or 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference', or 'inference_fast'.
            'training' mode: the model outputs the raw prediction tensor.
            'inference' and 'inference_fast' modes: the raw predictions are decoded into coordinates and filtered by thresholding.
        l2_regularization (float, optional): The L2-regularization rate, applied to all convolutional layers.
        min_scale (float, optional): The smallest scaling factor for the anchor box sizes, measured relative to the shorter side
            of the input image.
        max_scale (float, optional): The largest scaling factor for the anchor box sizes.
        scales (list, optional): A list of floats containing the scaling factors for the convolutional predictor layers.
            The list must be one element longer than the number of predictor layers, so that the next scale is available for the
            aspect ratio 1 case. If `scales` is given, interpolation between `min_scale` and `max_scale` is not used.
        aspect_ratios_global (list, optional): The list of aspect ratios from which the anchor boxes are generated, applied to all
            prediction layers.
        aspect_ratios_per_layer (list, optional): A list of aspect-ratio lists, one per prediction layer.
            If given, it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant if an aspect ratio list contains 1; ignored otherwise.
            If `True`, two anchor boxes are generated for aspect ratio 1: the first using the layer's scale, the second using the
            geometric mean of that scale and the next scale.
        steps (list, optional): `None`, or a list with as many elements as there are predictor layers.
            Each element states, for its predictor layer, how many pixels apart the anchor box centers should be.
            An element may be a pair `(step_width, step_height)`.
            If no steps are given, they are computed so that the anchor box centers are spaced evenly.
        offsets (list, optional): `None`, or numbers stating, for each predictor layer, the offset in pixels from the top-left
            corner of the image.
        clip_boxes (bool, optional): If `True`, anchor box coordinates are clipped to stay within the image boundaries.
        variances (list, optional): A list of 4 floats > 0. Each anchor box offset coordinate is divided by its corresponding
            variance value.
        coords (str, optional): The box coordinate format used internally by the model (i.e. this is not the input format of the
            ground truth labels). Can be 'centroids' format `(cx, cy, w, h)` (box center coordinates, width, and height),
            'minmax' format `(xmin, xmax, ymin, ymax)`, or 'corners' format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is meant to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0, 1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None`, or an array object of any shape broadcastable to the image shape. Its values
            are subtracted from the image pixel values. For example, pass a list of three integers for per-channel mean normalization.
        divide_by_stddev (array-like, optional): `None`, or an array object. Like `subtract_mean`, but the image pixel values are
            divided by it for normalization.
        swap_channels (list, optional): Either `False`, or a list of integers representing the order into which the input image
            channels should be swapped.
        confidence_thresh (float, optional): A float in [0, 1), the minimum classification confidence for a class to be considered.
        iou_threshold (float, optional): A float in [0, 1]. All boxes with a Jaccard similarity greater than or equal to
            `iou_threshold` are considered to contain the object.
        top_k (int, optional): The number of highest-scoring predictions kept per batch item after the non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximum number of predictions passed through the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, the function returns not only the model but also
            a list containing the dimensions of the predictor layers.

    Returns:
        model: The Keras SSD300 model.
        predictor_sizes (optional): A numpy array containing the `(height, width)` part of the output tensor shape for each
            convolutional predictor layer.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 6  # The original SSD300 network has 6 predictor convolutional layers.
    n_classes += 1  # Number of classes, + 1 to account for the background class.
    l2_reg = l2_regularization  # L2 regularization rate.
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]

    ############################################################################
    # Sanity checks on the arguments.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    # Build the list of scales
    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (not (steps is None)) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer (only needed for the anchor box layers).
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes predicted per cell for each predictor layer.
    # We need this to know how many channels the predictor layers must have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) + 1)  # +1 for the aspect ratio 1 case
            else:
                n_boxes.append(len(ar))
    else:  # If only one global aspect ratio list was passed, the number of boxes is the same for every layer.
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define the functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        if len(swap_channels) == 3:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]]
            ],
                           axis=-1)
        elif len(swap_channels) == 4:
            return K.stack([
                tensor[..., swap_channels[0]], tensor[..., swap_channels[1]],
                tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]
            ],
                           axis=-1)

    ############################################################################
    # Step 1: Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_channel_swap')(x1)

    ############################################################################
    # Step 1.1: Compute the base network (VGG16).
    ############################################################################

    conv1_1 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_1')(x1)
    conv1_2 = Conv2D(64, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv1_2')(conv1_1)
    pool1 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool1')(conv1_2)

    conv2_1 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(128, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv2_2')(conv2_1)
    pool2 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool2')(conv2_2)

    conv3_1 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_2')(conv3_1)
    conv3_3 = Conv2D(256, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv3_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool3')(conv3_3)

    conv4_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_1')(pool3)
    conv4_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_2')(conv4_1)
    conv4_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv4_3')(conv4_2)
    pool4 = MaxPooling2D(pool_size=(2, 2),
                         strides=(2, 2),
                         padding='same',
                         name='pool4')(conv4_3)

    conv5_1 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_1')(pool4)
    conv5_2 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_2')(conv5_1)
    conv5_3 = Conv2D(512, (3, 3),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv5_3')(conv5_2)
    pool5 = MaxPooling2D(pool_size=(3, 3),
                         strides=(1, 1),
                         padding='same',
                         name='pool5')(conv5_3)

    ############################################################################
    # Step 1.2: Apply (3 x 3) convolutional filters to compute the feature maps.
    ############################################################################

    fc6 = Conv2D(1024, (3, 3),
                 dilation_rate=(6, 6),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc6')(pool5)
    print('fully connected 6: ', fc6.get_shape())
    fc7 = Conv2D(1024, (1, 1),
                 activation='relu',
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(l2_reg),
                 name='fc7')(fc6)
    print('fully connected 7: ', fc7.get_shape())
    conv6_1 = Conv2D(256, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv6_2')(conv6_1)
    print('conv6_2: ', conv6_2.get_shape())
    conv7_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv7_2')(conv7_1)
    print('conv7_2: ', conv7_2.get_shape())
    conv8_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_1')(conv7_2)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv8_2')(conv8_1)
    print('conv8_2: ', conv8_2.get_shape())
    conv9_1 = Conv2D(128, (1, 1),
                     activation='relu',
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_1')(conv8_2)
    conv9_2 = Conv2D(256, (3, 3),
                     strides=(1, 1),
                     activation='relu',
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv9_2')(conv9_1)
    print('conv9_2: ', conv9_2.get_shape())
    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)
    print('conv4_3_norm.shape: ', conv4_3_norm.get_shape())

    ############################################################################
    # Step 1.3: Output the class probability distribution for each default bounding box.
    ############################################################################

    ### Build the convolutional predictor layers on top of the base network.
    # We predict confidence values for every box, so the confidence predictors have depth `n_boxes * n_classes`.
    # The output of the confidence layers has shape `(batch, height, width, n_boxes * n_classes)`.
    conv4_3_norm_mbox_conf = Conv2D(
        n_boxes[0] * n_classes, (3, 3),
        padding='same',
        kernel_initializer='he_normal',
        kernel_regularizer=l2(l2_reg),
        name='conv4_3_norm_mbox_conf')(conv4_3_norm)
    print('conv4_3_norm_mbox_conf.shape: ', conv4_3_norm_mbox_conf.get_shape())
    fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3),
                           padding='same',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l2(l2_reg),
                           name='fc7_mbox_conf')(fc7)
    print('fc7_mbox_conf.shape: ', fc7_mbox_conf.get_shape())
    conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv6_2_mbox_conf')(conv6_2)
    conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv7_2_mbox_conf')(conv7_2)
    conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv8_2_mbox_conf')(conv8_2)
    conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv9_2_mbox_conf')(conv9_2)
    print('conv9_2_mbox_conf: ', conv9_2_mbox_conf.get_shape())

    ############################################################################
    # Step 1.4: Output the offset parameters of the default bounding boxes for each cell of the feature maps.
    ############################################################################

    # We predict 4 coordinates for every box, so the localization predictors have depth `n_boxes * 4`.
    # The output shape of the localization layers is `(batch, height, width, n_boxes * 4)`.
    conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3),
                                   padding='same',
                                   kernel_initializer='he_normal',
                                   kernel_regularizer=l2(l2_reg),
                                   name='conv4_3_norm_mbox_loc')(conv4_3_norm)
    print('conv4_3_norm_mbox_loc: ', conv4_3_norm_mbox_loc.get_shape())
    fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3),
                          padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(l2_reg),
                          name='fc7_mbox_loc')(fc7)
    conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv6_2_mbox_loc')(conv6_2)
    conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv7_2_mbox_loc')(conv7_2)
    conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv8_2_mbox_loc')(conv8_2)
    conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv9_2_mbox_loc')(conv9_2)
    print('conv9_2_mbox_loc: ', conv9_2_mbox_loc.get_shape())

    ############################################################################
    # Step 1.5: Generate the anchor boxes that serve as the reference for predicting the offsets of the bounding boxes around objects.
    ############################################################################

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation of the model).
    # The output shape of the anchors is `(batch, height, width, n_boxes, 8)`.
    conv4_3_norm_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[0],
        next_scale=scales[1],
        aspect_ratios=aspect_ratios[0],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[0],
        this_offsets=offsets[0],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc)
    print('conv4_3_norm_mbox_priorbox: ',
          conv4_3_norm_mbox_priorbox.get_shape())
    fc7_mbox_priorbox = AnchorBoxes(img_height,
                                    img_width,
                                    this_scale=scales[1],
                                    next_scale=scales[2],
                                    aspect_ratios=aspect_ratios[1],
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    this_steps=steps[1],
                                    this_offsets=offsets[1],
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    coords=coords,
                                    normalize_coords=normalize_coords,
                                    name='fc7_mbox_priorbox')(fc7_mbox_loc)
    print('fc7_mbox_priorbox: ', fc7_mbox_priorbox.get_shape())
    conv6_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[2],
        next_scale=scales[3],
        aspect_ratios=aspect_ratios[2],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[2],
        this_offsets=offsets[2],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc)
    print('conv6_2_mbox_priorbox: ', conv6_2_mbox_priorbox.get_shape())
    conv7_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[3],
        next_scale=scales[4],
        aspect_ratios=aspect_ratios[3],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[3],
        this_offsets=offsets[3],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc)
    print('conv7_2_mbox_priorbox: ', conv7_2_mbox_priorbox.get_shape())
    conv8_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[4],
        next_scale=scales[5],
        aspect_ratios=aspect_ratios[4],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[4],
        this_offsets=offsets[4],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc)
    print('conv8_2_mbox_priorbox: ', conv8_2_mbox_priorbox.get_shape())
    conv9_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[5],
        next_scale=scales[6],
        aspect_ratios=aspect_ratios[5],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[5],
        this_offsets=offsets[5],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc)
    print('conv9_2_mbox_priorbox: ', conv9_2_mbox_priorbox.get_shape())

    ############################################################################
    # Step 2: Reshape the output tensors
    ############################################################################

    ############################################################################
    # Step 2.1: Reshape the class prediction outputs
    ############################################################################

    # Reshape the class predictions into 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated on the last axis so that softmax can be computed over them.
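    # Illustrative shape arithmetic (assuming the canonical 300x300 SSD, where conv4_3 is 38x38 with
    # 4 boxes per cell): (batch, 38, 38, 4 * n_classes) -> (batch, 38*38*4, n_classes) = (batch, 5776, n_classes).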
    conv4_3_norm_mbox_conf_reshape = Reshape(
        (-1, n_classes),
        name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf)
    fc7_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf)
    conv6_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf)
    conv7_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf)
    conv8_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf)
    conv9_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf)
    print('conv4_3_norm_mbox_conf_reshape: ',
          conv4_3_norm_mbox_conf_reshape.get_shape())
    print('fc7_mbox_conf_reshape: ', fc7_mbox_conf_reshape.get_shape())
    print('conv6_2_mbox_conf_reshape: ', conv6_2_mbox_conf_reshape.get_shape())
    print('conv7_2_mbox_conf_reshape: ', conv7_2_mbox_conf_reshape.get_shape())
    print('conv8_2_mbox_conf_reshape: ', conv8_2_mbox_conf_reshape.get_shape())
    print('conv9_2_mbox_conf_reshape: ', conv9_2_mbox_conf_reshape.get_shape())

    ############################################################################
    # Step 2.2: Reshape the bounding box prediction outputs
    ############################################################################

    # Reshape the box predictions into 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the 4 box coordinates isolated on the last axis to compute the smooth L1 loss over them
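    # Illustrative shape arithmetic (same assumptions as above): the conv4_3 loc tensor
    # (batch, 38, 38, 4 * 4) becomes (batch, 5776, 4).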
    conv4_3_norm_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc)
    fc7_mbox_loc_reshape = Reshape((-1, 4),
                                   name='fc7_mbox_loc_reshape')(fc7_mbox_loc)
    conv6_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc)
    conv7_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc)
    conv8_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc)
    conv9_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc)
    print('conv4_3_norm_mbox_loc_reshape: ',
          conv4_3_norm_mbox_loc_reshape.get_shape())
    print('fc7_mbox_loc_reshape: ', fc7_mbox_loc_reshape.get_shape())
    print('conv6_2_mbox_loc_reshape: ', conv6_2_mbox_loc_reshape.get_shape())
    print('conv7_2_mbox_loc_reshape: ', conv7_2_mbox_loc_reshape.get_shape())
    print('conv8_2_mbox_loc_reshape: ', conv8_2_mbox_loc_reshape.get_shape())
    print('conv9_2_mbox_loc_reshape: ', conv9_2_mbox_loc_reshape.get_shape())

    ############################################################################
    # Step 2.3: Reshape the anchor box outputs
    ############################################################################

    # Reshape the anchor box tensors into 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8),
        name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)
    print('conv4_3_norm_mbox_priorbox_reshape: ',
          conv4_3_norm_mbox_priorbox_reshape.get_shape())
    print('fc7_mbox_priorbox_reshape: ', fc7_mbox_priorbox_reshape.get_shape())
    print('conv6_2_mbox_priorbox_reshape: ',
          conv6_2_mbox_priorbox_reshape.get_shape())
    print('conv7_2_mbox_priorbox_reshape: ',
          conv7_2_mbox_priorbox_reshape.get_shape())
    print('conv8_2_mbox_priorbox_reshape: ',
          conv8_2_mbox_priorbox_reshape.get_shape())
    print('conv9_2_mbox_priorbox_reshape: ',
          conv9_2_mbox_priorbox_reshape.get_shape())
    ### Concatenate the predictions from the different layers

    ############################################################################
    # Step 3: Concatenate the boxes across layers
    ############################################################################

    ############################################################################
    # Step 3.1: Concatenate the class confidence outputs
    ############################################################################

    # Axis 0 (batch) and axis 2 (n_classes or 4) are the same for all prediction layers,
    # so we concatenate along axis 1, the number of boxes per layer.
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
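    # Illustrative total (canonical 300x300 SSD): 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4
    # = 5776 + 2166 + 600 + 150 + 36 + 4 = 8732, so n_boxes_total = 8732.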
    mbox_conf = Concatenate(axis=1, name='mbox_conf')([
        conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape,
        conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape,
        conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape
    ])
    print('mbox_conf.shape: ', mbox_conf.get_shape())

    ############################################################################
    # Step 3.2: Concatenate the box location outputs
    ############################################################################

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')([
        conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape,
        conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape,
        conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape
    ])

    print('mbox_loc.shape: ', mbox_loc.get_shape())

    ############################################################################
    # Step 3.3: Concatenate the anchor box outputs
    ############################################################################

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([
        conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape,
        conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape,
        conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape
    ])

    print('mbox_priorbox.shape: ', mbox_priorbox.get_shape())

    ############################################################################
    # Step 4: Compute the output
    ############################################################################

    ############################################################################
    # Step 4.1: Apply the softmax activation to the confidence predictions
    ############################################################################

    # The box prediction coordinates are passed to the loss function as-is,
    # but for the class predictions we first apply a softmax activation layer
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class predictions, box predictions and anchors into one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
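    # For example, with 21 classes (20 PASCAL VOC classes + background, an illustrative assumption)
    # the last axis would be 21 + 4 + 8 = 33.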
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])
    print('predictions.shape: ', predictions.get_shape())
    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        predictor_sizes = np.array([
            conv4_3_norm_mbox_conf._keras_shape[1:3],
            fc7_mbox_conf._keras_shape[1:3],
            conv6_2_mbox_conf._keras_shape[1:3],
            conv7_2_mbox_conf._keras_shape[1:3],
            conv8_2_mbox_conf._keras_shape[1:3],
            conv9_2_mbox_conf._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
    def build_model(self, args):
        # based on https://keras.io/getting-started/functional-api-guide/
        start = time.time()
        # CPUs are used via a "device" which is just a threadpool
        if args.nCpu > 0:
            import tensorflow as tf
            tf.Session(config=tf.ConfigProto(
                intra_op_parallelism_threads=args.nCpu))
            print('restrict CPU count to ', args.nCpu)

        dropFrac = args.dropFrac
        sh1 = self.data['train']['X'].shape

        print('build_model inp1:', sh1, 'design=', self.modelDesign)

        if self.modelDesign == 'cnn1d':  # . . . . . . . . . . . . . . .
            xa = Input(shape=(sh1[1], ), name='inp1d')
            h = Reshape((sh1[1], 1))(xa)
            kernel = 5
            pool_len = 3  # factor by which the time bins are reduced per pooling
            cnnDim = [2, 4]
            numCnn = len(cnnDim)
            print(' cnn1Dim:', cnnDim)
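            # Illustrative output-length arithmetic for 'valid' convolution followed by pooling
            # (assuming stride-1 Conv1D and MaxPool1D with its default stride equal to the pool size):
            # L_out = floor((L - kernel + 1) / pool_len), e.g. L=100, kernel=5, pool_len=3 -> floor(96/3) = 32.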

            for i in range(numCnn):
                dim = cnnDim[i]
                h = Conv1D(dim,
                           kernel,
                           activation='relu',
                           padding='valid',
                           name='cnn%d_d%d_k%d' % (i, dim, kernel))(h)
                h = MaxPool1D(pool_size=pool_len, name='pool_%d' % (i))(h)
                print('cnn 1d', i, h.get_shape())

            h = Flatten(name='to_1d')(h)

        if self.modelDesign == 'cnn2d':  # . . . . . . . . . . . . . . .
            xa = Input(shape=(
                sh1[1],
                sh1[2],
            ), name='inp2d')
            h = Reshape((sh1[1], sh1[2], 1))(xa)
            kernel = 3
            pool_len = 2  # factor by which the time bins are reduced per pooling
            cnnDim = [4, 8]
            numCnn = len(cnnDim)
            print(' cnn2Dim:', cnnDim)

            for i in range(numCnn):
                dim = cnnDim[i]
                h = Conv2D(dim,
                           kernel,
                           activation='relu',
                           padding='valid',
                           name='cnn%d_d%d_k%d' % (i, dim, kernel))(h)
                h = MaxPool2D(pool_size=pool_len, name='pool_%d' % (i))(h)
                print('cnn 2d', i, h.get_shape())

            h = Flatten(name='to_1d')(h)

        if self.modelDesign == 'lstm':  # . . . . . . . . . . . . . . .
            lstmDim = 10
            recDropFrac = 0.5 * dropFrac
            print(' lstmDim:', lstmDim)
            h = LSTM(lstmDim,
                     activation='tanh',
                     recurrent_dropout=recDropFrac,
                     dropout=dropFrac,
                     name='lstmA_%d' % lstmDim,
                     return_sequences=True)(h)
            h = LSTM(lstmDim,
                     activation='tanh',
                     recurrent_dropout=recDropFrac,
                     dropout=dropFrac,
                     name='lstmB_%d' % lstmDim,
                     return_sequences=False)(h)

        print('pre FC=>', h.get_shape())
        h = Dropout(dropFrac, name='dropFC')(h)

        # .... FC  layers  COMMON
        fcDim = [10, 5]
        numFC = len(fcDim)

        for i in range(numFC):
            dim = fcDim[i]
            h = Dense(dim, activation='relu', name='fc%d' % i)(h)
            h = Dropout(dropFrac, name='drop%d' % i)(h)
            print('fc', i, h.get_shape())

        y = Dense(1, activation='sigmoid', name='sigmoid')(h)

        lossName = 'binary_crossentropy'
        optimizerName = 'adam'

        print('build_model: loss=', lossName, ' optName=', optimizerName,
              ' out:', y.get_shape())
        # full model
        model = Model(inputs=xa, outputs=y)

        model.compile(optimizer=optimizerName,
                      loss=lossName,
                      metrics=['accuracy'])
        self.model = model

        model.summary()  # will print
        print('model size=%.1fK compiled elaT=%.1f sec' %
              (model.count_params() / 1000., time.time() - start))
            biLSTM_Input = Reshape((sequence_length,embedding_dim))(model1)
            left_branch = LSTM(300, input_shape=(40, 300), return_sequences=True, input_length=40)(biLSTM_Input)

            right_branch = LSTM(300, input_shape=(40, 300), return_sequences=True, input_length=40, go_backwards=True)(biLSTM_Input)

            print "left_branch.get_shape()",left_branch.get_shape()
            print "right_branch.get_shape()",right_branch.get_shape()
            lstm_merged = merge([left_branch,right_branch],mode='ave')
            lstm_merged = Reshape([40,300,1])(lstm_merged)
            lstm_merged = Dropout(0.2)(lstm_merged)
            graph_in_temp = merge([model1, model2,lstm_merged],mode='concat',concat_axis=-1)

            graph_in = Reshape((40,300,3))(graph_in_temp)

            print(graph_in.get_shape())

            conv_11 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[0], nb_col=col_size, border_mode='valid', activation='relu')(graph_in)
            conv_22 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[1], nb_col=col_size, border_mode='valid', activation='relu')(graph_in)
            conv_33 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[2], nb_col=col_size, border_mode='valid', activation='relu')(graph_in)

            conv_11 = MaxPooling2D(pool_size=(int(conv_11.get_shape()[1]),int(conv_11.get_shape()[2])))(conv_11)
            conv_22 = MaxPooling2D(pool_size=(int(conv_22.get_shape()[1]),int(conv_22.get_shape()[2])))(conv_22)
            conv_33 = MaxPooling2D(pool_size=(int(conv_33.get_shape()[1]),int(conv_33.get_shape()[2])))(conv_33)


            conva = merge([conv_11, conv_22, conv_33], mode='concat',concat_axis=-1)
            
            conva = Dropout(dropout_prob[1])(conva)

            print(conva.get_shape())
Ejemplo n.º 18
0
def googleNet_n(x,
                data_format='channels_last',
                num_classes=24,
                num_layers=[1, 1, 2, 1],
                features=[1, 1, 1, 1, 1]):
    xft = Lambda(lambda v: tf_fft(v))(x)
    x = Reshape(in_shp + (1, ), input_shape=in_shp)(x)
    x = Conv2D(filters=64 * features[0],
               kernel_size=[2, 7],
               strides=[2, 2],
               data_format=data_format,
               padding='same',
               activation='relu')(x)
    x = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(x)
    for dep in range(num_layers[0]):
        x = Conv2D(filters=192 * features[1],
                   kernel_size=[1, 3],
                   strides=[1, 1],
                   padding='same',
                   activation='relu')(x)
    x = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(x)

    xft = Reshape(in_shp + (1, ), input_shape=in_shp)(xft)
    xft = Conv2D(filters=64 * features[0],
                 kernel_size=[2, 4],
                 strides=[2, 2],
                 data_format=data_format,
                 padding='same',
                 activation='relu')(xft)
    xft = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(xft)
    for dep in range(num_layers[0]):
        xft = Conv2D(filters=192 * features[1],
                     kernel_size=[1, 3],
                     strides=[1, 1],
                     padding='same',
                     activation='relu')(xft)
    xft = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(xft)
    print(x.get_shape(), xft.get_shape())
    x = keras.layers.concatenate([x, xft], axis=3)
    print(x.get_shape())
    for dep in range(num_layers[1]):
        x = inception(x,
                      height=2,
                      fs=np.array([32, 32, 32, 32, 32]) * features[2],
                      tw_tower=True)
    x = MaxPooling2D([1, 3], strides=2, padding='same')(x)
    for dep in range(num_layers[2]):
        x = inception(x,
                      height=2,
                      fs=np.array([48, 96, 48, 96, 96]) * features[3],
                      with_residual=True)
    #out_mid = out_tower(x, dr=0.3)
    #for dep in range(num_layers[3]):
    #    x = inception(x, height=2, fs=np.array([48,96,48,96,96])*features[4], with_residual=True)
    x = MaxPooling2D([2, 3], strides=2, padding='same')(x)
    for dep in range(num_layers[3]):
        x = inception(x,
                      height=1,
                      fs=np.array([32, 32, 32, 32, 32]) * features[4])
    out = out_tower(x, dr=0.5, reg=args.confireg)
    #out = Average()([out_mid, out_late])
    return out
Ejemplo n.º 19
0
max_len_1 = 10
max_len_2 = 10
embed_size = 50

# mm = cal_match_matrix(query, doc, max_len_1, max_len_2, glove, embed_size)


def test(mm, bin_num=20):
    a = 0
    for i, j in mm[:, 1]:
        a += (i + j)
    return a


inputs = Input(shape=(1, 10, 10))

conv2d = Conv2D(filters=5,
                kernel_size=3,
                data_format='channels_first',
                padding='valid',
                activation='relu')(inputs)

print(conv2d.get_shape())
# fconv2d = Flatten()(conv2d)
# print(fconv2d.get_shape())

fconv2d = Reshape((2, -1))(conv2d)
print(fconv2d.get_shape())
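# Illustrative shape arithmetic: the (1, 10, 10) channels_first input with 5 filters, a 3x3 kernel and
# 'valid' padding gives (None, 5, 8, 8); Reshape((2, -1)) then yields (None, 2, 160) since 5*8*8 = 320.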

# a = Lambda(lambda x: test(x))(conv2d)
# print(a.get_shape())
Ejemplo n.º 20
0
    def old_model(self, input_dim = (320, 320), input_channels=1, output_channels=4, drop_out=0.0, batch_Norm=True, USE_BIAS = False, interpolation='bilinear', fullbottle=False):
        n_filters = 64
        inputs = Input((input_dim[0], input_dim[1], 3))
        #get VGG16
        vgg16 = VGG16(input_tensor=inputs, include_top=False)
#        vgg16 = VGG16( input_shape=inputShape, include_top=False) 
        for l in vgg16.layers:
            l.trainable = True
#        vgg16.summary()
        out_vgg16 = vgg16(inputs)
    
        #get vgg layer outputs    
        block1_conv2 = vgg16.get_layer("block1_conv2").output    
        block2_conv2 = vgg16.get_layer("block2_conv2").output
        block3_conv3 = vgg16.get_layer("block3_conv3").output      
        block4_conv3 = vgg16.get_layer("block4_conv3").output
        block5_conv3 = vgg16.get_layer("block5_conv3").output 
        out_vgg16 = vgg16.get_layer("block5_pool").output
  
        #--mid convolutions--
        convMid_1 = self.double_conv2D(n_filters*16, 3, out_vgg16 , batch_norm=batch_Norm, dropout=drop_out)    
        if fullbottle:
            convMid_1 = self.bn_conv2D(n_filters*16, 3, convMid_1)
#        ------- up path ---------- 
#        upconv_1 = Convolution2DTranspose(n_filters*8, (2, 2), strides=(2, 2), use_bias= USE_BIAS)(convMid_1)
#        upconv_1 = self.SubpixelConv2D((convMid_1.get_shape()[1], convMid_1.get_shape()[2], convMid_1.get_shape()[3]), scale=2, name='subpix1')(convMid_1)  
        upconv_1 = UpSampling2D((2,2), interpolation=interpolation)(convMid_1)
        conca_1 = concatenate([upconv_1, block5_conv3], axis=self.axis)
        conv_1 = self.double_conv2D(n_filters*8, 3, conca_1, batch_norm=batch_Norm, dropout=drop_out)
        conv_1 = self.bn_conv2D(n_filters*8, 3, conv_1)
                
#        upconv_2 = Convolution2DTranspose(n_filters*8, (2, 2), strides=(2, 2), use_bias= USE_BIAS)(conv_1)        
#        upconv_2 = self.SubpixelConv2D((conv_1.get_shape()[1], conv_1.get_shape()[2], conv_1.get_shape()[3]), scale=2, name='subpix2')(conv_1)
        upconv_2 = UpSampling2D((2,2), interpolation=interpolation)(conv_1)
        conca_2 = concatenate([upconv_2, block4_conv3], axis=self.axis)
        conv_2 = self.double_conv2D(n_filters*8, 3, conca_2, batch_norm=batch_Norm, dropout=drop_out)
        conv_2 = self.bn_conv2D(n_filters*8, 3, conv_2)
        
#        upconv_3 = Convolution2DTranspose(n_filters*4, (2, 2), strides=(2, 2), use_bias= USE_BIAS)(conv_2)
#        upconv_3 = self.SubpixelConv2D((conv_2.get_shape()[1], conv_2.get_shape()[2], conv_2.get_shape()[3]), scale=2, name='subpix3')(conv_2)
        upconv_3 = UpSampling2D((2,2), interpolation=interpolation)(conv_2)
        conca_3 = concatenate([upconv_3, block3_conv3], axis=self.axis)
        conv_3 = self.double_conv2D(n_filters*4, 3, conca_3, batch_norm=batch_Norm, dropout=drop_out) 
        conv_3 = self.bn_conv2D(n_filters*4, 3, conv_3)
        
#        upconv_4 = Convolution2DTranspose(n_filters*2, (2, 2), strides=(2, 2), use_bias= USE_BIAS)(conv_3)
#        upconv_4 = self.SubpixelConv2D((conv_3.get_shape()[1], conv_3.get_shape()[2], conv_3.get_shape()[3]), scale=2, name='subpix4')(conv_3)
        upconv_4 = UpSampling2D((2,2), interpolation=interpolation)(conv_3)
        conca_4 = concatenate([upconv_4, block2_conv2], axis=self.axis)
        conv_4 = self.double_conv2D(n_filters*2, 3, conca_4, batch_norm=batch_Norm, dropout=drop_out)        
    
#        upconv_5 = Convolution2DTranspose(n_filters, (2, 2), strides=(2, 2), use_bias= USE_BIAS)(conv_4)
#        upconv_5 = self.SubpixelConv2D((conv_4.get_shape()[1], conv_4.get_shape()[2], conv_4.get_shape()[3]), scale=2, name='subpi5')(conv_4)
        upconv_5 = UpSampling2D((2,2), interpolation=interpolation)(conv_4)
        conca_5 = concatenate([upconv_5, block1_conv2], axis=self.axis)
        conv_5 = self.double_conv2D(n_filters, 3, conca_5, batch_norm=batch_Norm, dropout=drop_out)    
        
        in_c = Reshape((input_dim[0], input_dim[1], 1))(Lambda(lambda x: x[:, :, :, 0])(inputs))
        print(in_c.get_shape())
        print(inputs.get_shape())    
        
        conca_6 = concatenate([conv_5, in_c], axis=self.axis)
        out = Conv2D(output_channels, (1, 1))(conca_6)
        out = Activation('softmax')(out)
        model = Model(inputs=inputs, outputs=out, name="unet_vgg16")
        return model 
Ejemplo n.º 21
0
def ResNetGenerator(c):
    """
    Returns
        A Keras model instance.
    """
    print('#' * 10, ' Create generator ', '#' * 10)
    net = {}
    signal_input = Input(shape=(c.audio_size, ))
    x = Reshape([c.audio_size, 1])(signal_input)

    # COMPRESS
    x = Conv1D(64, 7, strides=2, padding='same', name='conv1')(x)
    x = BatchNormalization(name='bn_conv1')(x)
    x = LeakyReLU(alpha=0.3)(x)
    print('COMPRESSION')
    print(c.audio_size, '-> ', x.get_shape().as_list())

    for i in range(1, c.n_compress_block + 1):
        net[i] = x
        print(x.get_shape().as_list(), '-> ', end='')
        x = conv_block(x,
                       3, [
                           i**2 * c.convo_size, i**2 * c.convo_size,
                           4 * i**2 * c.convo_size
                       ],
                       stage=i,
                       block='a')
        print(x.get_shape().as_list())
        x = identity_block(x,
                           3, [
                               i**2 * c.convo_size, i**2 * c.convo_size,
                               4 * i**2 * c.convo_size
                           ],
                           stage=i,
                           block='b')
        x = identity_block(x,
                           3, [
                               i**2 * c.convo_size, i**2 * c.convo_size,
                               4 * i**2 * c.convo_size
                           ],
                           stage=i,
                           block='c')
    [print(k, v) for k, v in net.items()]
    print('\nAfter compression', x, '\n')

    # DECOMPRESS
    print('DECOMPRESSION')
    for i in range(c.n_compress_block, 0, -1):
        print(i, end=' ')
        print(x.get_shape().as_list(), '-> ', end='')
        x = deconv_block(x,
                         3, [
                             i**2 * c.convo_size, i**2 * c.convo_size,
                             4 * i**2 * c.convo_size
                         ],
                         stage=i,
                         block='a_incr')
        print(x.get_shape().as_list())
        x = Concatenate(axis=2)([net[i], x])
        x = identity_block(x,
                           3, [
                               i**2 * c.convo_size, i**2 * c.convo_size,
                               4 * i**2 * c.convo_size
                           ],
                           stage=i,
                           block='b_incr')
        x = identity_block(x,
                           3, [
                               i**2 * c.convo_size, i**2 * c.convo_size,
                               4 * i**2 * c.convo_size
                           ],
                           stage=i,
                           block='c_incr')
    print(x.get_shape().as_list(), '-> ', end='')
    x = deconv_block(
        x,
        3, [i**2 * c.convo_size, i**2 * c.convo_size, 4 * i**2 * c.convo_size],
        stage=42,
        block='a_incr')
    x = identity_block(
        x,
        3, [i**2 * c.convo_size, i**2 * c.convo_size, 4 * i**2 * c.convo_size],
        stage=42,
        block='c_incr')
    print(x.get_shape().as_list())
    x = Conv1D(1, 1, strides=1, padding='same')(x)
    x = Reshape((-1, ))(x)
    signal_output = Activation('tanh')(x)
    print('Recovered tensor', signal_output)
    # Create model.
    model = Model(signal_input, signal_output)

    return model
Ejemplo n.º 22
0
def create_model(num_input_channels,vocab_sizes,output_size_embedding,input_shapes,num_LSTM_layers,num_LSTM_units,num_dense_layers,num_dense_units,learning_rate,reg_param,inp_optim):
    
    outputs=[]
    inputs=[]
    squeezed=[]   
    # Input Layer Constrution: Categorical text Inputs:
    
    for i_channels in range(num_input_channels):
        name_string="input"+str(i_channels+1)
        inputs.append(Input(shape=(input_shapes[i_channels][1],1),name=name_string))
        print(inputs[i_channels].shape)
   
    # Squeeze to enable input to embedding layer:
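    # Each input has shape (batch, seq_len, 1); squeezing the last axis gives (batch, seq_len) of
    # integer indices, which the Embedding layer then maps to (batch, seq_len, output_size_embedding).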

    for i_channels in range(num_input_channels):
        squeezed.append(Lambda(lambda x:keras.backend.squeeze(x,axis=-1))(inputs[i_channels]))
        print(squeezed[i_channels].shape)
    # Embedding Layers Construction: Categorical text inputs
    for i_channels in range(num_input_channels):
        outputs.append(Embedding(input_dim=vocab_sizes[i_channels],output_dim=output_size_embedding,input_length=input_shapes[i_channels][1])(squeezed[i_channels]))

    #LSTM Layers Construction: Categorical text inputs
    #-----------------------------------------------------
    for i_channels in range(num_input_channels):
        
        for i_individual in range(num_LSTM_layers[i_channels]):

            if num_LSTM_layers[i_channels]==1:
                outputs[i_channels]=LSTM(num_LSTM_units[i_channels][i_individual], activation='relu')(outputs[i_channels])
            else:
                if i_individual==0:
                    outputs[i_channels]=LSTM(num_LSTM_units[i_channels][i_individual], activation='relu', return_sequences=True)(outputs[i_channels])
                elif i_individual==num_LSTM_layers[i_channels]-1:
                    outputs[i_channels]=LSTM(num_LSTM_units[i_channels][i_individual], activation='relu')(outputs[i_channels])
        
        
                else:
                    outputs[i_channels]=LSTM(num_LSTM_units[i_channels][i_individual], activation='relu', return_sequences=True)(outputs[i_channels])
                
    #-------------------------------------------------------
    print("LSTM Layers Constructed")
    
    # Concatenate the outputs of all LSTM layers
    concat=[]
    #print(outputs[0].get_shape().as_list())
    #print(outputs[1].get_shape().as_list())
    #print(outputs[2].get_shape().as_list())
    #print(outputs[3].get_shape().as_list())
    for i_concat in range(num_input_channels-1):
        if i_concat==0:
            output=concatenate([outputs[0],outputs[1]])
    #        print(output.get_shape().as_list())
        else:
            output=concatenate([output,outputs[i_concat+1]])
    #Dense Layers Construction
    #-------------------------------------------------------
    
    print(output.get_shape().as_list()) 
    print("Create Dense Layers")
    
    #-----------------------------
    #Dense Layer Creation
    output=Reshape((-1,1))(output)
    print(output.get_shape().as_list())

    input1 = Input(shape=(1, 1))  # additional scalar input
    output = concatenate([output, input1], axis=1)  # append the extra input to the LSTM outputs
    output=Flatten()(output)

    for i_dense in range(num_dense_layers):
        if i_dense==num_dense_layers-1:
            output=Dense(num_dense_units[i_dense],activation='softmax',kernel_regularizer=regularizers.l2(reg_param))(output)
        else:
            output=Dense(num_dense_units[i_dense],activation='relu',kernel_regularizer=regularizers.l2(reg_param))(output)
            output=BatchNormalization()(output)
    #----------------------------
    print("Done")    
    inputs.append(input1)
    
    model=Model(inputs=inputs,outputs=output)
    optim=Adam(lr=learning_rate,clipnorm=1.,amsgrad=True)
    model.compile(optimizer=optim,loss="categorical_crossentropy",metrics=["accuracy"])

    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)

    model.summary()

    return model
Ejemplo n.º 23
0
def draw_capsnet_model(hyper_param, embedding_matrix=None, verbose=True):
    """
    Input: hyper parameters dictionary
    
    Construct:
        input layers : x , x_pos(o), x_captialization(o)
        embedding matrix : use_glove or randomly initialize
        conv1 : first convolution layer
        primarycaps : conv2 and squash function applied
        ner_caps : make 8 ner capsules of specified dim
        out_pred : calc length of 8 ner capsules as 8 prob. predictions over 8 ner classes
    
    Returns: 
        if decoding/reconstruction disabled --> a single keras.models.Model object
        if decoding/reconstruction enabled --> three keras.models.Model objects
    """

    # input layer(s)
    x = Input(shape=(hyper_param['maxlen'], ), name='x')
    if hyper_param['use_pos_tags']:
        x_pos = Input(shape=(hyper_param['maxlen'], hyper_param['poslen']),
                      name='x_pos')
    if hyper_param['use_capitalization_info']:
        x_capital = Input(shape=(hyper_param['maxlen'],
                                 hyper_param['capitallen']),
                          name='x_capital')

    # embedding matrix
    if hyper_param['use_glove']:
        embed = Embedding(hyper_param['max_features'], hyper_param['embed_dim'], weights=[embedding_matrix],\
                          input_length=hyper_param['maxlen'], trainable=hyper_param['allow_glove_retrain'])(x)
    else:
        embed = Embedding(hyper_param['max_features'], hyper_param['embed_dim'], input_length=hyper_param['maxlen'],\
                          embeddings_initializer="random_uniform" )(x)

    # concat embeddings with additional features
    if hyper_param['use_pos_tags'] and hyper_param['use_capitalization_info']:
        embed = Concatenate(axis=-1)([embed, x_pos, x_capital])
    elif hyper_param['use_pos_tags'] and (
            not hyper_param['use_capitalization_info']):
        embed = Concatenate(axis=-1)([embed, x_pos])
    elif (not hyper_param['use_pos_tags']
          ) and hyper_param['use_capitalization_info']:
        embed = Concatenate(axis=-1)([embed, x_capital])
    else:
        embed = embed

    # add dropout here
    if hyper_param['embed_dropout'] > 0.0:
        embed = SpatialDropout1D(hyper_param['embed_dropout'])(embed)

    # feed embeddings into conv1
    conv1 = Conv1D( filters=hyper_param['conv1_filters'], \
                   kernel_size=hyper_param['conv1_kernel_size'],\
                   strides=hyper_param['conv1_strides'], \
                   padding=hyper_param['conv1_padding'],\
                   activation='relu', name='conv1')(embed)

    # make primary capsules
    if hyper_param['use_2D_primarycaps']:
        convShape = conv1.get_shape().as_list()
        conv1 = Reshape((convShape[1], convShape[2], 1))(conv1)
        primaryCapLayer = PrimaryCap
    else:
        primaryCapLayer = PrimaryCap1D

    # make primary capsules
    primarycaps = primaryCapLayer(conv1, \
                             dim_capsule=hyper_param['primarycaps_dim_capsule'],\
                             n_channels=hyper_param['primarycaps_n_channels'],\
                             kernel_size=hyper_param['primarycaps_kernel_size'], \
                             strides=hyper_param['primarycaps_strides'], \
                             padding=hyper_param['primarycaps_padding'])

    # make ner capsules
    ner_caps = CapsuleLayer(num_capsule=hyper_param['ner_classes'], \
                            dim_capsule=hyper_param['ner_capsule_dim'], \
                            routings=hyper_param['num_dynamic_routing_passes'], \
                            name='nercaps')(primarycaps)

    # replace each ner capsule with its length
    out_pred = Length(name='out_pred')(ner_caps)
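    # In the CapsNet formulation this follows, the L2 norm of each output capsule vector is used as
    # the predicted probability of that class, so out_pred has shape (batch, ner_classes).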

    if verbose:
        print("x", x.get_shape())
        if hyper_param['use_pos_tags']: print("x_pos", x_pos.get_shape())
        if hyper_param['use_capitalization_info']:
            print("x_capital", x_capital.get_shape())
        print("embed", embed.get_shape())
        print("conv1", conv1.get_shape())
        print("primarycaps", primarycaps.get_shape())
        print("ner_caps", ner_caps.get_shape())
        print("out_pred", out_pred.get_shape())

    if hyper_param['use_decoder']:
        decoder_y_cat = Input(shape=(hyper_param['ner_classes'], ),
                              name='decoder_y_cat')
        masked_by_y = Mask(name='masked_by_y')(
            [ner_caps,
             decoder_y_cat])  # true label is used to mask during training
        masked = Mask()(
            ner_caps)  # mask using capsule with maximal length for prediction

        # decoder for training
        train_decoder_dense1 = Dense(hyper_param['decoder_feed_forward_1'], activation='relu',\
                               input_dim=hyper_param['ner_capsule_dim']*hyper_param['ner_classes'],\
                               name='train_decoder_dense1')(masked_by_y)
        train_decoder_dense1_dropout = Dropout(
            hyper_param['decoder_dropout'])(train_decoder_dense1)
        train_decoder_dense2 = Dense(hyper_param['decoder_feed_forward_2'], activation='relu',\
                                     name='train_decoder_dense2')(train_decoder_dense1_dropout)
        train_decoder_dense2_dropout = Dropout(
            hyper_param['decoder_dropout'])(train_decoder_dense2)
        train_decoder_output = Dense(hyper_param['embed_dim'], activation=None,\
                                     name='train_decoder_output')(train_decoder_dense2_dropout)

        # decoder for evaluation (prediction)
        eval_decoder_dense1 = Dense(hyper_param['decoder_feed_forward_1'], activation='relu',\
                               input_dim=hyper_param['ner_capsule_dim']*hyper_param['ner_classes'],\
                               name='eval_decoder_dense1')(masked)
        eval_decoder_dense2 = Dense(hyper_param['decoder_feed_forward_2'], activation='relu',\
                                     name='eval_decoder_dense2')(eval_decoder_dense1)
        eval_decoder_output = Dense(hyper_param['embed_dim'], activation=None,\
                                     name='eval_decoder_output')(eval_decoder_dense2)

        if verbose:
            print("Decoder model enabled for GloVe vector deconstruction...")
            print("decoder_y_cat", decoder_y_cat.get_shape())
            print("masked_by_y", masked_by_y.get_shape())
            print("train_decoder_dense1", train_decoder_dense1.get_shape())
            print("train_decoder_dense1_dropout",
                  train_decoder_dense1_dropout.get_shape())
            print("train_decoder_dense2", train_decoder_dense2.get_shape())
            print("train_decoder_dense2_dropout",
                  train_decoder_dense2_dropout.get_shape())
            print("train_decoder_output", train_decoder_output.get_shape())
            print("masked", masked.get_shape())
            print("eval_decoder_dense1", eval_decoder_dense1.get_shape())
            print("eval_decoder_dense2", eval_decoder_dense2.get_shape())
            print("eval_decoder_output", eval_decoder_output.get_shape())

    # construct input list
    if hyper_param['use_pos_tags'] and hyper_param['use_capitalization_info']:
        input_list = [x, x_pos, x_capital]
    elif hyper_param['use_pos_tags'] and (
            not hyper_param['use_capitalization_info']):
        input_list = [x, x_pos]
    elif (not hyper_param['use_pos_tags']
          ) and hyper_param['use_capitalization_info']:
        input_list = [x, x_capital]
    else:
        input_list = [x]

    if hyper_param['use_decoder'] == False:
        print("decoder/reconstruction DISabled")
        print("returning 1 model")
        return Model(inputs=input_list, outputs=[out_pred])
    else:
        train_model = Model(inputs=input_list + [decoder_y_cat],
                            outputs=[out_pred, train_decoder_output])
        eval_model = Model(inputs=input_list,
                           outputs=[out_pred, eval_decoder_output])
        print("decoder/reconstruction enabled")
        print("returning a list of 2 models: train_model, eval_model")
        return train_model, eval_model
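

# A minimal usage sketch for draw_capsnet_model; every hyperparameter value below is an illustrative
# assumption, not taken from the original configuration.
hyper_param = {
    'maxlen': 80, 'use_pos_tags': False, 'use_capitalization_info': False,
    'use_glove': False, 'max_features': 20000, 'embed_dim': 100, 'embed_dropout': 0.1,
    'conv1_filters': 256, 'conv1_kernel_size': 9, 'conv1_strides': 1, 'conv1_padding': 'valid',
    'use_2D_primarycaps': True, 'primarycaps_dim_capsule': 8, 'primarycaps_n_channels': 32,
    'primarycaps_kernel_size': 9, 'primarycaps_strides': 2, 'primarycaps_padding': 'valid',
    'ner_classes': 8, 'ner_capsule_dim': 16, 'num_dynamic_routing_passes': 3,
    'use_decoder': False,
}
model = draw_capsnet_model(hyper_param, verbose=True)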