def skip_connection(inputs, residual, stochastic=False, stochastic_layers=None):
    """Sum ``residual`` with a shortcut derived from ``inputs``.

    ``inputs`` is passed through ``dropout()``.  If its filter count (index 1
    of ``_keras_shape``) differs from that of ``residual``, or its trailing
    dimensions are larger, a 1x1 ``conv2`` with the matching ``subsample``
    projects it before the sum.

    Args:
        inputs: Keras tensor entering the block.
        residual: Keras tensor produced by the residual branch.
        stochastic: if true, the shortcut is scaled by ``ScaleInTestPhase``
            and the sum is passed through a ``RandomSwitch``.
        stochastic_layers: optional list; in stochastic mode the pair
            ``(rs.death_rate, scale.death_rate)`` is appended to it.  When it
            is ``None`` nothing is recorded (previously this raised
            ``AttributeError``).

    Returns:
        The merged Keras tensor.
    """
    # NOTE(review): index 1 is read as the filter axis, i.e. this presumably
    # expects channels-first tensors -- confirm against the callers.
    inputs_filters = inputs._keras_shape[1]
    residual_filters = residual._keras_shape[1]
    subsample = (np.array(inputs._keras_shape[2:]) //
                 np.array(residual._keras_shape[2:]))
    inputs = dropout()(inputs)
    if (inputs_filters != residual_filters) or np.any(subsample > 1):
        skip = conv2(residual_filters, 1, 1, subsample=subsample,
                     bias=False)(inputs)
    else:
        skip = inputs

    if not stochastic:
        return merge((skip, residual), mode='sum')

    # NOTE(review): `death_rate` is not a parameter; it must come from an
    # enclosing or module scope that is not visible here.
    scale = ScaleInTestPhase(death_rate)
    skip = scale(skip)
    out = merge([skip, residual], mode="sum")
    rs = RandomSwitch(death_rate)
    if stochastic_layers is not None:
        stochastic_layers.append((rs.death_rate, scale.death_rate))
    return rs([out, residual])
def test_gan_custom_layer_graph():
    """Build a small conditional GAN and run its custom-layer function once.

    The generator concatenates ``z`` and ``gen_cond`` on axis 1, the
    discriminator concatenates fake and real samples on axis 0.  The function
    returned by ``compile_custom_layers(['g1', 'd1'])`` is called with random
    batches of 64 samples.
    """
    z_shape = (1, 8, 8)

    # Generator
    z_in = Input(shape=z_shape, name='z')
    cond_in = Input(shape=(1, 8, 8), name='gen_cond')
    gen_inputs = [z_in, cond_in]
    gen_joined = merge(gen_inputs, mode='concat', concat_axis=1)
    g1 = Convolution2D(1, 2, 2, activation='relu', name='g1',
                       border_mode='same')
    generator = Container(gen_inputs, g1(gen_joined))

    # Discriminator
    fake_in, real_in = Input(z_shape, name='fake'), Input(z_shape, name='real')
    dis_inputs = [fake_in, real_in]
    dis_joined = merge(dis_inputs, mode='concat', concat_axis=0)
    d1 = Convolution2D(5, 2, 2, name='d1', activation='relu')
    dis_flat = Flatten()(d1(dis_joined))
    dis_prob = Dense(1, activation='sigmoid')(dis_flat)
    discriminator = Container(dis_inputs, gan_outputs(dis_prob))

    gan = GAN(generator, discriminator, z_shape=z_shape, real_shape=z_shape)
    gan.build('adam', 'adam', gan_binary_crossentropy)
    custom_fn = gan.compile_custom_layers(['g1', 'd1'])

    # Draw order (z, real, cond) is kept so the RNG stream is unchanged.
    batch_shape = (64,) + z_shape
    z_batch = np.random.uniform(-1, 1, batch_shape)
    real_batch = np.random.uniform(-1, 1, batch_shape)
    cond_batch = np.random.uniform(-1, 1, batch_shape)
    for batch in (z_batch, real_batch, cond_batch):
        print(batch.shape)
    custom_fn({'z': z_batch, 'gen_cond': cond_batch, 'real': real_batch})
def MultiEmbedding_Glove_Bidirectional_VQA_Model_FusionLast(self, params):
    """Build a VQA model: GloVe-initialised bidirectional LSTM + image branch.

    The question is embedded, encoded by a forward and a backward LSTM whose
    final states are concatenated, and merged with a dense image embedding
    using ``params['MULTIMODAL_MERGE_MODE']``.  A dense classifier sits on
    top.  The result is stored in ``self.model``; ``self.ids_inputs`` and
    ``self.ids_outputs`` are set from ``params``.

    Side effect: ``self.word_vectors`` is replaced by an empty dict once the
    embedding matrix has been filled.

    Args:
        params: dict of hyper-parameters (keys as read below).
    """
    self.ids_inputs = params["INPUTS_IDS_MODEL"]
    self.ids_outputs = params["OUTPUTS_IDS_MODEL"]

    # Prepare GLOVE vectors for text embedding initialization
    embedding_weights = np.random.rand(params['INPUT_VOCABULARY_SIZE'],
                                       params['TEXT_EMBEDDING_HIDDEN_SIZE'])
    words2idx = self.vocabularies[self.ids_inputs[0]]['words2idx']
    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for word, index in words2idx.items():
        vector = self.word_vectors.get(word)
        if vector is not None:
            embedding_weights[index, :] = vector
    self.word_vectors = {}

    # Question model
    question = Input(name=self.ids_inputs[0],
                     shape=tuple([params['MAX_INPUT_TEXT_LEN']]),
                     dtype='int32')
    text_embedding = Embedding(params['INPUT_VOCABULARY_SIZE'],
                               params['TEXT_EMBEDDING_HIDDEN_SIZE'],
                               input_length=params['MAX_INPUT_TEXT_LEN'],
                               weights=[embedding_weights],
                               trainable=params['GLOVE_VECTORS_TRAINABLE'],
                               mask_zero=True,
                               name='text_embedding')(question)
    lstm_forward = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='forward',
                        return_sequences=False)(text_embedding)
    lstm_backward = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='backward',
                         go_backwards=True,
                         return_sequences=False)(text_embedding)
    lstm_text = merge([lstm_forward, lstm_backward], mode='concat')

    # Image model
    image = Input(name=self.ids_inputs[1],
                  shape=tuple([params['IMG_FEAT_SIZE']]))
    image_embedding = Dense(params['IMG_EMBEDDING_HIDDEN_SIZE'],
                            name='image_embedding')(image)
    if params['USE_BATCH_NORMALIZATION']:
        image_embedding = BatchNormalization(
            name='batch_normalization_image_embedding')(image_embedding)
    if params['USE_PRELU']:
        image_embedding = PReLU()(image_embedding)

    # Multimodal model
    image_text = merge([lstm_text, image_embedding],
                       mode=params['MULTIMODAL_MERGE_MODE'])
    if params['USE_DROPOUT']:
        image_text = Dropout(0.5)(image_text)

    # Classifier
    classifier = Dense(params['OUTPUT_VOCABULARY_SIZE'],
                       name=self.ids_outputs[0],
                       activation=params['CLASSIFIER_ACTIVATION'])(image_text)
    self.model = Model(input=[question, image], output=classifier)
def MultiEmbedding_Glove_Bidirectional_DeepSoftmax_VQA_Model_FusionLast(self, params):
    """Build a VQA model with a bidirectional LSTM and a deep MLP classifier.

    Same question/image branches as the plain bidirectional variant, followed
    by one dense layer per entry of ``params['DEEP_SOFTMAX_LAYERS_SIZE']``
    (each optionally followed by batch normalisation, PReLU and dropout) and
    a final dense classifier.  With an empty size list the classifier is
    applied directly to the fused representation.  The result is stored in
    ``self.model``.

    Side effect: ``self.word_vectors`` is replaced by an empty dict once the
    embedding matrix has been filled.

    Args:
        params: dict of hyper-parameters (keys as read below).
    """
    self.ids_inputs = params["INPUTS_IDS_MODEL"]
    self.ids_outputs = params["OUTPUTS_IDS_MODEL"]

    # Prepare GLOVE vectors for text embedding initialization
    embedding_weights = np.random.rand(params['INPUT_VOCABULARY_SIZE'],
                                       params['TEXT_EMBEDDING_HIDDEN_SIZE'])
    words2idx = self.vocabularies[self.ids_inputs[0]]['words2idx']
    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for word, index in words2idx.items():
        vector = self.word_vectors.get(word)
        if vector is not None:
            embedding_weights[index, :] = vector
    self.word_vectors = {}

    # Question model
    question = Input(name=self.ids_inputs[0],
                     shape=tuple([params['MAX_INPUT_TEXT_LEN']]),
                     dtype='int32')
    text_embedding = Embedding(params['INPUT_VOCABULARY_SIZE'],
                               params['TEXT_EMBEDDING_HIDDEN_SIZE'],
                               input_length=params['MAX_INPUT_TEXT_LEN'],
                               weights=[embedding_weights],
                               trainable=params['GLOVE_VECTORS_TRAINABLE'],
                               mask_zero=True,
                               name='text_embedding')(question)
    lstm_forward = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='forward',
                        return_sequences=False)(text_embedding)
    lstm_backward = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='backward',
                         go_backwards=True,
                         return_sequences=False)(text_embedding)
    lstm_text = merge([lstm_forward, lstm_backward], mode='concat')

    # Image model
    image = Input(name=self.ids_inputs[1],
                  shape=tuple([params['IMG_FEAT_SIZE']]))
    image_embedding = Dense(params['IMG_EMBEDDING_HIDDEN_SIZE'],
                            name='image_embedding')(image)
    if params['USE_BATCH_NORMALIZATION']:
        image_embedding = BatchNormalization(
            name='batch_normalization_image_embedding')(image_embedding)
    if params['USE_PRELU']:
        image_embedding = PReLU()(image_embedding)

    # Multimodal model
    image_text = merge([lstm_text, image_embedding],
                       mode=params['MULTIMODAL_MERGE_MODE'])
    if params['USE_DROPOUT']:
        image_text = Dropout(0.5)(image_text)

    # Deep MLP: each layer consumes the previous one, starting from the
    # fused representation.  With no layers, `fc` stays `image_text`.
    fc = image_text
    for n_layer, size in enumerate(params['DEEP_SOFTMAX_LAYERS_SIZE']):
        fc = Dense(size, name='fc_' + str(n_layer))(fc)
        if params['USE_BATCH_NORMALIZATION']:
            fc = BatchNormalization()(fc)
        if params['USE_PRELU']:
            fc = PReLU()(fc)
        if params['USE_DROPOUT']:
            fc = Dropout(0.5)(fc)

    # Softmax classifier
    classifier = Dense(params['OUTPUT_VOCABULARY_SIZE'],
                       name=self.ids_outputs[0],
                       activation=params['CLASSIFIER_ACTIVATION'])(fc)

    self.model = Model(input=[question, image], output=classifier)
def generator(inputs):
    """Assemble the generator graph from a single latent input tensor.

    ``inputs`` must contain exactly one tensor ``z``.  It is split into the
    driver, offset and bits parts, a mask is generated and lit, offsets are
    produced at three stages, and everything is combined by
    ``PyramidBlending``.  A high-passed post-processed mask is added and the
    sum is bounded to [-1.2, 1.2].

    Returns:
        The output tensor of the ``LinearInBounds`` layer named 'generator'.
    """
    (z,) = inputs

    # Split the latent vector into its three parts.
    z_driver = Split(*z_for_driver, axis=1)(z)
    z_offset = Split(*z_for_offset, axis=1)(z)
    z_bits = Split(*z_for_bits, axis=1)(z)

    # Mask generation.
    bits = get_bits(z_bits)
    driver = mask_driver(z_driver)
    driver_norm = NormSinCosAngle(0, name='driver_norm')(driver)
    mask_input = concat([bits, driver_norm], name='mask_gen_input')
    raw_mask, raw_depth_map = mask_generator(mask_input)
    mask = name_tensor(raw_mask, 'mask')
    mask_depth_map = name_tensor(raw_depth_map, 'mask_depth_map')

    selection = with_regularizer(
        Selection(threshold=-0.08, smooth_threshold=0.2, sigma=1.5,
                  name='selection'),
        MinCoveredRegularizer())
    mask_down = PyramidReduce()(mask)
    mask_selection = selection(mask)

    # Offsets and lighting.
    out_offset_front = offset_front(
        [z_offset, ZeroGradient()(driver_norm)])
    light_outs = list(lighting_generator(
        [out_offset_front, light_merge_mask16(mask_depth_map)]))
    lighting_layer = AddLighting(scale_factor=0.6, shift_factor=0.75,
                                 name='mask_with_lighting')
    mask_with_lighting = lighting_layer([mask] + light_outs)

    out_offset_middle = offset_middle([
        out_offset_front,
        offset_merge_mask16(mask_depth_map),
        offset_merge_light16(concat(light_outs)),
    ])
    offset_back_feature_map, out_offset_back = offset_back(
        [out_offset_middle, offset_merge_mask32(mask_down)])
    mask_weight64 = mask_weight_blending64(out_offset_middle)

    # Blending and post-processing.
    blending_layer = PyramidBlending(offset_pyramid_layers=2,
                                     mask_pyramid_layers=2,
                                     mask_weights=['variable', 1],
                                     offset_weights=[1, 1],
                                     use_selection=[True, True],
                                     name='blending')
    blending = blending_layer(
        [out_offset_back, mask_with_lighting, mask_selection, mask_weight64])

    post_inputs = [blending, mask_selection, mask, out_offset_back,
                   offset_back_feature_map] + light_outs
    mask_post = name_tensor(mask_postprocess(post_inputs), 'mask_post')
    mask_post_high = HighPass(4, nb_steps=4, name='mask_post_high')(mask_post)
    blending_post = merge([mask_post_high, blending], mode='sum',
                          name='blending_post')
    return LinearInBounds(-1.2, 1.2, name='generator')(blending_post)
def _build_multi_gpu_model(blueprint, devices):
    """Replicate a single-device model over every GPU in ``devices``.

    The model is built once on the CPU device.  For each GPU a ``Lambda``
    layer slices an equal part of the first model input along axis 0 and the
    model is applied to that slice on the GPU.  The per-GPU outputs are
    concatenated along axis 0 on the CPU device.

    Args:
        blueprint: passed through to ``_build_single_device_model``.
        devices: iterable of devices; only those for which
            ``is_gpu_device`` is true are used.

    Returns:
        A ``MultiGpuModel`` wrapping the base model and the merged output.
    """
    import tensorflow as tf

    model = _build_single_device_model(blueprint, cpu_device())
    gpu_devices = [dev for dev in devices if is_gpu_device(dev)]
    gpu_count = len(gpu_devices)

    def get_input(data, idx, parts):
        # Slice part `idx` of `parts` equal chunks along the first axis.
        # NOTE(review): with integer division, samples beyond
        # parts * (batch // parts) are never assigned to any slice.
        full_shape = tf.shape(data)
        chunk = full_shape[:1] // parts
        size = tf.concat([chunk, full_shape[1:]], 0)
        stride = tf.concat([chunk, full_shape[1:] * 0], 0)
        return tf.slice(data, stride * idx, size)

    # NOTE(review): only model.inputs[0] is sliced; multi-input models are
    # presumably not supported -- confirm.
    outputs = []
    for position, device in enumerate(gpu_devices):
        with tf.device(device):
            first_input = model.inputs[0]
            input_shape = tuple(first_input.get_shape().as_list())[1:]
            slicer = Lambda(get_input, output_shape=input_shape,
                            arguments={'idx': position, 'parts': gpu_count})
            model_input = slicer(first_input)
            outputs.append(model(model_input))

    with tf.device(cpu_device()):
        output = merge(outputs, mode='concat', concat_axis=0)
        return MultiGpuModel(model, model_input=model.inputs,
                             model_output=output)
def test_collect_layers_mimo():
    """Check ``collect_layers`` on a graph with two inputs and two outputs."""
    x = Input(shape=(5,))
    y = Input(shape=(5,))
    layer_a, layer_b, layer_c, layer_d, layer_e = (
        Dense(20), Dense(20), Dense(20), Dense(20), Dense(20))

    a = layer_a(x)
    b = layer_b(y)
    m = merge([a, b])
    c = layer_c(m)
    d = layer_d(m)
    e = layer_e(d)

    # Every layer on a path to either output must be collected, including
    # the merge layer recorded in the tensor's history.
    collected = collect_layers([x, y], [c, e])
    expected = (layer_a, layer_b, m._keras_history[0],
                layer_c, layer_d, layer_e)
    for layer in expected:
        assert layer in collected

    # layer_c does not lead to e, so it must be left out.
    collected = collect_layers([x, y], [e])
    assert layer_c not in collected

    # missing inputs are detected
    with pytest.raises(Exception):
        collected = collect_layers([x], [c, e])
def test_gan_get_config(tmpdir):
    """Round-trip a GAN through ``get_config`` / ``layer_from_config``.

    Weights are saved under ``tmpdir``, both the original and the reloaded
    configuration are dumped as JSON into ``TEST_OUTPUT_DIR``, and the saved
    weights are loaded into the GAN rebuilt from the configuration.
    """
    import json

    z_shape = (1, 8, 8)

    z = Input(z_shape, name='z')
    g_out = Convolution2D(10, 2, 2, activation='relu', border_mode='same')(z)
    generator = Container(z, g_out)

    fake, real = Input(z_shape, name='f'), Input(z_shape, name='r')
    dis_input = merge([fake, real], mode='concat', concat_axis=1)
    dis_conv = Convolution2D(5, 2, 2, activation='relu')(dis_input)
    dis_flat = Flatten()(dis_conv)
    dis_prob = Dense(1, activation='sigmoid')(dis_flat)
    discriminator = Container([fake, real], gan_outputs(dis_prob))

    gan = GAN(generator, discriminator, z_shape, z_shape)

    weights_dir = tmpdir.mkdir("weights")
    weights_fname = str(weights_dir.join("{}.hdf5"))
    gan.save_weights(weights_fname)

    true_config = gan.get_config()
    true_path = os.path.join(TEST_OUTPUT_DIR, "true_config.json")
    with open(true_path, 'w+') as fh:
        json.dump(true_config, fh, indent=2)

    custom = {'GAN': GAN, 'Split': Split}
    gan_from_config = layer_from_config(true_config, custom_objects=custom)

    loaded_path = os.path.join(TEST_OUTPUT_DIR, "loaded_config.json")
    with open(loaded_path, 'w+') as fh:
        json.dump(gan_from_config.get_config(), fh, indent=2)

    gan_from_config.load_weights(weights_fname)
def bottleneck(encoder, output, upsample=False, reverse_module=False):
    """Decoder bottleneck: 1x1 -> 3x3 conv (or deconv) -> 1x1, plus shortcut.

    Args:
        encoder: input tensor.
        output: number of output filters.
        upsample: if true, the middle layer is a stride-2
            ``Deconvolution2D`` instead of a 3x3 convolution.
        reverse_module: when upsampling, also upsample the shortcut branch
            and add it to the main branch.

    Returns:
        The main branch alone when ``upsample`` is true and
        ``reverse_module`` is false; otherwise ReLU of the sum of the main
        and shortcut branches.
    """
    # Floor division keeps the filter count an int on Python 3 as well
    # (plain `/` would produce a float there).
    internal = output // 4
    input_stride = 2 if upsample else 1

    x = Convolution2D(internal, input_stride, input_stride,
                      border_mode='same', bias=False)(encoder)
    x = BatchNormalization(momentum=0.1)(x)
    x = Activation('relu')(x)
    if not upsample:
        x = Convolution2D(internal, 3, 3, border_mode='same', bias=True)(x)
    else:
        # NOTE(review): axes 1 and 2 of the 4D shape are used as the
        # dimensions to double, i.e. channels-last is presumably assumed.
        _, w, h, _ = encoder.get_shape().as_list()
        in_shape = x.get_shape().as_list()
        x = Deconvolution2D(internal, 3, 3,
                            output_shape=(None, w * 2, h * 2, internal),
                            border_mode='same', subsample=(2, 2),
                            input_shape=in_shape)(x)
    x = BatchNormalization(momentum=0.1)(x)
    x = Activation('relu')(x)

    x = Convolution2D(output, 1, 1, border_mode='same', bias=False)(x)

    # Shortcut branch: project when the filter count changes or when
    # upsampling.
    other = encoder
    if encoder.get_shape()[-1] != output or upsample:
        other = Convolution2D(output, 1, 1, border_mode='same',
                              bias=False)(other)
        other = BatchNormalization(momentum=0.1)(other)
        if upsample and reverse_module:
            other = UpSampling2D(size=(2, 2))(other)

    if not upsample or reverse_module:
        x = BatchNormalization(momentum=0.1)(x)
    else:
        return x

    decoder = merge([x, other], mode='sum')
    decoder = Activation('relu')(decoder)
    return decoder
def exp_cnn():
    """Build and compile a multi-width 1D-CNN with an attention layer.

    Pre-trained embeddings are loaded with ``joblib`` from
    ``config.DUMPED_VECTOR_DIR``.  Four parallel ``Convolution1D`` layers
    (filter lengths 1, 2, 3 and 5) run over the embedded, dropped-out
    sentence; their outputs are concatenated on the last axis, reduced by
    ``AttLayer`` and fed to a single sigmoid unit.  The model summary is
    printed and the model is compiled with MAE loss and Adam.

    Returns:
        The compiled Keras ``Model``.
    """
    global X_DIM, Y_DIM

    # Load Embeddings matrix
    weights_path = config.DUMPED_VECTOR_DIR + 'mb_voc_embeddings.pkl'
    embedding_weights = joblib.load(weights_path)

    sentence = Input(shape=(max_len,), dtype='float32', name='w1')
    embedding_layer = Embedding(input_dim=max_features,
                                output_dim=embedding_dims,
                                weights=[embedding_weights])
    sentence_emb = embedding_layer(sentence)
    emb_dropout = Dropout(0.2, name='emb_dropout')
    sentence_drop = emb_dropout(sentence_emb)

    # All convolution layers are created first, then applied in order.
    cnn_layers = []
    for width in [1, 2, 3, 5]:
        cnn_layers.append(Convolution1D(filter_length=width, nb_filter=512,
                                        activation='relu',
                                        border_mode='same'))
    conv_outputs = [conv(sentence_drop) for conv in cnn_layers]
    merged_cnn = merge(conv_outputs, mode='concat', concat_axis=-1)

    attention = AttLayer(name='att')(merged_cnn)
    score = Dense(1, init='normal', activation='sigmoid')(attention)

    model_cnn = Model(input=[sentence], output=[score], name='cnn_model')
    print(model_cnn.summary())
    model_cnn.compile(loss='mae', optimizer='adam')
    return model_cnn
def MultiEmbedding_VQA_Model_FusionLast(self, params):
    """Build a VQA model that fuses an LSTM question code with image features.

    The question is embedded (randomly initialised, zero-masked) and encoded
    by a single LSTM; the image features go through a dense embedding with
    optional batch normalisation and PReLU.  Both are merged using
    ``params['MULTIMODAL_MERGE_MODE']`` and classified by a dense layer.
    The model is stored in ``self.model``.

    Args:
        params: dict of hyper-parameters (keys as read below).
    """
    self.ids_inputs = params["INPUTS_IDS_MODEL"]
    self.ids_outputs = params["OUTPUTS_IDS_MODEL"]

    # Question model
    question = Input(name=self.ids_inputs[0],
                     shape=(params['MAX_INPUT_TEXT_LEN'],),
                     dtype='int32')
    embedding_layer = Embedding(params['INPUT_VOCABULARY_SIZE'],
                                params['TEXT_EMBEDDING_HIDDEN_SIZE'],
                                input_length=params['MAX_INPUT_TEXT_LEN'],
                                mask_zero=True,
                                name='text_embedding')
    text_embedding = embedding_layer(question)
    encoder = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='lstm',
                   return_sequences=False)
    lstm_text = encoder(text_embedding)

    # Image model
    image = Input(name=self.ids_inputs[1],
                  shape=(params['IMG_FEAT_SIZE'],))
    image_dense = Dense(params['IMG_EMBEDDING_HIDDEN_SIZE'],
                        name='image_embedding')
    image_embedding = image_dense(image)
    if params['USE_BATCH_NORMALIZATION']:
        image_bn = BatchNormalization(
            name='batch_normalization_image_embedding')
        image_embedding = image_bn(image_embedding)
    if params['USE_PRELU']:
        image_embedding = PReLU()(image_embedding)

    # Multimodal model
    fused = merge([lstm_text, image_embedding],
                  mode=params['MULTIMODAL_MERGE_MODE'])
    if params['USE_DROPOUT']:
        fused = Dropout(0.5)(fused)

    # Classifier
    output_layer = Dense(params['OUTPUT_VOCABULARY_SIZE'],
                         name=self.ids_outputs[0],
                         activation=params['CLASSIFIER_ACTIVATION'])
    self.model = Model(input=[question, image], output=output_layer(fused))
def denseBlock_up(x, n_layers, growth_rate, n_filter, dropout_fraction):
    """Dense block for the upsampling path.

    Each of the ``n_layers`` steps applies BN -> ReLU -> 3x3 convolution
    (-> dropout when ``dropout_fraction`` is non-zero) to the running tensor
    and then concatenates all feature maps seen so far.  The newly created
    feature maps are also collected separately so that only they are
    upsampled later.

    Returns:
        ``(output, n_filter, block_to_upsample)`` where ``n_filter`` has been
        increased by ``growth_rate`` per layer.
    """
    ordering = K.image_dim_ordering()
    if ordering == 'th':
        concat_axis = 1
    elif ordering == 'tf':
        concat_axis = -1

    past_features = [x]
    # Outputs of the individual layers of this block; only these new feature
    # maps are upsampled.
    block_to_upsample = []
    current = x
    for _ in range(n_layers):
        new_maps = BatchNormalization(mode=0, axis=concat_axis)(current)
        new_maps = Activation('relu')(new_maps)
        new_maps = Convolution2D(growth_rate, 3, 3,
                                 border_mode='same')(new_maps)
        if dropout_fraction != 0:
            new_maps = Dropout(dropout_fraction)(new_maps)
        block_to_upsample.append(new_maps)
        past_features.append(new_maps)
        current = merge(past_features, mode='concat',
                        concat_axis=concat_axis)
        n_filter += growth_rate

    return current, n_filter, block_to_upsample
def test_collect_layers_mimo():
    """Check ``collect_layers`` on a graph with two inputs and two outputs."""
    x = Input(shape=(5,))
    y = Input(shape=(5,))
    layer_a, layer_b, layer_c, layer_d, layer_e = (
        Dense(20), Dense(20), Dense(20), Dense(20), Dense(20))

    a = layer_a(x)
    b = layer_b(y)
    m = merge([a, b])
    c = layer_c(m)
    d = layer_d(m)
    e = layer_e(d)

    # Every layer on a path to either output must be collected, including
    # the merge layer recorded in the tensor's history.
    collected = collect_layers([x, y], [c, e])
    expected = (layer_a, layer_b, m._keras_history[0],
                layer_c, layer_d, layer_e)
    for layer in expected:
        assert layer in collected

    # layer_c does not lead to e, so it must be left out.
    collected = collect_layers([x, y], [e])
    assert layer_c not in collected

    # missing inputs are detected
    with pytest.raises(Exception):
        collected = collect_layers([x], [c, e])
def denseBlock(x, n_layers, growth_rate, n_filter, dropout_fraction):
    """Dense block with a 1x1 convolution before each 3x3 convolution.

    Each of the ``n_layers`` steps applies BN -> ReLU -> 1x1 conv followed by
    BN -> ReLU -> 3x3 conv (dropout after each convolution when
    ``dropout_fraction`` is non-zero), then concatenates all feature maps
    seen so far.

    Returns:
        ``(output, n_filter)`` where ``n_filter`` has been increased by
        ``growth_rate`` per layer.
    """
    ordering = K.image_dim_ordering()
    if ordering == 'th':
        concat_axis = 1
    elif ordering == 'tf':
        concat_axis = -1

    past_features = [x]
    current = x
    for _ in range(n_layers):
        # Same BN -> ReLU -> conv (-> dropout) stage, first 1x1 then 3x3.
        for kernel in (1, 3):
            current = BatchNormalization(mode=0, axis=concat_axis)(current)
            current = Activation('relu')(current)
            current = Convolution2D(growth_rate, kernel, kernel,
                                    border_mode='same')(current)
            if dropout_fraction != 0:
                current = Dropout(dropout_fraction)(current)
        past_features.append(current)
        current = merge(past_features, mode='concat',
                        concat_axis=concat_axis)
        n_filter += growth_rate

    return current, n_filter
def concat(tensors, axis=1, name=None, output_shape=None):
    """Concatenate Keras tensors, passing trivial inputs straight through.

    Args:
        tensors: a single tensor, or a list/tuple of tensors.
        axis: axis to concatenate on.
        name: forwarded to ``merge``.
        output_shape: forwarded to ``merge``.

    Returns:
        ``tensors`` unchanged when it is not a list or tuple, its only
        element when it has length one, otherwise the result of ``merge`` in
        'concat' mode.
    """
    # isinstance (rather than an exact type match) so that list/tuple
    # subclasses are treated as sequences of tensors too.
    if not isinstance(tensors, (list, tuple)):
        return tensors
    if len(tensors) == 1:
        return tensors[0]
    return merge(tensors, mode='concat', concat_axis=axis, name=name,
                 output_shape=output_shape)
def get_generator():
    """Return a generator ``Container`` over the inputs ``[z, gen_cond]``.

    ``z`` is created here; ``gen_cond`` and ``z_shape`` are taken from the
    surrounding scope.  Both inputs are concatenated on axis 1 and passed
    through one 2x2 convolution with 10 filters.
    """
    latent = Input(shape=z_shape, name='z')
    gen_inputs = [latent, gen_cond]
    joined = merge(gen_inputs, mode='concat', concat_axis=1)
    conv = Convolution2D(10, 2, 2, activation='relu', border_mode='same')
    return Container(gen_inputs, conv(joined))
def get_discriminator():
    """Return a discriminator ``Container`` over fake and real inputs.

    The two inputs (shape ``z_shape`` from the surrounding scope) are
    concatenated on axis 1, convolved, flattened and mapped to a single
    sigmoid unit whose output is wrapped by ``gan_outputs``.
    """
    fake, real = Input(z_shape, name='f'), Input(z_shape, name='r')
    dis_inputs = [fake, real]
    joined = merge(dis_inputs, mode='concat', concat_axis=1)
    features = Convolution2D(5, 2, 2, activation='relu')(joined)
    flat = Flatten()(features)
    prob = Dense(1, activation='sigmoid')(flat)
    return Container(dis_inputs, gan_outputs(prob))
def f(inputs):
    """Residual block: two 3x3 convolutions added onto the input.

    ``nb_filter``, ``stochastic``, ``death_rate`` and ``stochastic_layers``
    are read from the surrounding scope.  When the shapes of input and
    residual differ, the input is projected by a 1x1 convolution.  In
    stochastic mode the residual is scaled by ``ScaleInTestPhase`` and the
    sum goes through a ``RandomSwitch`` whose alternative is the shortcut.
    """
    residual = norm_act_block()(inputs)
    residual = conv2(nb_filter, 3, 3)(residual)
    residual = dropout()(residual)
    residual = norm_act_block()(residual)
    residual = conv2(nb_filter, 3, 3)(residual)

    shortcut = inputs
    if shortcut._keras_shape != residual._keras_shape:
        shortcut = conv2(nb_filter, 1, 1, bias=False)(shortcut)

    if stochastic:
        scale = ScaleInTestPhase(death_rate)
        residual = scale(residual)
        summed = merge([shortcut, residual], mode="sum",
                       output_shape=residual._keras_shape[1:])
        switch = RandomSwitch(death_rate)
        stochastic_layers.append((switch.death_rate, scale.death_rate))
        return switch([summed, shortcut])

    return merge((shortcut, residual), mode='sum')
def f(inputs):
    """Downsampling residual block.

    The main branch applies a stride-2 3x3 convolution, dropout and a second
    3x3 convolution; the shortcut is a stride-2 1x1 convolution of the
    input.  Both are summed.  ``nb_filter`` comes from the surrounding scope.
    """
    branch = norm_act_block()(inputs)
    branch = conv2(nb_filter, 3, 3, subsample=(2, 2))(branch)
    branch = dropout()(branch)
    branch = norm_act_block()(branch)
    branch = conv2(nb_filter, 3, 3)(branch)

    projection = conv2(nb_filter, 1, 1, subsample=(2, 2), bias=False)
    shortcut = projection(inputs)
    return merge((shortcut, branch), mode='sum')
def skip_connection(inputs, residual, stochastic=False, stochastic_layers=None):
    """Sum ``residual`` with a shortcut derived from ``inputs``.

    ``inputs`` is passed through ``dropout()``.  If its filter count (index 1
    of ``_keras_shape``) differs from that of ``residual``, or its trailing
    dimensions are larger, a 1x1 ``conv2`` with the matching ``subsample``
    projects it before the sum.

    Args:
        inputs: Keras tensor entering the block.
        residual: Keras tensor produced by the residual branch.
        stochastic: if true, the shortcut is scaled by ``ScaleInTestPhase``
            and the sum is passed through a ``RandomSwitch``.
        stochastic_layers: optional list; in stochastic mode the pair
            ``(rs.death_rate, scale.death_rate)`` is appended to it.  When it
            is ``None`` nothing is recorded (previously this raised
            ``AttributeError``).

    Returns:
        The merged Keras tensor.
    """
    # NOTE(review): index 1 is read as the filter axis, i.e. this presumably
    # expects channels-first tensors -- confirm against the callers.
    inputs_filters = inputs._keras_shape[1]
    residual_filters = residual._keras_shape[1]
    subsample = (np.array(inputs._keras_shape[2:]) //
                 np.array(residual._keras_shape[2:]))
    inputs = dropout()(inputs)
    if (inputs_filters != residual_filters) or np.any(subsample > 1):
        skip = conv2(residual_filters, 1, 1, subsample=subsample,
                     bias=False)(inputs)
    else:
        skip = inputs

    if not stochastic:
        return merge((skip, residual), mode='sum')

    # NOTE(review): `death_rate` is not a parameter; it must come from an
    # enclosing or module scope that is not visible here.
    scale = ScaleInTestPhase(death_rate)
    skip = scale(skip)
    out = merge([skip, residual], mode="sum")
    rs = RandomSwitch(death_rate)
    if stochastic_layers is not None:
        stochastic_layers.append((rs.death_rate, scale.death_rate))
    return rs([out, residual])
def g():
    """Build the sequence generator model.

    The latent vector ``z`` is repeated ``input_size`` times and concatenated
    to the input sequence on the last axis.  An LSTM encodes the result, the
    code is repeated ``output_size`` times and decoded by a second LSTM with
    a per-step softmax over ``nb_chars``.  Sizes come from the surrounding
    scope.

    Returns:
        A ``Model`` with inputs ``[z, seq]`` and one output.
    """
    seq_in = Input(shape=(input_size, nb_chars))
    z_in = Input(shape=(z_size,))
    z_repeated = RepeatVector(input_size)(z_in)
    seq_and_z = merge([seq_in, z_repeated], mode='concat', concat_axis=-1)

    stack = [
        LSTM(8),
        RepeatVector(output_size),
        LSTM(8, return_sequences=True),
        TimeDistributed(Dense(nb_chars, activation='softmax')),
    ]
    fake_prob = sequential(stack)(seq_and_z)
    return Model([z_in, seq_in], [fake_prob])
def test_gan_graph():
    """Build, compile and sample from a small conditional GAN.

    Generator and discriminator both concatenate their two inputs on axis 1.
    After ``build`` and ``compile`` 64 samples are generated with an
    all-zero condition.
    """
    z_shape = (1, 8, 8)

    # Generator
    z_in = Input(shape=z_shape, name='z')
    cond_in = Input(shape=(1, 8, 8), name='gen_cond')
    gen_inputs = [z_in, cond_in]
    gen_joined = merge(gen_inputs, mode='concat', concat_axis=1)
    gen_conv = Convolution2D(10, 2, 2, activation='relu', border_mode='same')
    generator = Container(gen_inputs, gen_conv(gen_joined))

    # Discriminator
    fake_in, real_in = Input(z_shape, name='f'), Input(z_shape, name='r')
    dis_inputs = [fake_in, real_in]
    dis_joined = merge(dis_inputs, mode='concat', concat_axis=1)
    dis_features = Convolution2D(5, 2, 2, activation='relu')(dis_joined)
    dis_flat = Flatten()(dis_features)
    dis_prob = Dense(1, activation='sigmoid')(dis_flat)
    discriminator = Container(dis_inputs, gan_outputs(dis_prob))

    gan = GAN(generator, discriminator, z_shape=z_shape, real_shape=z_shape)
    gan.build('adam', 'adam', gan_binary_crossentropy)
    gan.compile()

    condition = np.zeros((64,) + z_shape)
    gan.generate({'gen_cond': condition}, nb_samples=64)
def __init__(self, g, d, m, g_optimizer, d_optimizer):
    """Wire generator ``g`` to ``m`` and compile the training models.

    The generator's input sequence and output are concatenated on axis 1 and
    fed to ``m``; the resulting model ``model_fit_g`` is compiled with
    ``g_optimizer`` while ``m`` is wrapped in ``trainable(m, False)``.
    ``d`` is compiled with ``d_optimizer``.  Both use binary cross-entropy.
    """
    self.g, self.d, self.m = g, d, m

    self.z, self.seq_input = self.g.inputs
    (self.fake_prob,) = self.g.outputs

    # NOTE(review): the extent of this `with` block was reconstructed; it is
    # assumed to cover building and compiling model_fit_g only -- confirm.
    with trainable(m, False):
        joined = merge([self.seq_input, self.fake_prob],
                       mode='concat', concat_axis=1)
        self.m_realness = self.m(joined)
        self.model_fit_g = Model([self.z, self.seq_input],
                                 [self.m_realness])
        self.model_fit_g.compile(g_optimizer, K.binary_crossentropy)

    self.d.compile(d_optimizer, loss=K.binary_crossentropy)
def g():
    """Build the sequence generator model.

    The latent vector ``z`` is repeated ``input_size`` times and concatenated
    to the input sequence on the last axis.  An LSTM encodes the result, the
    code is repeated ``output_size`` times and decoded by a second LSTM with
    a per-step softmax over ``nb_chars``.  Sizes come from the surrounding
    scope.

    Returns:
        A ``Model`` with inputs ``[z, seq]`` and one output.
    """
    seq_in = Input(shape=(input_size, nb_chars))
    z_in = Input(shape=(z_size,))
    z_repeated = RepeatVector(input_size)(z_in)
    seq_and_z = merge([seq_in, z_repeated], mode='concat', concat_axis=-1)

    stack = [
        LSTM(8),
        RepeatVector(output_size),
        LSTM(8, return_sequences=True),
        TimeDistributed(Dense(nb_chars, activation='softmax')),
    ]
    fake_prob = sequential(stack)(seq_and_z)
    return Model([z_in, seq_in], [fake_prob])
def decoder_resnet(label_sizes, nb_filter=16, data_shape=(1, 64, 64),
                   nb_bits=12, resnet_depth=(3, 4, 6, 3), optimizer='adam'):
    """Build and compile a pre-activation ResNet decoder.

    The network starts with a stride-2 BN/ELU/conv stage.  Stage ``i`` uses
    ``nb_filter * 2**i`` filters and ``resnet_depth[i]`` residual units; the
    first unit of every stage except the first downsamples by 2 and uses a
    1x1 projection shortcut.  ``decoder_end_block`` supplies the outputs and
    losses.

    Returns:
        The compiled Keras ``Model``.
    """
    def _bn_relu_conv(nb_filter, nb_row=3, nb_col=3, subsample=1):
        # BN -> ELU -> convolution with "same" padding.
        return sequential([
            BatchNormalization(mode=0, axis=1),
            ELU(),
            Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col,
                          subsample=(subsample, subsample),
                          init="he_normal", border_mode="same"),
        ])

    def _residual_branch(nb_filter, subsample=1):
        # Two stacked BN/ELU/conv stages; only the first may downsample.
        return sequential([
            _bn_relu_conv(nb_filter, subsample=subsample),
            _bn_relu_conv(nb_filter),
        ])

    def _identity(tensor):
        return tensor

    net_input = Input(shape=data_shape)
    filters_by_depth = [nb_filter * 2**stage
                        for stage in range(len(resnet_depth))]
    print("fitlers_by_depth", filters_by_depth)

    x = _bn_relu_conv(nb_filter, 3, 3, subsample=2)(net_input)
    for stage, (n, depth) in enumerate(zip(filters_by_depth, resnet_depth)):
        for unit in range(depth):
            downsample = unit == 0 and stage != 0
            if downsample:
                shortcut = _bn_relu_conv(n, 1, 1, subsample=2)
                stride = 2
            else:
                shortcut = _identity
                stride = 1
            x = merge([shortcut(x), _residual_branch(n, stride)(x)],
                      mode='sum')

    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())

    model = Model(net_input, list(outputs.values()))
    loss_weights = {k: decoder_loss_weights(k) for k in losses.keys()}
    model.compile(optimizer, loss=list(losses.values()),
                  loss_weights=loss_weights)
    return model
def transition_up_Layer(skip_connection, block_to_upsample, n_filters_keep):
    """Upsample a dense block's new feature maps and join the skip connection.

    The tensors in ``block_to_upsample`` are concatenated, upsampled by a
    stride-2 3x3 ``Deconvolution2D`` with ``n_filters_keep`` filters, cropped
    with ``CropLayer2D(skip_connection)`` and concatenated with
    ``skip_connection``.  Intermediate shapes are printed.

    Returns:
        The concatenated tensor.
    """
    ordering = K.image_dim_ordering()
    if ordering == 'th':
        concat_axis = 1
    elif ordering == 'tf':
        concat_axis = -1

    stacked = merge(block_to_upsample, mode='concat',
                    concat_axis=concat_axis)
    print('shape_x:' + str(stacked._keras_shape))

    deconv = Deconvolution2D(n_filters_keep, 3, 3,
                             input_shape=stacked._keras_shape,
                             activation='linear', border_mode='valid',
                             subsample=(2, 2))
    upsampled = deconv(stacked)
    print('shape_x_deconv:' + str(upsampled._keras_shape))

    cropped = CropLayer2D(skip_connection)(upsampled)
    print('shape:' + str(cropped._keras_shape))

    joined = merge([cropped, skip_connection], mode='concat',
                   concat_axis=concat_axis)
    print('shape_merge:' + str(joined._keras_shape))
    return joined
def _build_generator_given_z_offset_and_labels(self):
    """Build the generator models that take ``z_offset`` and ``labels``.

    Sets three attributes:
        generator_given_z_and_labels: model producing only ``fake``.
        sample_generator_given_z_and_labels: model producing all named
            intermediate tensors.
        sample_generator_given_z_and_labels_output_names: names of those
            tensors, in output order.
    """
    labels = Input(shape=self.labels_shape, name='input_labels')
    z_offset = Input(shape=(self.z_dim_offset,), name='input_z_offset')

    label_slicer = Subtensor(self.nb_bits, self.labels_shape[0], axis=1)
    labels_without_bits = label_slicer(labels)

    # build tag3d tensors
    tag3d, tag3d_depth_map = self.tag3d_network(labels)
    segmentation = Segmentation(threshold=-0.08, smooth_threshold=0.2,
                                sigma=1.5, name='segmentation')
    tag3d_segmented = segmentation(tag3d)
    tag3d_segmented_blur = GaussianBlur(sigma=3.0)(tag3d_segmented)

    # get generator params
    blur_factor, lights, background, details = simple_gan_generator(
        self.generator_units, z_offset, labels_without_bits,
        tag3d_depth_map, tag3d, depth=self.generator_depth)

    tag3d_blur = BlendingBlur(sigma=1)([tag3d, blur_factor])
    lighting = AddLighting(scale_factor=0.85, shift_factor=0.75)
    tag3d_lightin = lighting([tag3d_blur] + lights)
    fake_without_noise = Background(name='bg')(
        [background, tag3d_lightin, tag3d_segmented_blur])
    details_high_pass = HighPass(4, nb_steps=4)(details)
    fake = InBounds(-1.0, 1.0)(
        merge([details_high_pass, fake_without_noise], mode='sum'))

    # Tensors exposed by the sampling model, in output order.
    named_tensors = [
        ('tag3d', tag3d),
        ('tag3d_blur', tag3d_blur),
        ('tag3d_lightin', tag3d_lightin),
        ('fake_without_noise', fake_without_noise),
        ('fake', fake),
    ]
    outputs = OrderedDict([(name, name_tensor(tensor, name))
                           for name, tensor in named_tensors])

    model_inputs = [z_offset, labels]
    self.generator_given_z_and_labels = Model(model_inputs, [fake])
    self.sample_generator_given_z_and_labels_output_names = list(
        outputs.keys())
    self.sample_generator_given_z_and_labels = Model(
        model_inputs, list(outputs.values()))
def sequential_to_gan(generator: Sequential, discriminator: Sequential,
                      nb_real=32, nb_fake=96):
    """Wrap a sequential generator and discriminator into a ``GAN``.

    Two inputs named 'fake' and 'real' (shaped like the discriminator's
    input) are concatenated along axis 0 and passed through the
    discriminator; ``gan_outputs`` then receives index pairs that select the
    parts of that batch used for each loss.

    Args:
        generator: generator model; its input shape becomes ``z_shape``.
        discriminator: discriminator model; its input shape becomes
            ``real_shape``.
        nb_real: number of real samples per batch.
        nb_fake: number of fake samples per batch.

    Returns:
        A ``GAN`` built from the generator and the discriminator container.
    """
    sample_shape = discriminator.input_shape[1:]
    fake = Input(shape=sample_shape, name='fake')
    real = Input(shape=sample_shape, name='real')
    dis_in = merge([fake, real], concat_axis=0, mode='concat',
                   name='concat_fake_real')
    dis = discriminator(dis_in)
    # NOTE(review): unlike the other two pairs, the second element of
    # `fake_for_dis` is `nb_real`, not an end index past the first element.
    # Whether `gan_outputs` expects (start, stop) or (start, length) is not
    # visible here -- confirm before changing.
    dis_outputs = gan_outputs(dis,
                              fake_for_gen=(0, nb_fake),
                              fake_for_dis=(nb_fake - nb_real, nb_real),
                              real=(nb_fake, nb_fake + nb_real))
    dis_container = Container([fake, real], dis_outputs)
    return GAN(generator, dis_container,
               z_shape=generator.input_shape[1:],
               real_shape=sample_shape)
def bottleneck(inp, output, internal_scale=4, use_relu=True, asymmetric=0,
               dilated=0, downsample=False, dropout_rate=0.1):
    """Encoder bottleneck: 1x1 -> conv -> 1x1 main branch plus shortcut.

    Args:
        inp: input tensor.
        output: number of output filters.
        internal_scale: the main branch uses ``output // internal_scale``
            filters internally.
        use_relu: accepted for interface compatibility; not read by this
            function.
        asymmetric: if non-zero, the middle convolution is a 1xN followed by
            an Nx1 convolution with N = ``asymmetric``.
        dilated: if non-zero (and ``asymmetric`` is zero), the middle
            convolution is a 3x3 atrous convolution with this rate.
        downsample: if true, the first convolution is 2x2 with stride 2 and
            the shortcut is max-pooled and zero-padded.
        dropout_rate: rate of the final ``SpatialDropout2D``.

    Returns:
        PReLU of the sum of the main branch and the shortcut.
    """
    # main branch
    # Floor division keeps the filter count an int on Python 3 as well
    # (plain `/` would produce a float there).
    internal = output // internal_scale
    encoder = inp

    ## 1x1
    # the first 1x1 projection is replaced with a 2x2 convolution when
    # downsampling
    input_stride = 2 if downsample else 1
    encoder = Convolution2D(internal, input_stride, input_stride,
                            border_mode='same',
                            subsample=(input_stride, input_stride),
                            bias=False)(encoder)
    ## Batch normalization + PReLU
    # enet uses momentum of 0.1, keras default is 0.99
    encoder = BatchNormalization(momentum=0.1)(encoder)
    encoder = PReLU(shared_axes=[1, 2])(encoder)

    ## conv
    # `asymmetric` takes precedence over `dilated`, as before; the former
    # unreachable `else: raise` branch is gone.
    if asymmetric:
        encoder = Convolution2D(nb_filter=internal, nb_row=1,
                                nb_col=asymmetric, border_mode='same',
                                bias=False)(encoder)
        encoder = Convolution2D(nb_filter=internal, nb_row=asymmetric,
                                nb_col=1, border_mode='same')(encoder)
    elif dilated:
        encoder = AtrousConvolution2D(nb_filter=internal, nb_row=3, nb_col=3,
                                      atrous_rate=(dilated, dilated),
                                      border_mode='same')(encoder)
    else:
        encoder = Convolution2D(nb_filter=internal, nb_row=3, nb_col=3,
                                border_mode='same')(encoder)

    ## Batch normalization + PReLU
    encoder = BatchNormalization(momentum=0.1)(encoder)
    encoder = PReLU(shared_axes=[1, 2])(encoder)

    ## 1x1
    encoder = Convolution2D(nb_filter=output, nb_row=1, nb_col=1,
                            border_mode='same', bias=False)(encoder)

    ## Batch normalization + Spatial dropout
    encoder = BatchNormalization(momentum=0.1)(encoder)
    encoder = SpatialDropout2D(dropout_rate)(encoder)

    # other branch
    other = inp
    if downsample:
        other = MaxPooling2D()(other)
        # The permutes move the feature-map axis into a position that
        # ZeroPadding2D can pad, then move it back.
        other = Permute((1, 3, 2))(other)
        pad_featmaps = output - inp.get_shape().as_list()[3]
        other = ZeroPadding2D(padding=(0, 0, 0, pad_featmaps))(other)
        other = Permute((1, 3, 2))(other)

    encoder = merge([encoder, other], mode='sum')
    encoder = PReLU(shared_axes=[1, 2])(encoder)
    return encoder
def concat(tensors, axis=1, **kwargs):
    """
    Wrapper around keras merge function.

    Args:
        tensors: list of keras tensors
        axis: concat on this axis
        kwargs: passed to the merge function

    Returns:
        The concatenated tensor.  A value that is not a list or tuple is
        returned unchanged, and a sequence of length one yields its only
        element.
    """
    # isinstance (rather than an exact type match) so that list/tuple
    # subclasses are treated as sequences of tensors too.
    if not isinstance(tensors, (list, tuple)):
        return tensors
    if len(tensors) == 1:
        return tensors[0]
    return merge(tensors, mode='concat', concat_axis=axis, **kwargs)
def MultiEmbedding_VQA_Model_FusionLast(self, params):
    """Build a VQA model that fuses an LSTM question code with image features.

    The question is embedded (randomly initialised, zero-masked) and encoded
    by a single LSTM; the image features go through a dense embedding with
    optional batch normalisation and PReLU.  Both are merged using
    ``params['MULTIMODAL_MERGE_MODE']`` and classified by a dense layer.
    The model is stored in ``self.model``.

    Args:
        params: dict of hyper-parameters (keys as read below).
    """
    self.ids_inputs = params["INPUTS_IDS_MODEL"]
    self.ids_outputs = params["OUTPUTS_IDS_MODEL"]

    # Question model
    question = Input(name=self.ids_inputs[0],
                     shape=(params['MAX_INPUT_TEXT_LEN'],),
                     dtype='int32')
    embedding_layer = Embedding(params['INPUT_VOCABULARY_SIZE'],
                                params['TEXT_EMBEDDING_HIDDEN_SIZE'],
                                input_length=params['MAX_INPUT_TEXT_LEN'],
                                mask_zero=True,
                                name='text_embedding')
    text_embedding = embedding_layer(question)
    encoder = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='lstm',
                   return_sequences=False)
    lstm_text = encoder(text_embedding)

    # Image model
    image = Input(name=self.ids_inputs[1],
                  shape=(params['IMG_FEAT_SIZE'],))
    image_dense = Dense(params['IMG_EMBEDDING_HIDDEN_SIZE'],
                        name='image_embedding')
    image_embedding = image_dense(image)
    if params['USE_BATCH_NORMALIZATION']:
        image_bn = BatchNormalization(
            name='batch_normalization_image_embedding')
        image_embedding = image_bn(image_embedding)
    if params['USE_PRELU']:
        image_embedding = PReLU()(image_embedding)

    # Multimodal model
    fused = merge([lstm_text, image_embedding],
                  mode=params['MULTIMODAL_MERGE_MODE'])
    if params['USE_DROPOUT']:
        fused = Dropout(0.5)(fused)

    # Classifier
    output_layer = Dense(params['OUTPUT_VOCABULARY_SIZE'],
                         name=self.ids_outputs[0],
                         activation=params['CLASSIFIER_ACTIVATION'])
    self.model = Model(input=[question, image], output=output_layer(fused))
def build_model(activations=False):
    """Build and compile a two-layer GRU classifier.

    Args:
        activations: if true, the model output additionally contains the
            flattened per-step activations of both GRU layers (concatenated
            before the two sigmoid outputs) and a name for every activation
            is appended to the external ``columns`` list.

    Returns:
        The model compiled with binary cross-entropy and the 'nadam'
        optimizer.
    """
    print('Build model...')
    seq_shape = (GLO.max_len, len(GLO.chars))
    inp = Input(shape=seq_shape, name='input')
    act = 'relu'
    gru_input = GRU(HIDDEN_SIZE, input_shape=(GLO.max_len, len(GLO.chars)),
                    activation=act, dropout_W=DROPOUT, dropout_U=DROPOUT,
                    return_sequences=True, name='gru_input')(inp)
    gru_hidden = GRU(HIDDEN_SIZE, dropout_W=DROPOUT, dropout_U=DROPOUT,
                     name='gru_hidden',
                     return_sequences=activations)(gru_input)
    if activations:
        # drop anything but the final layer's activations
        gru_hidden_all = gru_hidden
        gru_hidden = Lambda(lambda x: x[:, -1, :])(gru_hidden_all)
    output = Dense(2, activation='sigmoid', name='output')(gru_hidden)

    optimizer = 'nadam'

    if activations:
        flat_size = GLO.max_len * HIDDEN_SIZE
        output = merge([
            Reshape((flat_size,))(gru_input),
            Reshape((flat_size,))(gru_hidden_all),
            output,
        ], mode='concat')
        # NOTE(review): `columns` is not defined in this function; it is
        # presumably a module-level list -- confirm.
        templates = ('gru_input_seq{}_node{}', 'gru_hidden_seq{}_node{}')
        for template in templates:
            for step in range(GLO.max_len):
                for node in range(HIDDEN_SIZE):
                    columns.append(template.format(step, node))

    model = Model(input=inp, output=output)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    return model
def spp(self, input, input_shape):
    """Apply spatial pyramid pooling and concatenate all pyramid levels.

    For every bin count in ``self.spp_struct`` a pooling layer is built from
    the height and width found at indices 3 and 4 of ``input_shape``.  Its
    output is permuted and reshaped per time step so that the pooled
    positions form one axis, and the levels are concatenated on axis 2.  The
    reshaped output shape of every level is printed.

    Args:
        input: tensor to pool.
        input_shape: shape of ``input``; indices 3 and 4 are read as height
            and width.

    Returns:
        The concatenated tensor.
    """
    h = input_shape[3]
    w = input_shape[4]
    outputs = []
    for n_bins in self.spp_struct:
        pooling = self.__constructSppPooling(h, w, n_bins)
        output = pooling(input)
        layer_permute = TimeDistributed(Permute((2, 3, 1)))
        output = layer_permute(output)
        output_shape = layer_permute.output_shape
        layer_reshape = TimeDistributed(
            Reshape((output_shape[2] * output_shape[3], output_shape[4])))
        outputs.append(layer_reshape(output))
        # print() call form: valid on Python 3 and gives the same output for
        # a single argument on Python 2.
        print(layer_reshape.output_shape)
    output = merge(outputs, mode='concat', concat_axis=2)
    return output
def MultiEmbedding_Glove_VQA_Model_LSTMAfterFusion(self, params):
    """Build a VQA model with a decoder LSTM after the multimodal fusion.

    The question is embedded with GloVe-initialised weights and encoded by an
    LSTM; the image features go through a dense embedding with optional
    batch normalisation and PReLU.  Both are merged using
    ``params['MULTIMODAL_MERGE_MODE']``, passed through a second LSTM and
    classified by a dense layer.  The model is stored in ``self.model``.

    Side effect: ``self.word_vectors`` is replaced by an empty dict once the
    embedding matrix has been filled.

    Args:
        params: dict of hyper-parameters (keys as read below).
    """
    self.ids_inputs = params["INPUTS_IDS_MODEL"]
    self.ids_outputs = params["OUTPUTS_IDS_MODEL"]

    # Prepare GLOVE vectors for text embedding initialization
    embedding_weights = np.random.rand(params['INPUT_VOCABULARY_SIZE'],
                                       params['TEXT_EMBEDDING_HIDDEN_SIZE'])
    words2idx = self.vocabularies[self.ids_inputs[0]]['words2idx']
    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for word, index in words2idx.items():
        vector = self.word_vectors.get(word)
        if vector is not None:
            embedding_weights[index, :] = vector
    self.word_vectors = {}

    # Question model
    question = Input(name=self.ids_inputs[0],
                     shape=tuple([params['MAX_INPUT_TEXT_LEN']]),
                     dtype='int32')
    text_embedding = Embedding(params['INPUT_VOCABULARY_SIZE'],
                               params['TEXT_EMBEDDING_HIDDEN_SIZE'],
                               input_length=params['MAX_INPUT_TEXT_LEN'],
                               weights=[embedding_weights],
                               trainable=params['GLOVE_VECTORS_TRAINABLE'],
                               mask_zero=True,
                               name='text_embedding')(question)
    lstm_text = LSTM(params['LSTM_ENCODER_HIDDEN_SIZE'], name='lstm',
                     return_sequences=False)(text_embedding)

    # Image model
    image = Input(name=self.ids_inputs[1],
                  shape=tuple([params['IMG_FEAT_SIZE']]))
    image_embedding = Dense(params['IMG_EMBEDDING_HIDDEN_SIZE'],
                            name='image_embedding')(image)
    if params['USE_BATCH_NORMALIZATION']:
        image_embedding = BatchNormalization(
            name='batch_normalization_image_embedding')(image_embedding)
    if params['USE_PRELU']:
        image_embedding = PReLU()(image_embedding)

    # Multimodal model
    image_text = merge([lstm_text, image_embedding],
                       mode=params['MULTIMODAL_MERGE_MODE'])
    # NOTE(review): the encoder LSTM uses return_sequences=False, so the
    # merged tensor presumably has no time axis for this decoder LSTM to
    # consume -- confirm this variant builds as intended.
    image_text = LSTM(params['LSTM_DECODER_HIDDEN_SIZE'],
                      return_sequences=False,
                      name='lstm_decoder')(image_text)
    if params['USE_DROPOUT']:
        image_text = Dropout(0.5)(image_text)

    # Classifier
    classifier = Dense(params['OUTPUT_VOCABULARY_SIZE'],
                       name=self.ids_outputs[0],
                       activation=params['CLASSIFIER_ACTIVATION'])(image_text)
    self.model = Model(input=[question, image], output=classifier)
def decoder_resnet(label_sizes, nb_filter=16, data_shape=(1, 64, 64),
                   nb_bits=12, resnet_depth=(3, 4, 6, 3), optimizer='adam'):
    """Assemble and compile a residual decoder network.

    The network is a stem followed by ``len(resnet_depth)`` stages; stage
    ``i`` uses ``nb_filter * 2**i`` filters and contains ``resnet_depth[i]``
    residual blocks.  The first block of every stage except the first one
    uses stride 2 on both the shortcut and the residual branch.  The output
    heads and their losses come from ``decoder_end_block``.

    :param label_sizes: forwarded to ``decoder_end_block``.
    :param nb_filter: number of filters of the stem and of the first stage.
    :param data_shape: shape given to the ``Input`` layer.
    :param nb_bits: forwarded to ``decoder_end_block``.
    :param resnet_depth: number of residual blocks per stage.
    :param optimizer: optimizer passed to ``model.compile``.
    :return: the compiled ``Model``.
    """
    def _identity(tensor):
        return tensor

    def _bn_elu_conv(filters, rows=3, cols=3, stride=1):
        # BatchNormalization -> ELU -> Convolution2D, in that order
        layers = [
            BatchNormalization(mode=0, axis=1),
            ELU(),
            Convolution2D(nb_filter=filters, nb_row=rows, nb_col=cols,
                          subsample=(stride, stride), init="he_normal",
                          border_mode="same"),
        ]
        return sequential(layers)

    def _residual_branch(filters, stride=1):
        # Two stacked units; only the first one may be strided
        first = _bn_elu_conv(filters, stride=stride)
        second = _bn_elu_conv(filters)
        return sequential([first, second])

    image_in = Input(shape=data_shape)
    stage_filters = [nb_filter * 2**stage
                     for stage in range(len(resnet_depth))]
    print("fitlers_by_depth", stage_filters)

    x = _bn_elu_conv(nb_filter, 3, 3, stride=2)(image_in)
    for stage, (filters, n_blocks) in enumerate(zip(stage_filters,
                                                    resnet_depth)):
        for block in range(n_blocks):
            downsample = block == 0 and stage != 0
            if downsample:
                # 1x1 strided unit so the shortcut matches the branch output
                shortcut = _bn_elu_conv(filters, 1, 1, stride=2)
                stride = 2
            else:
                shortcut = _identity
                stride = 1
            skip = shortcut(x)
            residual = _residual_branch(filters, stride)(x)
            x = merge([skip, residual], mode='sum')

    outputs, losses = decoder_end_block(x, label_sizes, nb_bits,
                                        activation=lambda: ELU())
    model = Model(image_in, list(outputs.values()))
    weights_per_loss = {name: decoder_loss_weights(name) for name in losses}
    model.compile(optimizer,
                  loss=list(losses.values()),
                  loss_weights=weights_per_loss)
    return model
def _build_generator_given_z_offset_and_labels(self):
    """Create the generator models that take ``(z_offset, labels)`` as input.

    Two models over the same graph are stored on ``self``:

    * ``generator_given_z_and_labels`` outputs only the final ``fake`` tensor.
    * ``sample_generator_given_z_and_labels`` outputs every tensor listed in
      ``sample_generator_given_z_and_labels_output_names`` (same order),
      i.e. the intermediate tag3d stages plus ``fake``.
    """
    labels = Input(shape=self.labels_shape, name='input_labels')
    z_offset = Input(shape=(self.z_dim_offset,), name='input_z_offset')

    strip_bits = Subtensor(self.nb_bits, self.labels_shape[0], axis=1)
    labels_without_bits = strip_bits(labels)

    # tag3d tensors derived from the labels
    tag3d, tag3d_depth_map = self.tag3d_network(labels)
    segmentation = Segmentation(threshold=-0.08, smooth_threshold=0.2,
                                sigma=1.5, name='segmentation')
    tag3d_segmented = segmentation(tag3d)
    tag3d_segmented_blur = GaussianBlur(sigma=3.0)(tag3d_segmented)

    # generator parameters predicted from z_offset and the labels
    generator_params = simple_gan_generator(
        self.generator_units, z_offset, labels_without_bits,
        tag3d_depth_map, tag3d, depth=self.generator_depth)
    blur_factor, lights, background, details = generator_params

    tag3d_blur = BlendingBlur(sigma=1)([tag3d, blur_factor])
    lighting = AddLighting(scale_factor=0.85, shift_factor=0.75)
    tag3d_lightin = lighting([tag3d_blur] + lights)
    fake_without_noise = Background(name='bg')(
        [background, tag3d_lightin, tag3d_segmented_blur])
    details_high_pass = HighPass(4, nb_steps=4)(details)

    # Bounding layer is created before the sum, as in the nested call form
    bound = InBounds(-1.0, 1.0)
    fake = bound(merge([details_high_pass, fake_without_noise], mode='sum'))

    stages = [
        ('tag3d', tag3d),
        ('tag3d_blur', tag3d_blur),
        ('tag3d_lightin', tag3d_lightin),
        ('fake_without_noise', fake_without_noise),
        ('fake', fake),
    ]
    named_outputs = OrderedDict()
    for stage_name, tensor in stages:
        named_outputs[stage_name] = name_tensor(tensor, stage_name)

    self.generator_given_z_and_labels = Model([z_offset, labels], [fake])
    self.sample_generator_given_z_and_labels_output_names = \
        list(named_outputs.keys())
    self.sample_generator_given_z_and_labels = Model(
        [z_offset, labels], list(named_outputs.values()))
def InceptionV3(include_top=True, weights='imagenet', input_tensor=None,
                input_shape=None, weight_decay=0.00004, num_classes=1000,
                dropout_prob=0., aux_include=True):
    """Build the Inception v3 architecture as a Keras ``Model``.

    Note that the default image size for this model is 299x299.

    :param include_top: not referenced anywhere in this function body.
    :param weights: not referenced anywhere in this function body (no
        weights are loaded here).
    :param input_tensor: optional tensor used as the network input instead of
        a freshly created ``Input``; when given, the model inputs are
        resolved with ``get_source_inputs``.
    :param input_shape: tuple without the channel axis; defaults to
        ``(299, 299)``.  A channel dimension of 3 is prepended or appended
        depending on ``K.image_dim_ordering()``.
    :param weight_decay: forwarded to every ``conv2d_bn`` call and, when
        truthy and positive, used as the L2 ``W_regularizer`` of the dense
        classifier layers.
    :param num_classes: number of units of the softmax classifier layers.
    :param dropout_prob: rate of the ``Dropout`` layer placed right before
        the final classifier.
    :param aux_include: when true, an auxiliary softmax classifier is attached
        to the ``mixed_7`` output and returned as a second model output.
    :return: a ``Model`` named ``'inception_v3_with_aux'`` with outputs
        ``[predictions, aux_classifier]`` when ``aux_include`` is true,
        otherwise a ``Model`` named ``'inception_v3'`` with the single
        ``predictions`` output.
    """
    if input_shape is None:
        input_shape = (299, 299)

    # Add the channel dimension and remember which axis the branches are
    # concatenated on.
    if K.image_dim_ordering() == 'th':
        input_shape = (3, ) + input_shape
        channel_axis = 1
    else:
        input_shape = input_shape + (3, )
        channel_axis = 3

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # NOTE(review): most conv2d_bn calls below pass no name; presumably their
    # layers get auto-generated names that depend on creation order, so the
    # statement order should not be changed -- confirm in conv2d_bn.

    # The size comments below use `tf` order
    # 299 x 299 x 3
    x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid',
                  weight_decay=weight_decay, name='0')
    # 149 x 149 x 32
    x = conv2d_bn(x, 32, 3, 3, border_mode='valid',
                  weight_decay=weight_decay, name='1')
    # 147 x 147 x 32
    x = conv2d_bn(x, 64, 3, 3, weight_decay=weight_decay, name='2')
    # 147 x 147 x 64
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool_1')(x)
    # 73 x 73 x 64
    x = conv2d_bn(x, 80, 1, 1, weight_decay=weight_decay, name='3')
    # 73 x 73 x 80
    x = conv2d_bn(x, 192, 3, 3, border_mode='valid',
                  weight_decay=weight_decay, name='4')
    # 71 x 71 x 192
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool_2')(x)
    # 35 x 35 x 192

    # Inception block
    # mixed 0: 35 x 35 x 256
    branch1x1 = conv2d_bn(x, 64, 1, 1, weight_decay=weight_decay)

    branch5x5 = conv2d_bn(x, 48, 1, 1, weight_decay=weight_decay)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5, weight_decay=weight_decay)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1, weight_decay=weight_decay)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3,
                             weight_decay=weight_decay)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3,
                             weight_decay=weight_decay)

    branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                   border_mode='same')(x)
    branch_pool = conv2d_bn(branch_pool, 32, 1, 1, weight_decay=weight_decay)
    x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool],
              mode='concat', concat_axis=channel_axis, name='mixed_0')

    # mixed 1, 2: same branch layout as mixed 0, but the pooling branch is
    # projected to 64 filters instead of 32
    for i in range(2):
        branch1x1 = conv2d_bn(x, 64, 1, 1, weight_decay=weight_decay)

        branch5x5 = conv2d_bn(x, 48, 1, 1, weight_decay=weight_decay)
        branch5x5 = conv2d_bn(branch5x5, 64, 5, 5, weight_decay=weight_decay)

        branch3x3dbl = conv2d_bn(x, 64, 1, 1, weight_decay=weight_decay)
        branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3,
                                 weight_decay=weight_decay)
        branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3,
                                 weight_decay=weight_decay)

        branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                       border_mode='same')(x)
        branch_pool = conv2d_bn(branch_pool, 64, 1, 1,
                                weight_decay=weight_decay)
        x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                  mode='concat', concat_axis=channel_axis,
                  name='mixed_' + str(i + 1))

    # mixed_3: 17 x 17 x 768
    # (grid reduction: strided 'valid' convolutions next to a strided max-pool)
    branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid',
                          weight_decay=weight_decay)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1, weight_decay=weight_decay)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3,
                             weight_decay=weight_decay)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, subsample=(2, 2),
                             border_mode='valid', weight_decay=weight_decay)

    branch_pool = MaxPooling2D((3, 3), strides=(2, 2),
                               border_mode='valid')(x)
    x = merge([branch3x3, branch3x3dbl, branch_pool],
              mode='concat', concat_axis=channel_axis, name='mixed_3')

    # mixed_4: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)

    branch7x7 = conv2d_bn(x, 128, 1, 1, weight_decay=weight_decay)
    branch7x7 = conv2d_bn(branch7x7, 128, 1, 7, weight_decay=weight_decay)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1, weight_decay=weight_decay)

    branch7x7dbl = conv2d_bn(x, 128, 1, 1, weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1,
                             weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7,
                             weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1,
                             weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7,
                             weight_decay=weight_decay)

    branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                   border_mode='same')(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1, weight_decay=weight_decay)
    x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool],
              mode='concat', concat_axis=channel_axis, name='mixed_4')

    # mixed 5, 6: 17 x 17 x 768
    for i in range(2):
        branch1x1 = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)

        branch7x7 = conv2d_bn(x, 160, 1, 1, weight_decay=weight_decay)
        branch7x7 = conv2d_bn(branch7x7, 160, 1, 7,
                              weight_decay=weight_decay)
        branch7x7 = conv2d_bn(branch7x7, 192, 7, 1,
                              weight_decay=weight_decay)

        branch7x7dbl = conv2d_bn(x, 160, 1, 1, weight_decay=weight_decay)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1,
                                 weight_decay=weight_decay)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7,
                                 weight_decay=weight_decay)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1,
                                 weight_decay=weight_decay)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7,
                                 weight_decay=weight_decay)

        branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                       border_mode='same')(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1,
                                weight_decay=weight_decay)
        x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                  mode='concat', concat_axis=channel_axis,
                  name='mixed_' + str(i + 5))

    # mixed 7: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)

    branch7x7 = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)
    branch7x7 = conv2d_bn(branch7x7, 192, 1, 7, weight_decay=weight_decay)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1, weight_decay=weight_decay)

    branch7x7dbl = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1,
                             weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7,
                             weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1,
                             weight_decay=weight_decay)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7,
                             weight_decay=weight_decay)

    branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                   border_mode='same')(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1, weight_decay=weight_decay)
    x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool],
              mode='concat', concat_axis=channel_axis, name='mixed_7')

    if aux_include:
        # Auxiliary Head logits (branches off the mixed_7 output)
        aux_classifier = AveragePooling2D((5, 5), strides=(3, 3),
                                          border_mode='valid')(x)
        aux_classifier = conv2d_bn(aux_classifier, 128, 1, 1,
                                   weight_decay=weight_decay)

        # Shape of feature map before the final layer
        # shape = aux_classifier.output_shape
        aux_classifier = conv2d_bn(aux_classifier, 768, 5, 5,
                                   border_mode='valid',
                                   weight_decay=weight_decay)
        aux_classifier = Flatten()(aux_classifier)
        # Only regularize the dense layer when a positive decay is requested
        if weight_decay and weight_decay > 0:
            aux_classifier = Dense(num_classes, activation='softmax',
                                   W_regularizer=l2(weight_decay),
                                   name='aux_classifier')(aux_classifier)
        else:
            aux_classifier = Dense(num_classes, activation='softmax',
                                   name='aux_classifier')(aux_classifier)

    # mixed 8: 8 x 8 x 1280.
    branch3x3 = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)
    branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, subsample=(2, 2),
                          border_mode='valid', weight_decay=weight_decay)

    branch7x7x3 = conv2d_bn(x, 192, 1, 1, weight_decay=weight_decay)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7,
                            weight_decay=weight_decay)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1,
                            weight_decay=weight_decay)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 3, 3, subsample=(2, 2),
                            border_mode='valid', weight_decay=weight_decay)

    branch_pool = MaxPooling2D((3, 3), strides=(2, 2),
                               border_mode='valid')(x)
    x = merge([branch3x3, branch7x7x3, branch_pool],
              mode='concat', concat_axis=channel_axis, name='mixed_8')

    # mixed 9 10: 8 x 8 x 2048
    for i in range(2):
        branch1x1 = conv2d_bn(x, 320, 1, 1, weight_decay=weight_decay)

        # The 3x3 branches fan out into parallel 1x3 and 3x1 convolutions
        # whose outputs are concatenated.
        branch3x3 = conv2d_bn(x, 384, 1, 1, weight_decay=weight_decay)
        branch3x3 = merge([
            conv2d_bn(branch3x3, 384, 1, 3, weight_decay=weight_decay),
            conv2d_bn(branch3x3, 384, 3, 1, weight_decay=weight_decay)
        ], mode='concat', concat_axis=channel_axis)

        branch3x3dbl = conv2d_bn(x, 448, 1, 1, weight_decay=weight_decay)
        branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3,
                                 weight_decay=weight_decay)
        branch3x3dbl = merge([
            conv2d_bn(branch3x3dbl, 384, 1, 3, weight_decay=weight_decay),
            conv2d_bn(branch3x3dbl, 384, 3, 1, weight_decay=weight_decay)
        ], mode='concat', concat_axis=channel_axis)

        branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                       border_mode='same')(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1,
                                weight_decay=weight_decay)
        x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool],
                  mode='concat', concat_axis=channel_axis,
                  name='mixed_' + str(9 + i))

    # Dimension reduction
    # 2048 x 8 x 8
    x = conv2d_bn(x, 1024, 1, 1, weight_decay=weight_decay)

    # Final pooling and prediction
    # 1024 x 8 x 8
    x = GlobalAveragePooling2D()(x)
    x = Dropout(dropout_prob)(x)
    # 1024
    if weight_decay and weight_decay > 0:
        predictions = Dense(num_classes, activation='softmax',
                            W_regularizer=l2(weight_decay),
                            name='predictions')(x)
    else:
        predictions = Dense(num_classes, activation='softmax',
                            name='predictions')(x)

    # When the caller supplied a tensor, use the Input layer(s) it came from
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    if aux_include:
        model = Model(inputs, [predictions, aux_classifier],
                      name='inception_v3_with_aux')
    else:
        model = Model(inputs, predictions, name='inception_v3')

    return model
def ArcticVideoCaptionWithInit(self, params):
    """
    Video captioning with:
        * Attention mechanism on video frames
        * Conditional LSTM for processing the video
        * Feed forward layers:
            + Context projected to output
            + Last word projected to output

    Builds the training model and stores it in ``self.model``.  When
    ``params['BEAM_SEARCH']`` is truthy, two extra models that reuse the
    decoder layers are built as well: ``self.model_init`` (training inputs ->
    probabilities, encoded video and decoder state) and ``self.model_next``
    (previous word, encoded video and previous state -> probabilities and
    next state), together with the id lists ``ids_inputs_init``,
    ``ids_outputs_init``, ``ids_inputs_next``, ``ids_outputs_next`` and the
    name mappings ``matchings_init_to_next`` / ``matchings_next_to_next``.

    :param params: configuration dictionary (sizes, regularisation weights,
        dropout flags, 'RNN_TYPE', 'INIT_LAYERS', 'DEEP_OUTPUT_LAYERS',
        'BEAM_SEARCH', ...).
    :return: None; the models are stored as attributes of ``self``.
    """

    # Video model
    video = Input(name=self.ids_inputs[0],
                  shape=tuple([params['NUM_FRAMES'], params['IMG_FEAT_SIZE']]))
    input_video = video
    ##################################################################
    #                       ENCODER
    ##################################################################
    # Optional per-frame dense embedding layers
    for activation, dimension in params['IMG_EMBEDDING_LAYERS']:
        input_video = TimeDistributed(
            Dense(dimension,
                  name='%s_1' % activation,
                  activation=activation,
                  W_regularizer=l2(params['WEIGHT_DECAY'])))(input_video)
        input_video = Regularize(input_video, params,
                                 name='%s_1' % activation)

    # NOTE(review): eval() resolves the recurrent layer class from the
    # configuration string params['RNN_TYPE'] (here and below).  The params
    # dict must come from a trusted source -- confirm it never carries
    # external input.
    if params['ENCODER_HIDDEN_SIZE'] > 0:
        if params['BIDIRECTIONAL_ENCODER']:
            encoder = Bidirectional(eval(params['RNN_TYPE'])(
                params['ENCODER_HIDDEN_SIZE'],
                W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                return_sequences=True),
                name='bidirectional_encoder_' + params['RNN_TYPE'],
                merge_mode='concat')(input_video)
        else:
            encoder = eval(params['RNN_TYPE'])(
                params['ENCODER_HIDDEN_SIZE'],
                W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                return_sequences=True,
                name='encoder_' + params['RNN_TYPE'])(input_video)
        # The encoder output is appended to (not substituted for) the
        # frame features along the feature axis.
        input_video = merge([input_video, encoder],
                            mode='concat', concat_axis=2)
        input_video = Regularize(input_video, params, name='input_video')

        # 2.3. Potentially deep encoder
        # Each extra layer is added to its own input (mode='sum').
        for n_layer in range(1, params['N_LAYERS_ENCODER']):
            if params['BIDIRECTIONAL_DEEP_ENCODER']:
                current_input_video = Bidirectional(
                    eval(params['RNN_TYPE'])(
                        params['ENCODER_HIDDEN_SIZE'],
                        W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                        U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                        b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                        dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                        dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                        return_sequences=True,
                    ),
                    merge_mode='concat',
                    name='bidirectional_encoder_' + str(n_layer))(input_video)
            else:
                current_input_video = eval(params['RNN_TYPE'])(
                    params['ENCODER_HIDDEN_SIZE'],
                    W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                    U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                    b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                    dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                    dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                    return_sequences=True,
                    name='encoder_' + str(n_layer))(input_video)

            current_input_video = Regularize(current_input_video, params,
                                             name='input_video_' + str(n_layer))
            input_video = merge([input_video, current_input_video],
                                mode='sum')

    # Previously generated words as inputs for training
    next_words = Input(name=self.ids_inputs[1],
                       batch_shape=tuple([None, None]),
                       dtype='int32')
    emb = Embedding(params['OUTPUT_VOCABULARY_SIZE'],
                    params['TARGET_TEXT_EMBEDDING_SIZE'],
                    name='target_word_embedding',
                    W_regularizer=l2(params['WEIGHT_DECAY']),
                    trainable=self.trg_embedding_weights_trainable,
                    weights=self.trg_embedding_weights,
                    mask_zero=True)(next_words)
    emb = Regularize(emb, params, name='target_word_embedding')

    # LSTM initialization perceptrons with ctx mean
    # 3.2. Decoder's RNN initialization perceptrons with ctx mean
    # (mean of the encoded video over axis 1)
    ctx_mean = Lambda(lambda x: K.mean(x, axis=1),
                      output_shape=lambda s: (s[0], s[2]),
                      name='lambda_mean')(input_video)

    if len(params['INIT_LAYERS']) > 0:
        # All but the last entry of INIT_LAYERS transform ctx_mean; the last
        # entry is the activation of the initial state (and memory) layers.
        for n_layer_init in range(len(params['INIT_LAYERS']) - 1):
            ctx_mean = Dense(
                params['DECODER_HIDDEN_SIZE'],
                name='init_layer_%d' % n_layer_init,
                W_regularizer=l2(params['WEIGHT_DECAY']),
                activation=params['INIT_LAYERS'][n_layer_init])(ctx_mean)
            ctx_mean = Regularize(ctx_mean, params,
                                  name='ctx' + str(n_layer_init))

        initial_state = Dense(
            params['DECODER_HIDDEN_SIZE'],
            name='initial_state',
            W_regularizer=l2(params['WEIGHT_DECAY']),
            activation=params['INIT_LAYERS'][-1])(ctx_mean)
        initial_state = Regularize(initial_state, params,
                                   name='initial_state')
        input_attentional_decoder = [emb, input_video, initial_state]

        if params['RNN_TYPE'] == 'LSTM':
            initial_memory = Dense(
                params['DECODER_HIDDEN_SIZE'],
                name='initial_memory',
                W_regularizer=l2(params['WEIGHT_DECAY']),
                activation=params['INIT_LAYERS'][-1])(ctx_mean)
            initial_memory = Regularize(initial_memory, params,
                                        name='initial_memory')
            input_attentional_decoder.append(initial_memory)
    else:
        # No initialisation layers: the decoder gets no explicit state input
        input_attentional_decoder = [emb, input_video]
    ##################################################################
    #                       DECODER
    ##################################################################

    # 3.3. Attentional decoder
    # The class name is assembled as 'Att' + RNN_TYPE + 'Cond' and resolved
    # with eval(); see the NOTE(review) in the encoder section.
    sharedAttRNNCond = eval('Att' + params['RNN_TYPE'] + 'Cond')(
        params['DECODER_HIDDEN_SIZE'],
        W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        V_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        wa_regularizer=l2(params['WEIGHT_DECAY']),
        Wa_regularizer=l2(params['WEIGHT_DECAY']),
        Ua_regularizer=l2(params['WEIGHT_DECAY']),
        ba_regularizer=l2(params['WEIGHT_DECAY']),
        dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        dropout_V=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        dropout_wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
        dropout_Wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
        dropout_Ua=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
        return_sequences=True,
        return_extra_variables=True,
        return_states=True,
        name='decoder_Att' + params['RNN_TYPE'] + 'Cond')

    # The decoder returns a list; the fifth entry is only read for LSTM.
    rnn_output = sharedAttRNNCond(input_attentional_decoder)
    proj_h = rnn_output[0]
    x_att = rnn_output[1]
    alphas = rnn_output[2]
    h_state = rnn_output[3]
    if params['RNN_TYPE'] == 'LSTM':
        h_memory = rnn_output[4]

    # With shared_layers=True, Regularize also returns the layers it applied
    # so they can be re-applied in the beam-search model below.
    [proj_h, shared_reg_proj_h] = Regularize(proj_h, params,
                                             shared_layers=True,
                                             name='proj_h0')

    shared_FC_mlp = TimeDistributed(Dense(
        params['TARGET_TEXT_EMBEDDING_SIZE'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        activation='linear', ), name='logit_lstm')
    out_layer_mlp = shared_FC_mlp(proj_h)
    shared_FC_ctx = TimeDistributed(Dense(
        params['TARGET_TEXT_EMBEDDING_SIZE'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        activation='linear', ), name='logit_ctx')
    out_layer_ctx = shared_FC_ctx(x_att)
    shared_Lambda_Permute = PermuteGeneral((1, 0, 2))
    out_layer_ctx = shared_Lambda_Permute(out_layer_ctx)
    shared_FC_emb = TimeDistributed(Dense(
        params['TARGET_TEXT_EMBEDDING_SIZE'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        activation='linear'), name='logit_emb')
    out_layer_emb = shared_FC_emb(emb)

    [out_layer_mlp, shared_reg_out_layer_mlp] = Regularize(
        out_layer_mlp, params, shared_layers=True, name='out_layer_mlp')
    [out_layer_ctx, shared_reg_out_layer_ctx] = Regularize(
        out_layer_ctx, params, shared_layers=True, name='out_layer_ctx')
    [out_layer_emb, shared_reg_out_layer_emb] = Regularize(
        out_layer_emb, params, shared_layers=True, name='out_layer_emb')

    # Sum of the three projections (decoder state, context, previous word)
    additional_output = merge(
        [out_layer_mlp, out_layer_ctx, out_layer_emb],
        mode='sum', name='additional_input')
    shared_activation_tanh = Activation('tanh')

    out_layer = shared_activation_tanh(additional_output)

    shared_deep_list = []
    shared_reg_deep_list = []
    # 3.6 Optional deep output layer
    for i, (activation, dimension) in enumerate(params['DEEP_OUTPUT_LAYERS']):
        if activation.lower() == 'maxout':
            shared_deep_list.append(
                TimeDistributed(MaxoutDense(dimension,
                                            W_regularizer=l2(
                                                params['WEIGHT_DECAY'])),
                                name='maxout_%d' % i))
        else:
            shared_deep_list.append(
                TimeDistributed(Dense(dimension,
                                      activation=activation,
                                      W_regularizer=l2(
                                          params['WEIGHT_DECAY'])),
                                name=activation + '_%d' % i))
        out_layer = shared_deep_list[-1](out_layer)
        [out_layer, shared_reg_out_layer
         ] = Regularize(out_layer, params, shared_layers=True,
                        name='out_layer' + str(activation))
        shared_reg_deep_list.append(shared_reg_out_layer)

    # 3.7. Output layer: Softmax
    shared_FC_soft = TimeDistributed(Dense(
        params['OUTPUT_VOCABULARY_SIZE'],
        activation=params['CLASSIFIER_ACTIVATION'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        name=params['CLASSIFIER_ACTIVATION']), name=self.ids_outputs[0])
    softout = shared_FC_soft(out_layer)

    self.model = Model(input=[video, next_words], output=softout)

    ##################################################################
    #                     BEAM SEARCH MODEL                          #
    ##################################################################
    # Now that we have the basic training model ready, let's prepare the model for applying decoding
    # The beam-search model will include all the minimum required set of layers (decoder stage) which offer the
    # possibility to generate the next state in the sequence given a pre-processed input (encoder stage)
    if params['BEAM_SEARCH']:
        # First, we need a model that outputs the preprocessed input + initial h state
        # for applying the initial forward pass
        model_init_input = [video, next_words]
        model_init_output = [softout, input_video, h_state]
        if params['RNN_TYPE'] == 'LSTM':
            model_init_output.append(h_memory)

        self.model_init = Model(input=model_init_input,
                                output=model_init_output)

        # Store inputs and outputs names for model_init
        self.ids_inputs_init = self.ids_inputs
        # first output must be the output probs.
        self.ids_outputs_init = self.ids_outputs + [
            'preprocessed_input', 'next_state'
        ]
        if params['RNN_TYPE'] == 'LSTM':
            self.ids_outputs_init.append('next_memory')

        # Second, we need to build an additional model with the capability to have the following inputs:
        #   - preprocessed_input
        #   - prev_word
        #   - prev_state
        # and the following outputs:
        #   - softmax probabilities
        #   - next_state
        # NOTE(review): the sizes below assume the frame features fed to the
        # encoder have width IMG_FEAT_SIZE; if IMG_EMBEDDING_LAYERS is
        # non-empty the Dense `dimension` presumably changes that width --
        # confirm.
        if params['ENCODER_HIDDEN_SIZE'] > 0:
            if params['BIDIRECTIONAL_ENCODER']:
                preprocessed_size = params[
                    'ENCODER_HIDDEN_SIZE'] * 2 + params['IMG_FEAT_SIZE']
            else:
                preprocessed_size = params['ENCODER_HIDDEN_SIZE'] + params[
                    'IMG_FEAT_SIZE']
        else:
            preprocessed_size = params['IMG_FEAT_SIZE']

        # Define inputs
        preprocessed_annotations = Input(
            name='preprocessed_input',
            shape=tuple([params['NUM_FRAMES'], preprocessed_size]))
        prev_h_state = Input(name='prev_state',
                             shape=tuple([params['DECODER_HIDDEN_SIZE']]))
        input_attentional_decoder = [
            emb, preprocessed_annotations, prev_h_state
        ]

        if params['RNN_TYPE'] == 'LSTM':
            prev_h_memory = Input(name='prev_memory',
                                  shape=tuple(
                                      [params['DECODER_HIDDEN_SIZE']]))
            input_attentional_decoder.append(prev_h_memory)
        # Apply decoder (same layer instance as in the training model)
        rnn_output = sharedAttRNNCond(input_attentional_decoder)
        proj_h = rnn_output[0]
        x_att = rnn_output[1]
        alphas = rnn_output[2]
        h_state = rnn_output[3]
        if params['RNN_TYPE'] == 'LSTM':
            h_memory = rnn_output[4]
        # Re-apply the regularization layers recorded while building the
        # training model
        for reg in shared_reg_proj_h:
            proj_h = reg(proj_h)

        out_layer_mlp = shared_FC_mlp(proj_h)
        out_layer_ctx = shared_FC_ctx(x_att)
        out_layer_ctx = shared_Lambda_Permute(out_layer_ctx)
        out_layer_emb = shared_FC_emb(emb)

        for (reg_out_layer_mlp, reg_out_layer_ctx,
             reg_out_layer_emb) in zip(shared_reg_out_layer_mlp,
                                       shared_reg_out_layer_ctx,
                                       shared_reg_out_layer_emb):
            out_layer_mlp = reg_out_layer_mlp(out_layer_mlp)
            out_layer_ctx = reg_out_layer_ctx(out_layer_ctx)
            out_layer_emb = reg_out_layer_emb(out_layer_emb)

        additional_output = merge(
            [out_layer_mlp, out_layer_ctx, out_layer_emb],
            mode='sum', name='additional_input_model_next')
        out_layer = shared_activation_tanh(additional_output)

        for (deep_out_layer, reg_list) in zip(shared_deep_list,
                                              shared_reg_deep_list):
            out_layer = deep_out_layer(out_layer)
            for reg in reg_list:
                out_layer = reg(out_layer)

        # Softmax
        softout = shared_FC_soft(out_layer)
        model_next_inputs = [
            next_words, preprocessed_annotations, prev_h_state
        ]
        model_next_outputs = [softout, preprocessed_annotations, h_state]
        if params['RNN_TYPE'] == 'LSTM':
            model_next_inputs.append(prev_h_memory)
            model_next_outputs.append(h_memory)

        self.model_next = Model(input=model_next_inputs,
                                output=model_next_outputs)

        # Store inputs and outputs names for model_next
        # first input must be previous word
        self.ids_inputs_next = [self.ids_inputs[1]
                                ] + ['preprocessed_input', 'prev_state']
        # first output must be the output probs.
        self.ids_outputs_next = self.ids_outputs + [
            'preprocessed_input', 'next_state'
        ]

        # Input -> Output matchings from model_init to model_next and from model_next to model_next
        self.matchings_init_to_next = {
            'preprocessed_input': 'preprocessed_input',
            'next_state': 'prev_state'
        }
        self.matchings_next_to_next = {
            'preprocessed_input': 'preprocessed_input',
            'next_state': 'prev_state'
        }
        if params['RNN_TYPE'] == 'LSTM':
            self.ids_inputs_next.append('prev_memory')
            self.ids_outputs_next.append('next_memory')
            self.matchings_init_to_next['next_memory'] = 'prev_memory'
            self.matchings_next_to_next['next_memory'] = 'prev_memory'
def generator(inputs):
    """Wire the generator graph from a single latent input tensor.

    The latent tensor is split three ways (driver, offset, bits).  Bits and
    the normalised driver feed ``mask_generator``; the offset part feeds the
    front/middle/back offset networks together with reduced versions of the
    mask selection and the predicted lighting.  Mask and offset are blended
    with ``PyramidBlending``, post-processed, and the high frequencies of the
    post-processed result are added back onto the blending.

    :param inputs: sequence holding exactly one tensor.
    :return: ``LinearInBounds(-1.2, 1.2)`` applied to the summed output.
    """
    (latent,) = inputs

    latent_driver = Split(*z_for_driver, axis=1)(latent)
    latent_offset = Split(*z_for_offset, axis=1)(latent)
    latent_bits = Split(*z_for_bits, axis=1)(latent)

    # --- mask generator ---
    bits = get_bits(latent_bits)
    driver = mask_driver(latent_driver)
    driver_norm = NormSinCosAngle(0)(driver)
    mask_input = concat([bits, driver_norm], name='mask_gen_in')
    mask = mask_generator(mask_input)
    if mask_generator_weights:
        # Restore weights for the layers between mask_input and mask
        load_weights(collect_layers(mask_input, mask),
                     mask_generator_weights)

    selection = with_regularizer(
        Selection(threshold=-0.08, smooth_threshold=0.2, sigma=1.5,
                  name='selection'),
        MinCoveredRegularizer())

    mask_down = PyramidReduce()(mask)
    mask_selection = selection(mask)
    mask_selection_down = PyramidReduce(scale=4)(mask_selection)

    # --- offset front + lighting ---
    # ZeroGradient() wraps the driver before it enters the offset network
    driver_for_offset = ZeroGradient()(driver_norm)
    out_offset_front = offset_front([latent_offset, driver_for_offset])

    light_mask = light_merge_mask16(mask_selection_down)
    light_scale64, light_shift64 = lighting_generator(
        [out_offset_front, light_mask])
    lighting = AddLighting(scale_factor=0.5, shift_factor=0.75)
    mask_with_lighting = lighting([mask, light_scale64, light_shift64])

    # --- offset middle / back ---
    offset_mask16 = offset_merge_mask16(mask_selection_down)
    # NOTE(review): concat is called with a list elsewhere in this function
    # but with two positional tensors here -- confirm the helper handles
    # this form as intended.
    offset_light16 = offset_merge_light16(
        concat(light_scale64, light_shift64))
    out_offset_middle = offset_middle(
        [out_offset_front, offset_mask16, offset_light16])

    offset_mask32 = offset_merge_mask32(mask_down)
    offset_back_feature_map, out_offset_back = offset_back(
        [out_offset_middle, offset_mask32])

    mask_weight32 = mask_weight_blending32(out_offset_middle)
    mask_weight64 = mask_weight_blending64(out_offset_middle)

    # --- blending and post-processing ---
    pyramid_blending = PyramidBlending(
        offset_pyramid_layers=3,
        mask_pyramid_layers=3,
        mask_weights=['variable', 'variable', 1],
        offset_weights=[1, 1, 1],
        use_selection=[True, True, True],
        name='blending')
    blending = pyramid_blending(
        [out_offset_back, mask_with_lighting, mask_selection,
         mask_weight32, mask_weight64])

    postprocess_inputs = [
        blending, mask_selection, light_scale64, light_shift64,
        mask, out_offset_back, offset_back_feature_map,
    ]
    mask_post = mask_postprocess(postprocess_inputs)
    mask_post_high = HighFrequencies(
        4, nb_steps=5, name='mask_post_high')(mask_post)

    blending_post = merge([mask_post_high, blending],
                          mode='sum', name='blending_post')
    return LinearInBounds(-1.2, 1.2)(blending_post)
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    """Build the Keras model described by ``args`` (the model is not compiled).

    Graph layout: embedding -> optional masked 1-D convolution -> either one
    to three (optionally bidirectional) recurrent layers or no recurrence ->
    mean-over-time pooling -> optional concatenation with a TF/IDF input ->
    optional dense layer -> final prediction layer.

    Args:
        args: Option namespace. Attributes read here: ``recurrent_unit``,
            ``dropout_w``, ``dropout_u``, ``dropout_prob``, ``model_type``,
            ``emb_path``, ``emb_dim``, ``embd_train``, ``loss``,
            ``normalize``, ``cnn_dim``, ``cnn_window_size``, ``rnn_dim``,
            ``bi``, ``rnn_2l``, ``rnn_3l``, ``aggregation``, ``tfidf`` and
            ``dense``.
        initial_mean_value: For ``model_type == 'reg'`` an array whose length
            is the number of outputs (a 0-d array is promoted to length 1).
            For any other model type the value itself is used as the number
            of outputs.
        overal_maxlen: Length of the integer input sequence.
        vocab: Vocabulary; ``len(vocab)`` sizes the embedding and the object
            is handed to the embedding reader when ``args.emb_path`` is set.

    Returns:
        A ``Model`` whose input is the sequence (plus the TF/IDF input when
        ``args.tfidf > 0``) and whose output is the prediction tensor.

    Raises:
        NotImplementedError: for an unsupported ``model_type``, an
            unsupported ``loss`` of the classification model, or an
            aggregation other than ``'mot'``.
    """
    # ------------------------------------------------------------------
    # Recurrence unit type
    # ------------------------------------------------------------------
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    # ------------------------------------------------------------------
    # Dropout rates: the specific rates fall back to the shared one
    # ------------------------------------------------------------------
    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            # axis=0 is the only valid axis for a 0-d array; axis=1 is
            # rejected by current NumPy releases.
            initial_mean_value = np.expand_dims(initial_mean_value, axis=0)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    # ------------------------------------------------------------------
    # Initialize embeddings if requested
    # ------------------------------------------------------------------
    if args.emb_path:

        def my_init(shape, name=None):
            """Return a random matrix overwritten with pre-trained vectors."""
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, emb_matrix)
            return K.variable(emb_matrix, name=name)

        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    # ------------------------------------------------------------------
    # Activations / initialisers per model type
    # ------------------------------------------------------------------
    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        final_init = 'glorot_uniform'
        if args.loss == 'cnp':
            final_activation = 'softmax'
        elif args.loss == 'hng':
            final_activation = 'linear'
        else:
            # Previously this surfaced later as a NameError on
            # final_activation.
            raise NotImplementedError
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
            dense_activation = 'tanh'
            dense_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
            dense_activation = 'tanh'
            dense_init = 'he_uniform'
    else:
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Model stacking
    # ------------------------------------------------------------------
    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab),
                  args.emb_dim,
                  mask_zero=True,
                  init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        forwards = RNN(args.rnn_dim,
                       return_sequences=True,
                       dropout_W=dropout_W,
                       dropout_U=dropout_U)(x)
        if args.bi:
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(x)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            if args.bi:
                backwards = Dropout(args.dropout_prob)(backwards)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim,
                                return_sequences=True,
                                dropout_W=dropout_W,
                                dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)

        # Stack 3 Layers
        if args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim,
                                return_sequences=True,
                                dropout_W=dropout_W,
                                dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)

        # Mean over Time
        if args.aggregation == 'mot':
            forwards = MeanOverTime(mask_zero=True)(forwards)
            if args.bi:
                backwards = MeanOverTime(mask_zero=True)(backwards)
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            tfidfmerged = merge([merged, pca_input], mode='concat')
        else:
            tfidfmerged = merged

        # Optional Dense Layer
        # NOTE(review): this branch sizes the optional layer with
        # num_outputs while the no-RNN branch uses args.dense - confirm
        # which one is intended before changing it.
        if args.dense > 0:
            if args.loss == 'hng':
                tfidfmerged = Dense(
                    num_outputs,
                    init=dense_init,
                    W_regularizer=l2(0.001),
                    activity_regularizer=activity_l2(0.001))(tfidfmerged)
            else:
                tfidfmerged = Dense(num_outputs,
                                    init=dense_init)(tfidfmerged)
            if final_activation == 'relu' or final_activation == 'linear':
                tfidfmerged = BatchNormalization()(tfidfmerged)
            tfidfmerged = Activation(dense_activation)(tfidfmerged)
            if args.dropout_prob > 0:
                tfidfmerged = Dropout(args.dropout_prob)(tfidfmerged)

        # Final Prediction Layer
        if args.loss == 'hng':
            tfidfmerged = Dense(
                num_outputs,
                init=final_init,
                W_regularizer=l2(0.001),
                activity_regularizer=activity_l2(0.001))(tfidfmerged)
        else:
            tfidfmerged = Dense(num_outputs, init=final_init)(tfidfmerged)
        if final_activation == 'relu' or final_activation == 'linear':
            tfidfmerged = BatchNormalization()(tfidfmerged)
        predictions = Activation(final_activation)(tfidfmerged)

    else:  # if no rnn
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Mean over Time
        if args.aggregation == 'mot':
            x = MeanOverTime(mask_zero=True)(x)
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            z = merge([x, pca_input], mode='concat')
        else:
            z = x

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                z = Dense(args.dense,
                          init=dense_init,
                          W_regularizer=l2(0.001),
                          activity_regularizer=activity_l2(0.001))(z)
            else:
                z = Dense(args.dense, init=dense_init)(z)
            if final_activation == 'relu' or final_activation == 'linear':
                z = BatchNormalization()(z)
            z = Activation(dense_activation)(z)
            if args.dropout_prob > 0:
                z = Dropout(args.dropout_prob)(z)

        # Final Prediction Layer
        if args.loss == 'hng':
            z = Dense(num_outputs,
                      init=final_init,
                      W_regularizer=l2(0.001),
                      activity_regularizer=activity_l2(0.001))(z)
        else:
            # The prediction layer must emit num_outputs units with the
            # final initialiser, exactly like the RNN branch; it used to be
            # sized by args.dense with the dense initialiser.
            z = Dense(num_outputs, init=final_init)(z)
        if final_activation == 'relu' or final_activation == 'linear':
            z = BatchNormalization()(z)
        predictions = Activation(final_activation)(z)

    # Model Input/Output
    if args.tfidf > 0:
        model = Model(input=[sequence, pca_input], output=predictions)
    else:
        model = Model(input=sequence, output=predictions)

    logger.info(' Model Done')
    return model
def attention_imp_merge_exp():
    """Build and compile the merged attention / feature-vector / CNN model.

    Three branches are concatenated by a ``Merge`` layer:

    * ``model_atn``: embedding -> bidirectional GRU -> time-distributed
      dense -> attention layer,
    * ``model_feature_vec``: a stack of dense layers over ``N_Features``
      inputs,
    * ``model_cnn``: embedding -> four parallel 1-D convolutions (filter
      lengths 1, 2, 3, 5) -> attention layer -> dense.

    The merged vector is reshaped to a length-1 sequence and passed through
    a second bidirectional GRU, a time-distributed dense layer, an attention
    layer and a single ``tanh`` output unit.

    Reference:
    https://richliao.github.io/supervised/classification/2016/12/26/textclassifier-HATN/

    :return: the ``Sequential`` model, compiled with loss ``mae`` and
        optimizer ``adam``.
    """
    # Load Embeddings matrix
    embedding_weights = joblib.load(config.DUMPED_VECTOR_DIR +
                                    'mb_voc_embeddings.pkl')

    # Attention branch
    model_atn = Sequential()
    model_atn.add(Embedding(max_features,
                            embedding_dims,
                            input_length=max_len,
                            weights=[embedding_weights],
                            trainable=True))
    model_atn.add(Bidirectional(GRU(200, return_sequences=True),
                                name='bidirectional'))
    model_atn.add(TimeDistributed(Dense(200), name='time_dist'))
    model_atn.add(AttLayer(name='att'))

    # Hand-crafted feature branch
    model_feature_vec = Sequential()
    model_feature_vec.add(Dense(200, input_dim=N_Features, init='normal',
                                activation='relu'))
    model_feature_vec.add(Dense(100, init='normal', activation='relu'))
    model_feature_vec.add(Dropout(0.2))
    model_feature_vec.add(Dense(50, init='normal', activation='relu'))
    model_feature_vec.add(Dense(10, init='normal', activation='relu'))

    # CNN branch (functional API). The input and embedding layer are built
    # once; an identical pair used to be created first and then immediately
    # overwritten.
    sentence = Input(shape=(max_len,), dtype='float32', name='w1')
    embedding_layer = Embedding(input_dim=max_features,
                                output_dim=embedding_dims,
                                weights=[embedding_weights])
    sentence_emb = embedding_layer(sentence)
    cnn_layers = [Convolution1D(filter_length=filter_length,
                                nb_filter=512,
                                activation='relu',
                                border_mode='same')
                  for filter_length in [1, 2, 3, 5]]
    merged_cnn = merge([cnn(sentence_emb) for cnn in cnn_layers],
                       mode='concat', concat_axis=-1)
    attention = AttLayer(name='att')(merged_cnn)
    cnn_model = Dense(128, init='normal', activation='relu')(attention)
    model_cnn = Model(input=[sentence], output=[cnn_model], name='cnn_model')

    # Merge the three branches.
    merged_layer = Sequential()
    merged_layer.add(Merge([model_atn, model_feature_vec, model_cnn],
                           mode='concat', concat_axis=1,
                           name='merge_layer'))
    # presumably 338 = 200 (attention branch) + 10 (feature branch)
    # + 128 (CNN branch) - TODO confirm against AttLayer's output shape.
    merged_layer.add(Reshape((1, 338)))
    merged_layer.add(Bidirectional(GRU(200, return_sequences=True),
                                   name='bidirectional_2'))
    merged_layer.add(TimeDistributed(Dense(50), name='time_dist'))
    merged_layer.add(AttLayer(name='att'))
    merged_layer.add(Dense(1, init='normal', name='combined_dense',
                           activation='tanh'))

    # Compile model
    merged_layer.compile(loss='mae', optimizer='adam')
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    merged_layer.summary()
    return merged_layer
def _build_generator_given_z_offset_and_labels(self):
    """Build the generator models that take ``z_offset`` and ``labels``.

    Sets three attributes on ``self``:

    * ``generator_given_z_and_labels``: model mapping
      ``[z_offset, labels]`` to the final ``fake`` tensor only,
    * ``sample_generator_given_z_and_labels``: model with the same inputs
      that outputs every intermediate tensor collected below,
    * ``sample_generator_given_z_and_labels_output_names``: the names of
      those outputs, in the same order.
    """
    labels_in = Input(shape=self.labels_shape, name='input_labels')
    z_offset_in = Input(shape=(self.z_dim_offset,), name='input_z_offset')

    # Insertion order defines the output order of the sample model.
    named = OrderedDict()

    labels_no_bits = Subtensor(self.nb_bits, self.labels_shape[0],
                               axis=1)(labels_in)
    raw_tag3d, depth_map = self.tag3d_network(labels_in)
    tag3d = ScaleUnitIntervalTo(-1, 1)(raw_tag3d)
    named['tag3d'] = tag3d
    named['tag3d_depth_map'] = depth_map

    segment = Segmentation(threshold=-0.08, smooth_threshold=0.2, sigma=1.5,
                           name='segmentation')
    tag3d_small = PyramidReduce()(tag3d)
    segmented = segment(raw_tag3d)
    named['tag3d_segmented'] = segmented
    segmented_blur = GaussianBlur(sigma=0.66)(segmented)

    # Front offset stage; labels are routed through ZeroGradient first.
    frozen_labels = ZeroGradient()(labels_no_bits)
    front = get_offset_front([z_offset_in, frozen_labels],
                             self.generator_units)

    # Lighting.
    light_depth = get_preprocess(depth_map, self.preprocess_units,
                                 nb_conv_layers=2)
    lights = get_lighting_generator([front, light_depth],
                                    self.generator_units)

    # Middle offset stage.
    offset_depth = get_preprocess(depth_map, self.preprocess_units,
                                  nb_conv_layers=2)
    middle_light = get_preprocess(concat(lights), self.preprocess_units,
                                  resize=['down', 'down'])
    middle_tag3d = get_preprocess(tag3d_small, self.preprocess_units // 2,
                                  resize=['down', ''], nb_conv_layers=2)
    middle = get_offset_middle(
        [front, offset_depth, middle_light, middle_tag3d],
        self.generator_units)

    # Back offset stage.
    back_tag3d = get_preprocess(tag3d_small, self.preprocess_units // 2,
                                nb_conv_layers=2)
    back_features, back = get_offset_back([middle, back_tag3d],
                                          self.generator_units)

    blur_factor = get_blur_factor(middle, min=0.25, max=1.)
    named['blur_factor'] = blur_factor
    blurred = BlendingBlur(sigma=2.0)([tag3d, blur_factor])
    named['tag3d_blur'] = blurred

    for position, key in enumerate(
            ('light_black', 'light_white', 'light_shift')):
        named[key] = lights[position]

    lighten = AddLighting(scale_factor=0.90,
                          shift_factor=0.90)([blurred] + lights)
    lighten = InBounds(clip=True, weight=15)(lighten)
    named['tag3d_lighten'] = lighten
    named['background_offset'] = back

    blended = Background(name='blending')([back, lighten, segmented_blur])
    named['fake_without_noise'] = blended

    details = get_details(
        [blended, segmented_blur, tag3d, back, back_features] + lights,
        self.generator_units)
    named['details_offset'] = details
    high_pass = HighPass(3.5, nb_steps=3)(details)
    named['details_high_pass'] = high_pass

    bound = InBounds(-2.0, 2.0)
    fake = bound(merge([high_pass, blended], mode='sum'))
    named['fake'] = fake

    # Give every collected tensor its dictionary key as name.
    for key, tensor in list(named.items()):
        named[key] = name_tensor(tensor, key)

    model_inputs = [z_offset_in, labels_in]
    self.generator_given_z_and_labels = Model(model_inputs, [fake])
    self.sample_generator_given_z_and_labels_output_names = list(named.keys())
    self.sample_generator_given_z_and_labels = Model(
        [z_offset_in, labels_in], list(named.values()))
def build_siamese_model():
    """Build and compile the siamese model over pairs of reviews.

    The data sets come from ``build_siamese_input``; a single CNN model from
    ``build_CNN_model`` is applied to both the left and the right review
    input, so both branches share its weights. The difference of the two
    branch outputs feeds a one-unit dense layer whose output is the energy,
    and the model is compiled with ``rmsprop`` and ``contrastiveLoss``.

    Returns:
        dict with keys ``'siamese'`` (the compiled siamese model), ``'CNN'``
        (the shared CNN model) and ``'data'`` (the tuple
        ``(trainingSets, devSets, devKNNsets, testSets)``).
    """
    print('building pairs of reviews for siamese model input...')

    trainingSets, devSets, devKNNsets, testSets = build_siamese_input(
        VocabSize,
        useWords=USEWORDS,
        skipTop=skipTop,
        devSplit=DEVSPLIT)

    # X_left and X_right are matrices with trainingSet rows and reviewLen
    # columns; y_left and y_right are the corresponding sentiment labels
    # (0: negative, 1: positive); similarity is 0 if X_left and X_right have
    # the same sentiment label and 1 otherwise.
    X_left, y_left, X_right, y_right, similarity = trainingSets

    # Xdev_left and Xdev_right are matrices with devSet rows and reviewLen
    # columns.
    Xdev_left, ydev_left, Xdev_right, ydev_right, dev_similarity = devSets

    print(len(X_left), 'train sequences length')
    print(len(Xdev_left), 'dev sequences length')
    print(len(devKNNsets[0]), 'devKNN sequences length')
    print(len(testSets[0]), 'test sequences length')

    print('train shape:', X_left.shape)
    print('dev shape:', Xdev_left.shape)
    print('devKNN shape:', devKNNsets[0].shape)
    print('test shape:', testSets[0].shape)

    print('Build model...')

    CNN_model = build_CNN_model('1hotVector')

    Lreview = Input(shape=(maxReviewLen,), dtype='int32', name="Lreview")
    Rreview = Input(shape=(maxReviewLen,), dtype='int32', name="Rreview")

    # TODO removed sentiment label info for now; with it added the CNN model
    # would be called as CNN_model([review, sentiment_prob]).

    # Apply the shared CNN to each input. Binding the branches to the model
    # object itself left the siamese graph disconnected from Lreview and
    # Rreview.
    rightbranch = CNN_model([Rreview])
    leftbranch = CNN_model([Lreview])

    # First take the difference of the final feature representations of the
    # two branches.
    merged_vector = merge([leftbranch, rightbranch],
                          mode=vectorDifference,
                          output_shape=merged_outshape,
                          name='merged_vector')

    # Then that difference vector is fed into the final fully connected
    # layer that outputs the energy, i.e. the squared euclidean distance
    # ||leftbranch - rightbranch||.
    siamese_out = Dense(1, activation=squaredl2,
                        name='energy_output')(merged_vector)

    # TODO if sentiment label info is included then
    # inputs = [Lreview, Lsent_prob, Rreview, Rsent_prob]
    siamese_model = Model(input=[Lreview, Rreview],
                          output=siamese_out,
                          name="siamese_model")

    # TODO SGD is used in LeCun's paper; RMSprop is used instead for now.
    siamese_model.compile(optimizer='rmsprop', loss=contrastiveLoss)

    return {
        'siamese': siamese_model,
        'CNN': CNN_model,
        'data': (trainingSets, devSets, devKNNsets, testSets)
    }