def box_convolution(input, box_filters, l2):
    reduce = Conv3D(box_filters, 1, kernel_regularizer=l2)(input)
    squash = Conv3D(box_filters, (FOUR, FOUR, FOUR), kernel_regularizer=l2)(reduce)
    gather = Reshape((box_filters,))(squash)
    repeat = RepeatVector(FOUR * FOUR * FOUR)(gather)
    spread = Reshape((FOUR, FOUR, FOUR, box_filters))(repeat)
    return spread
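# Hedged usage sketch for box_convolution: FOUR = 4 matches the constant these
# helpers assume, while the 16-channel input and the l2 strength below are
# illustrative only, not values from the original code.
FOUR = 4
voxels = Input(shape=(FOUR, FOUR, FOUR, 16))
box_code = box_convolution(voxels, box_filters=8, l2=regularizers.l2(1e-4))
# box_code: (batch, FOUR, FOUR, FOUR, 8); every cell carries the same global summary.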
def demo_create_encoder(latent_dim, cat_dim, window_size, input_dim):
    input_layer = Input(shape=(window_size, input_dim))

    code = TimeDistributed(Dense(64, activation='linear'))(input_layer)
    code = Bidirectional(LSTM(128, return_sequences=True))(code)
    code = BatchNormalization()(code)
    code = ELU()(code)
    code = Bidirectional(LSTM(64))(code)
    code = BatchNormalization()(code)
    code = ELU()(code)

    cat = Dense(64)(code)
    cat = BatchNormalization()(cat)
    cat = PReLU()(cat)
    cat = Dense(cat_dim, activation='softmax')(cat)

    latent_repr = Dense(64)(code)
    latent_repr = BatchNormalization()(latent_repr)
    latent_repr = PReLU()(latent_repr)
    latent_repr = Dense(latent_dim, activation='linear')(latent_repr)

    decode = Concatenate()([latent_repr, cat])
    decode = RepeatVector(window_size)(decode)
    decode = Bidirectional(LSTM(64, return_sequences=True))(decode)
    decode = ELU()(decode)
    decode = Bidirectional(LSTM(128, return_sequences=True))(decode)
    decode = ELU()(decode)
    decode = TimeDistributed(Dense(64))(decode)
    decode = ELU()(decode)
    decode = TimeDistributed(Dense(input_dim, activation='linear'))(decode)

    error = Subtract()([input_layer, decode])

    return Model(input_layer, [decode, latent_repr, cat, error])
def create_model(steps_before, steps_after, feature_count):
    """
    creates, compiles and returns a RNN model
    @param steps_before: the number of previous time steps (input)
    @param steps_after: the number of posterior time steps (output or predictions)
    @param feature_count: the number of features in the model
    """
    hidden_neurons = 300

    model = Sequential()
    # The original used the deprecated input_dim/output_dim LSTM arguments;
    # units plus input_shape is the equivalent modern spelling.
    model.add(LSTM(hidden_neurons,
                   input_shape=(steps_before, feature_count),
                   return_sequences=False))
    model.add(RepeatVector(steps_after))
    model.add(LSTM(hidden_neurons, return_sequences=True))
    model.add(TimeDistributed(Dense(feature_count)))
    model.add(Activation('linear'))
    model.compile(loss='mean_squared_error',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
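# A minimal smoke test for create_model, kept as a hedged sketch: the window
# sizes, feature count, and random data below are illustrative assumptions,
# not values from the original code.
import numpy as np

STEPS_BEFORE, STEPS_AFTER, FEATURES = 10, 5, 3
demo_model = create_model(STEPS_BEFORE, STEPS_AFTER, FEATURES)
demo_x = np.random.rand(32, STEPS_BEFORE, FEATURES)   # (samples, steps_before, features)
demo_y = np.random.rand(32, STEPS_AFTER, FEATURES)    # (samples, steps_after, features)
demo_model.fit(demo_x, demo_y, epochs=1, batch_size=8, verbose=0)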
def __init__(self):
    #self.inception = InceptionResNetV2(weights=None, include_top=True)
    #self.inception.load_weights('/data/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')
    #inception.graph = tf.get_default_graph()
    embed_input = Input(shape=(1000,))

    # Encoder
    encoder_input = Input(shape=(256, 256, 1,))
    #encoder_output = GaussianNoise(0.1)(encoder_input)
    encoder_output = Conv2D(64, (3, 3), activation='relu', padding='same', strides=2)(encoder_input)
    encoder_output = Conv2D(128, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(128, (3, 3), activation='relu', padding='same', strides=2)(encoder_output)
    encoder_output = Conv2D(256, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(256, (3, 3), activation='relu', padding='same', strides=2)(encoder_output)
    encoder_output = Conv2D(512, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(512, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(256, (3, 3), activation='relu', padding='same')(encoder_output)

    # Fusion
    fusion_output = RepeatVector(32 * 32)(embed_input)
    fusion_output = Reshape((32, 32, 1000))(fusion_output)
    fusion_output = concatenate([encoder_output, fusion_output], axis=3)
    fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output)

    # Decoder
    decoder_output = Conv2D(128, (3, 3), activation='relu', padding='same')(fusion_output)
    decoder_output = UpSampling2D((2, 2))(decoder_output)
    decoder_output = Conv2D(64, (3, 3), activation='relu', padding='same')(decoder_output)
    decoder_output = UpSampling2D((2, 2))(decoder_output)
    decoder_output = Conv2D(32, (3, 3), activation='relu', padding='same')(decoder_output)
    decoder_output = Conv2D(16, (3, 3), activation='relu', padding='same')(decoder_output)
    decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
    decoder_output = UpSampling2D((2, 2))(decoder_output)

    model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
    model.compile(optimizer="adagrad", loss='mse')
    self.model = model
def build_model():
    # epoch, dropout = best_model()
    epoch, dropout = 5, 0.2
    print('EPOCH = ', epoch)
    print('DROPOUT = ', dropout)

    model = Sequential()
    model.add(Embedding(input_dim=ger_vocab_size, output_dim=128, input_length=11))
    model.add(LSTM(128))
    model.add(RepeatVector(11))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(dropout))
    model.add(Dense(eng_vocab_size, activation='softmax'))
    model.compile(optimizer=RMSprop(lr=0.01),
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    model.summary()

    # Train model
    history = model.fit(X_train,
                        y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
                        epochs=epoch,
                        batch_size=128,
                        verbose=1,
                        validation_split=0.2)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test,
                                    y_test.reshape(y_test.shape[0], y_test.shape[1], 1),
                                    verbose=1)
    print('Accuracy: %f' % (accuracy * 100))

    def display():
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

    display()
def __init__(self, input_shape, output_size):
    image_model = tf.keras.Sequential()
    image_model.add(Conv2D(32, (3, 3), padding='valid', activation='relu',
                           input_shape=input_shape))
    image_model.add(Conv2D(32, (3, 3), padding='valid', activation='relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))
    image_model.add(Dropout(0.25))
    image_model.add(Conv2D(64, (3, 3), padding='valid', activation='relu'))
    image_model.add(Conv2D(64, (3, 3), padding='valid', activation='relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))
    image_model.add(Dropout(0.25))
    image_model.add(Conv2D(128, (3, 3), padding='valid', activation='relu'))
    image_model.add(Conv2D(128, (3, 3), padding='valid', activation='relu'))
    image_model.add(MaxPooling2D(pool_size=(2, 2)))
    image_model.add(Dropout(0.25))
    image_model.add(Flatten())
    image_model.add(Dense(1024, activation='relu'))
    image_model.add(Dropout(0.3))
    image_model.add(Dense(1024, activation='relu'))
    image_model.add(Dropout(0.3))
    image_model.add(RepeatVector(CONTEXT_LENGTH))

    visual_input = Input(shape=input_shape)
    encoded_image = image_model(visual_input)

    language_model = Sequential()
    language_model.add(tf.keras.layers.LSTM(128, return_sequences=True,
                                            input_shape=(CONTEXT_LENGTH, output_size)))
    language_model.add(tf.keras.layers.LSTM(128, return_sequences=True))

    textual_input = Input(shape=(CONTEXT_LENGTH, output_size))
    encoded_text = language_model(textual_input)

    decoder = concatenate([encoded_image, encoded_text])
    decoder = tf.keras.layers.LSTM(512, return_sequences=True)(decoder)
    decoder = tf.keras.layers.LSTM(512, return_sequences=False)(decoder)
    decoder = Dense(output_size, activation='softmax')(decoder)

    super().__init__(inputs=[visual_input, textual_input], outputs=decoder)
    optimizer = RMSprop(lr=0.0001, clipvalue=1.0)
    self.compile(loss='categorical_crossentropy', optimizer=optimizer)
def best_model():
    epochs = [5, 10, 15, 20]
    dropout_rate = [0.1, 0.2, 0.3]
    list_of_all_scores = list()
    list_of_scores = list()
    list_of_dropout = list()
    list_of_all_dropouts = list()
    list_of_epochs = list()
    for i in dropout_rate:
        model = Sequential()
        model.add(Embedding(input_dim=ger_vocab_size, output_dim=128, input_length=11))
        model.add(LSTM(128))
        model.add(RepeatVector(11))
        model.add(LSTM(128, return_sequences=True))
        model.add(Dropout(i))
        model.add(Dense(eng_vocab_size, activation='softmax'))
        model.compile(optimizer=RMSprop(lr=0.01),
                      loss='sparse_categorical_crossentropy',
                      metrics=['acc'])
        list_of_dropout.append(i)
        for e in epochs:
            list_of_all_dropouts.append(i)
            list_of_epochs.append(e)
            model.fit(X_train,
                      y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
                      epochs=e,
                      batch_size=128,
                      verbose=1,
                      validation_split=0.2)
            score = model.evaluate(X_test,
                                   y_test.reshape(y_test.shape[0], y_test.shape[1], 1),
                                   verbose=1)
            list_of_all_scores.append(score)
            if score not in list_of_scores:
                list_of_scores.append(score)
            #print('Dropout:', i, '\n', 'Epoch:', e, '\n', 'Score:', float(score))
    lowest = min(list_of_all_scores)
    # Index into the full score list so the epoch/dropout lookups stay aligned;
    # the original indexed the deduplicated list_of_scores, which could mismatch.
    num = list_of_all_scores.index(lowest)
    epoch = list_of_epochs[num]
    dropout = list_of_all_dropouts[num]
    print('Lowest score:', lowest, 'Epoch:', epoch, 'Dropout', dropout)
    return epoch, dropout
def __init__(self, words, image_count_words, *args, max_code_length,
             activation='relu', order_layer_output_size=1024, kernel_shape=7,
             dropout_ratio=0.25, dense_layer_size=512, image_out=False, **kwargs):
    super().__init__(*args, **kwargs)
    self.image_out = image_out  # the original assigned this twice
    self.voc_size = len(words)
    self.layer_output_names = words
    self.image_count_words = image_count_words
    self.max_code_length = max_code_length

    self.shallow_cnn_unit = ShallowCnnUnit(image_count_words=image_count_words,
                                           kernel_shape=kernel_shape,
                                           dropout_ratio=dropout_ratio,
                                           activation=activation,
                                           name='cnn_unit')
    self.parallel_counter_unit = CounterUnit(layer_size=dense_layer_size,
                                             activation=activation,
                                             name='counter_unit')

    self.ordering_layers = [
        Flatten(name='ordering_flatten'),
        Dense(1024, activation=activation, name='ordering_1'),
        Dropout(dropout_ratio, name='ordering_drop_1'),
        Dense(1024, activation=activation, name='ordering_2'),
        Dropout(dropout_ratio, name='ordering_drop_2'),
        Dense(order_layer_output_size, activation=activation, name='ordering_3')
    ]

    self.repeat_image_layer = RepeatVector(max_code_length)

    self.language_model_layers = [
        tf.keras.layers.LSTM(128, return_sequences=True),
        tf.keras.layers.LSTM(128, return_sequences=True)
    ]

    self.decoder_layers = [
        tf.keras.layers.LSTM(512, return_sequences=True),
        tf.keras.layers.LSTM(512, return_sequences=True),
        Dense(len(words), activation='softmax')
    ]
def Colorize():
    embed_input = Input(shape=(1000,))

    # Encoder
    encoder_input = Input(shape=(256, 256, 1,))
    encoder_output = Conv2D(64, (3, 3), activation='relu', padding='same', strides=2)(encoder_input)
    encoder_output = Conv2D(128, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(128, (3, 3), activation='relu', padding='same', strides=2)(encoder_output)
    encoder_output = Conv2D(256, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(256, (3, 3), activation='relu', padding='same', strides=2)(encoder_output)
    encoder_output = Conv2D(512, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(512, (3, 3), activation='relu', padding='same')(encoder_output)
    encoder_output = Conv2D(256, (3, 3), activation='relu', padding='same')(encoder_output)

    # Fusion
    fusion_output = RepeatVector(32 * 32)(embed_input)
    fusion_output = Reshape((32, 32, 1000))(fusion_output)
    fusion_output = concatenate([encoder_output, fusion_output], axis=3)
    fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output)

    # Decoder
    decoder_output = Conv2D(128, (3, 3), activation='relu', padding='same')(fusion_output)
    decoder_output = UpSampling2D((2, 2))(decoder_output)
    decoder_output = Conv2D(64, (3, 3), activation='relu', padding='same')(decoder_output)
    decoder_output = UpSampling2D((2, 2))(decoder_output)
    decoder_output = Conv2D(32, (3, 3), activation='relu', padding='same')(decoder_output)
    decoder_output = Conv2D(16, (3, 3), activation='relu', padding='same')(decoder_output)
    decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
    decoder_output = UpSampling2D((2, 2))(decoder_output)

    return Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    model = Sequential()
    model.add(Embedding(src_vocab, n_units, input_length=src_timesteps, mask_zero=True))
    model.add(LSTM(n_units))
    model.add(RepeatVector(tar_timesteps))
    model.add(LSTM(n_units, return_sequences=True))
    model.add(TimeDistributed(Dense(tar_vocab, activation='softmax')))
    return model
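# Hedged usage sketch for define_model: the vocabulary sizes, sequence lengths,
# and unit count below are placeholders, not values from the surrounding code.
nmt_model = define_model(src_vocab=5000, tar_vocab=4000,
                         src_timesteps=12, tar_timesteps=10, n_units=256)
nmt_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
nmt_model.summary()
# Inputs are integer-encoded source sequences of shape (batch, src_timesteps);
# with sparse_categorical_crossentropy the targets can stay integer-encoded,
# e.g. shaped (batch, tar_timesteps, 1).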
def make_lstm():
    inputs = Input(shape=(timesteps, input_dim))
    #lstm1, state_h, state_c = LSTM(latent_dim, return_state=True)(inputs)
    encoded, state_h, state_c = LSTM(latent_dim, return_state=True)(inputs)
    #encoded = LSTM(latent_dim)(inputs)
    decoded = RepeatVector(timesteps)(encoded)
    decoded = LSTM(input_dim, return_sequences=True)(decoded)
    model1 = Model(inputs=inputs, outputs=[encoded, state_h, state_c])
    sequence_autoencoder = Model(inputs, decoded)
    # Model(inputs, encoded, decoded, state_h, state_c) is not a valid Model
    # signature; the encoder maps inputs to the latent code, and model1 above
    # already exposes the states.
    encoder = Model(inputs, encoded)
    sequence_autoencoder.compile(loss='mean_squared_error', optimizer='adam')
    return sequence_autoencoder, encoder, model1
def relational_model():
    image_input_shape = (img_rows, img_cols, 3)

    text_inputs = Input(shape=(sequence_length,), name='text_input')
    text_x = Embedding(vocab_size, 128)(text_inputs)
    text_x = LSTM(128)(text_x)

    image_inputs = Input(shape=image_input_shape, name='image_input')
    image_x = Lambda(process_image)(image_inputs)
    # Convolve the preprocessed tensor; the original applied this first Conv2D
    # to image_inputs, silently bypassing the Lambda above.
    image_x = Conv2D(24, kernel_size=(3, 3), strides=2, activation='relu')(image_x)
    image_x = BatchNormalization()(image_x)
    image_x = Conv2D(24, kernel_size=(3, 3), strides=2, activation='relu')(image_x)
    image_x = BatchNormalization()(image_x)
    image_x = Conv2D(24, kernel_size=(3, 3), strides=2, activation='relu')(image_x)
    image_x = BatchNormalization()(image_x)
    image_x = Conv2D(24, kernel_size=(3, 3), strides=2, activation='relu')(image_x)
    image_x = BatchNormalization()(image_x)
    shape = K.int_shape(image_x)

    RN_inputs = Input(shape=(1, (2 * shape[3]) + K.int_shape(text_x)[1]))
    RN_x = Dense(256, activation='relu')(RN_inputs)
    RN_x = Dense(256, activation='relu')(RN_x)
    RN_x = Dense(256, activation='relu')(RN_x)
    RN_x = Dropout(.5)(RN_x)
    RN_outputs = Dense(256, activation='relu')(RN_x)
    RN = Model(inputs=RN_inputs, outputs=RN_outputs)

    relations = Lambda(get_relation_vectors)(image_x)           # Get tensor [batch, relation_ID, relation_vectors]
    question = RepeatVector(K.int_shape(relations)[1])(text_x)  # Shape question vector to same size as relations
    relations = Concatenate(axis=2)([relations, question])      # Merge tensors [batch, relation_ID, relation_vectors, question_vector]
    g = TimeDistributed(RN)(relations)                          # TimeDistributed applies RN to relation vectors.
    g = Lambda(lambda x: K.sum(x, axis=1))(g)                   # Sum over relation_ID

    f = Dense(256, activation='relu')(g)
    f = Dropout(.5)(f)
    f = Dense(256, activation='relu')(f)
    f = Dropout(.5)(f)
    outputs = Dense(num_labels, activation='softmax')(f)

    ## Train model
    model = Model(inputs=[text_inputs, image_inputs], outputs=outputs)
    return model
def make_lstm():
    inputs = Input(shape=(timesteps, input_dim))
    encoded = LSTM(latent_dim)(inputs)
    decoded = RepeatVector(timesteps)(encoded)
    decoded = LSTM(input_dim, return_sequences=True)(decoded)
    sequence_autoencoder = Model(inputs, decoded)
    encoder = Model(inputs, encoded)
    sequence_autoencoder.compile(loss='mean_squared_error', optimizer='adam')
    print(sequence_autoencoder.summary())
    print(encoder.summary())
    return sequence_autoencoder, encoder
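# Sketch of how the autoencoder above might be driven; the module-level
# timesteps/input_dim/latent_dim globals that make_lstm reads are given
# illustrative values here, not values from the original code.
import numpy as np

timesteps, input_dim, latent_dim = 20, 8, 16  # assumed values
sequence_autoencoder, encoder = make_lstm()
windows = np.random.rand(64, timesteps, input_dim)
sequence_autoencoder.fit(windows, windows, epochs=1, batch_size=16, verbose=0)
codes = encoder.predict(windows)  # (64, latent_dim) sequence embeddings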
def attention_3d_block(inputs, SINGLE_ATTENTION_VECTOR=False):
    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    TIME_STEPS = int(inputs.shape[1])
    a = Permute((2, 1))(inputs)
    a = Reshape((input_dim, TIME_STEPS))(a)  # this line is not useful. It's just to know which dimension is what.
    a = Dense(TIME_STEPS, activation='softmax')(a)
    if SINGLE_ATTENTION_VECTOR:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    output_attention_mul = concatenate([inputs, a_probs], name='attention_mul')
    return output_attention_mul
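# A hedged example of dropping attention_3d_block into a small classifier;
# the 20-step / 8-feature input and the head sizes are assumptions for
# illustration only.
demo_in = Input(shape=(20, 8))
demo_att = attention_3d_block(demo_in)            # (batch, 20, 16) after the concat
demo_x = LSTM(32)(demo_att)
demo_out = Dense(1, activation='sigmoid')(demo_x)
demo_attn_model = Model(demo_in, demo_out)
demo_attn_model.compile(optimizer='adam', loss='binary_crossentropy')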
def attention_3d_block(inputs):
    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    a = inputs
    AveragePooling = pooling.GlobalAveragePooling1D(data_format='channels_last')(a)
    den1 = Dense(input_dim, activation='relu')(AveragePooling)
    den2 = Dense(input_dim, activation='hard_sigmoid')(den1)
    if SINGLE_ATTENTION_VECTOR:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(den2)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((1, 2), name='attention_vec')(a)
    output_attention_mul = merge.multiply([inputs, a_probs], name='attention_mul')
    # output_attention_mul = merge([inputs, a_probs], name='attention_mul', mode='mul')  # old Keras version
    return output_attention_mul
def __init__(self, output_size, *args, context_length=CONTEXT_LENGTH,
             activation='relu', kernel_shape=3, dropout_ratio=0.25, **kwargs):
    super().__init__(*args, **kwargs)
    self.context_length = context_length
    self.output_size = output_size

    self.image_model_layers = [
        Conv2D(32, kernel_shape, padding='valid', activation=activation),
        Conv2D(32, kernel_shape, padding='valid', activation=activation),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(dropout_ratio),
        Conv2D(64, kernel_shape, padding='valid', activation=activation),
        Conv2D(64, kernel_shape, padding='valid', activation=activation),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(dropout_ratio),
        Conv2D(128, kernel_shape, padding='valid', activation=activation),
        Conv2D(128, kernel_shape, padding='valid', activation=activation),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(dropout_ratio),
        Flatten(),
        Dense(1024, activation='relu'),
        Dropout(0.3),
        Dense(1024, activation='relu'),
        Dropout(0.3),
        RepeatVector(context_length)
    ]

    self.language_model_layers = [
        tf.keras.layers.LSTM(128, return_sequences=True),  # , input_shape=(CONTEXT_LENGTH, output_size)))
        tf.keras.layers.LSTM(128, return_sequences=True)
    ]

    self.decoder_layers = [
        tf.keras.layers.LSTM(512, return_sequences=True),
        tf.keras.layers.LSTM(512, return_sequences=False),
        Dense(output_size, activation='softmax')
    ]
def build_encoder_decoder(self):
    x = Input(batch_shape=(None, self.emdedding_size))
    x_embed = Embedding(input_dim=self.vocab_size,
                        output_dim=self.emdedding_size,
                        weights=[self.pretrained_weights],
                        trainable=False)(x)
    print("x_embed: ", x_embed)
    h = LSTM(self.intermediate_dim,
             return_sequences=False,
             recurrent_dropout=0.2)(x_embed)
    z_mean = Dense(self.latent_dim, name="z_mean")(h)
    z_log_var = Dense(self.latent_dim, name="z_log_var")(h)
    z = Sampling()([z_mean, z_log_var])
    encoder = Model(x, [z_mean, z_log_var, z], name="encoder")
    encoder.summary()

    # build a generator that can sample sentences from the learned distribution
    # we instantiate these layers separately so as to reuse them later
    repeated_context = RepeatVector(self.emdedding_size)
    decoder_h = LSTM(self.intermediate_dim,
                     return_sequences=True,
                     recurrent_dropout=0.2)
    decoder_mean = Dense(self.emdedding_size, activation='linear', name='decoder_mean')
    h_decoded = decoder_h(repeated_context(z))
    x_decoded_mean = decoder_mean(h_decoded)

    decoder_input = Input(shape=(self.latent_dim,))
    _h_decoded = decoder_h(repeated_context(decoder_input))
    _x_decoded_mean = decoder_mean(_h_decoded)
    _x_decoded_mean = Activation('relu', name="relu")(_x_decoded_mean)
    _x_decoded_out = Reshape((4096,))(_x_decoded_mean)
    _x_decoded_out = Dense(self.emdedding_size, activation='linear',
                           name='decoder_out')(_x_decoded_out)
    decoder = Model(decoder_input, _x_decoded_out, name="decoder")
    decoder.summary()
    return encoder, decoder
def caption_model(max_len=33, vocab_size=10431, train=False, feature_extractor_model=None):
    print(f'Vocab size = {vocab_size}')
    print(f'Max sequence length = 128,618\n')
    if train:
        print('Creating training network. . .\nInput shape=(None,2048)')
        inp_1 = Input(shape=(2048,), name='image_embedding')
    else:
        print('Creating inference network. . .\nInput shape=(None, 299, 299, 3)')
        inp_1 = feature_extractor_model.output
    y = Dense(units=300, activation='relu', name='image_embedding_dense')(inp_1)
    y = RepeatVector(max_len, name='repeat_layer')(y)

    inp_2 = Input(shape=(max_len,), name='partial_captions')
    x = Embedding(input_dim=vocab_size + 1, output_dim=300, input_length=max_len)(inp_2)
    x = LSTM(units=256, return_sequences=True)(x)
    x = TimeDistributed(Dense(units=300, activation='linear'))(x)

    merge_layer = add([y, x], name=f'add_{train}')
    z = Bidirectional(LSTM(units=256, return_sequences=False),
                      name='Bidirectional-LSTM1')(merge_layer)
    out = Dense(units=vocab_size + 1, activation='softmax', name='word_output')(z)
    print(f'Successfully created network. . .\nOutput shape=(None,{vocab_size + 1})\n')
    if train:
        return Model(inputs=[inp_1, inp_2], outputs=out, name='Caption-Model')
    return Model(inputs=[feature_extractor_model.input, inp_2],
                 outputs=out, name='c-Model')
def create_lstm_autoencoder(
    input_dimension,
    output_dimension=None,
    units=100,
    dropout_rate=0.2,
    activation='relu',
    optimizer='adam',
    loss='mse',
    stateful=False,
    number_of_features=1,
) -> Model:
    if output_dimension is None:
        output_dimension = input_dimension

    input_layer = Input(shape=(input_dimension, number_of_features))
    layer = input_layer
    layer = LSTM(units, activation=activation, stateful=stateful,
                 return_sequences=False)(layer)
    layer = Dropout(rate=dropout_rate)(layer)
    encoder = layer

    layer = RepeatVector(output_dimension)(layer)
    layer = Dropout(rate=dropout_rate)(layer)
    layer = LSTM(units, activation=activation, stateful=stateful,
                 return_sequences=True)(layer)
    layer = TimeDistributed(Dense(number_of_features))(layer)
    decoder = layer

    model = Model(input_layer, decoder)
    model.compile(optimizer=optimizer, loss=loss)
    return model
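# Example call, assuming univariate windows of length 30; purely illustrative.
autoencoder = create_lstm_autoencoder(input_dimension=30)
autoencoder.summary()
# Per-window reconstruction error can then be used to flag anomalies, e.g.:
# errors = np.mean(np.square(x - autoencoder.predict(x)), axis=(1, 2))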
sequence_input = Input(shape=(data.shape[1],), dtype='int32')  # (Batch size,
embedded_sequences = embedding_layer(sequence_input)
x = Bidirectional(LSTM(UNITS, return_sequences=True, dropout=DROP,
                       activity_regularizer=k.regularizers.l2(REG)),
                  merge_mode='concat')(embedded_sequences)  # (batch_size, timesteps, units)
a = TimeDistributed(Dense(UNITS, activity_regularizer=k.regularizers.l2(REG)))(x)
attention = TimeDistributed(Dense(1, activation='tanh', name='timeDense'))(a)  # (batch_size, timesteps, 1)
attention = Flatten()(attention)                # (batch size, timesteps)
attention = Activation('softmax')(attention)    # (batch, timesteps)
attention = RepeatVector(UNITS * 2)(attention)  # (batch, units, timesteps)
attention = Permute([2, 1])(attention)          # (batch, timesteps, units)
rejoined = multiply([x, attention])
# rejoined = k.backend.sum(rejoined, axis=-2, keepdims=False)(rejoined)

# x = LSTM(UNITS, return_sequences=True, dropout=DROP, activity_regularizer=k.regularizers.l2(REG))(rejoined)  # (batch_size, timesteps, units)
# x = TimeDistributed(Dense(UNITS, activation='relu', activity_regularizer=k.regularizers.l2(REG)))(x)
# attention = TimeDistributed(Dense(1, activation='tanh', name='timeDense'))(x)  # (batch_size, timesteps, 1)
# attention = Flatten()(attention)              # (batch size, timesteps)
# attention = Activation('softmax')(attention)  # (batch, timesteps)
# attention = RepeatVector(UNITS)(attention)    # (batch, units, timesteps)
# attention = Permute([2,1])(attention)         # (batch, timesteps, units)
# rejoined = multiply([x, attention])

interm = LSTM(UNITS, activity_regularizer=k.regularizers.l2(REG), dropout=DROP)(rejoined)
interm = Dense(UNITS,
def __init__(self, fl, mode, hparams):
    """
    Initialises new DNN model based on input features_dim, labels_dim, hparams
    :param features_dim: Number of input feature nodes. Integer
    :param labels_dim: Number of output label nodes. Integer
    :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
    """
    # self.features_dim = fl.features_c_dim
    # self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
    self.features_dim = fl.features_c_dim + 1  # 1 for the positional argument
    self.labels_dim = 1
    self.numel = fl.labels.shape[1] + 1
    self.hparams = hparams
    self.mode = mode
    self.normalise_labels = fl.normalise_labels
    self.labels_scaler = fl.labels_scaler
    features_in = Input(shape=(self.features_dim,), name='main_features_c_input')

    # Selection of model
    if mode == 'ann':
        model = ann(self.features_dim, self.labels_dim, self.hparams)
        x = model(features_in)
        self.model = Model(inputs=features_in, outputs=x)
    elif mode == 'ann2':
        model_1 = ann(self.features_dim, 50, self.hparams)
        x = model_1(features_in)
        model_end = ann(50, 50, self.hparams)
        end = model_end(x)
        end_node = Dense(units=1,
                         activation='linear',
                         kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                               l2=hparams['reg_l2']),
                         name='output_layer')(end)
        model_2 = ann(50, self.labels_dim - 1, self.hparams)
        x = model_2(x)
        self.model = Model(inputs=features_in, outputs=[end_node, x])
    elif mode == 'ann3':
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(0))(features_in)
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(2))(x)
        # x = BatchNormalization()(x)
        x = Dense(units=1,
                  activation='linear',
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_set_19')(x)
        self.model = Model(inputs=features_in, outputs=x)
    elif mode == 'conv1':
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='shared' + str(1))(features_in)
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        #x = BatchNormalization()(x)
        x = Dense(units=19,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_set_19')(x)
        #x = BatchNormalization()(x)
        x = Reshape(target_shape=(19, 1))(x)
        x = Conv1D(filters=hparams['filters'], kernel_size=3, strides=1,
                   padding='same', activation='relu')(x)
        #x = BatchNormalization()(x)
        x = Conv1D(filters=hparams['filters'] * 2, kernel_size=3, strides=1,
                   padding='same', activation='relu')(x)
        x = Conv1D(filters=hparams['filters'] * 4, kernel_size=3, strides=1,
                   padding='same', activation='relu')(x)
        #x = Permute((2,1))(x)
        #x = GlobalAveragePooling1D()(x)
        x = TimeDistributed(Dense(1, activation='linear'))(x)
        x = Reshape(target_shape=(19,))(x)
        self.model = Model(inputs=features_in, outputs=x)
    elif mode == 'conv2':
        x = Dense(units=10,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(1))(features_in)
        x = Dense(units=10,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(2))(x)
        end = Dense(units=10,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(1))(x)
        end = Dense(units=10,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(2))(end)
        end_node = Dense(units=1,
                         activation='linear',
                         kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                               l2=hparams['reg_l2']),
                         name='output_layer')(end)
        x = Dense(units=80,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        x = Reshape(target_shape=(80, 1))(x)
        x = Conv1D(filters=8, kernel_size=3, strides=1, padding='same', activation='relu')(x)
        x = MaxPooling1D(pool_size=2)(x)
        x = Conv1D(filters=16, kernel_size=3, strides=1, padding='same', activation='relu')(x)
        x = MaxPooling1D(pool_size=2)(x)
        #x = Permute((2,1))(x)
        #x = GlobalAveragePooling1D()(x)
        x = TimeDistributed(Dense(1, activation='linear'))(x)
        x = Reshape(target_shape=(20,))(x)
        self.model = Model(inputs=features_in, outputs=[end_node, x])
    elif mode == 'lstm':
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(1))(features_in)
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(2))(x)
        end = Dense(units=20,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(1))(x)
        end = Dense(units=20,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(2))(end)
        end_node = Dense(units=1,
                         activation='linear',
                         kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                               l2=hparams['reg_l2']),
                         name='output_layer')(end)
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(2))(x)
        x = RepeatVector(n=20)(x)
        x = LSTM(units=30, activation='relu', return_sequences=True)(x)
        x = LSTM(units=30, activation='relu', return_sequences=True)(x)
        x = TimeDistributed(Dense(1))(x)
        x = Reshape(target_shape=(20,))(x)
        '''
        x = Permute((2,1))(x)
        x = GlobalAveragePooling1D()(x)
        '''
        self.model = Model(inputs=features_in, outputs=[end_node, x])

    optimizer = Adam(clipnorm=1)
    self.model.compile(optimizer=optimizer, loss='mean_squared_error')
def __init__(self, fl, mode, hparams):
    """
    Initialises new DNN model based on input features_dim, labels_dim, hparams
    :param features_dim: Number of input feature nodes. Integer
    :param labels_dim: Number of output label nodes. Integer
    :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
    """
    self.features_dim = fl.features_c_dim
    self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
    self.hparams = hparams
    self.mode = mode
    self.normalise_labels = fl.normalise_labels
    self.labels_scaler = fl.labels_scaler
    features_in = Input(shape=(self.features_dim,), name='main_features_c_input')

    # Selection of model
    if mode == 'ann':
        model = ann(self.features_dim, self.labels_dim, self.hparams)
        x = model(features_in)
        self.model = Model(inputs=features_in, outputs=x)
    elif mode == 'ann2':
        model_1 = ann(self.features_dim, 50, self.hparams)
        x = model_1(features_in)
        model_end = ann(50, 50, self.hparams)
        end = model_end(x)
        end_node = Dense(units=1,
                         activation='linear',
                         kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                               l2=hparams['reg_l2']),
                         name='output_layer')(end)
        model_2 = ann(50, self.labels_dim - 1, self.hparams)
        x = model_2(x)
        self.model = Model(inputs=features_in, outputs=[end_node, x])
    elif mode == 'ann3':
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(0))(features_in)
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(2))(x)
        # x = BatchNormalization()(x)
        x = Dense(units=self.labels_dim,
                  activation='linear',
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Final')(x)
        self.model = Model(inputs=features_in, outputs=x)
    elif mode == 'conv1':
        if fl.label_type == 'gf20':
            final_dim = 20
        else:
            final_dim = 19
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='shared' + str(1))(features_in)
        x = Dense(units=hparams['pre'],
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        #x = BatchNormalization()(x)
        x = Dense(units=final_dim,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_set_19')(x)
        #x = BatchNormalization()(x)
        x = Reshape(target_shape=(final_dim, 1))(x)
        x = Conv1D(filters=hparams['filters'], kernel_size=3, strides=1,
                   padding='same', activation='relu')(x)
        #x = BatchNormalization()(x)
        x = Conv1D(filters=hparams['filters'] * 2, kernel_size=3, strides=1,
                   padding='same', activation='relu')(x)
        x = Conv1D(filters=hparams['filters'] * 4, kernel_size=3, strides=1,
                   padding='same', activation='relu')(x)
        #x = Permute((2,1))(x)
        #x = GlobalAveragePooling1D()(x)
        x = TimeDistributed(Dense(1, activation='linear'))(x)
        x = Reshape(target_shape=(final_dim,))(x)
        self.model = Model(inputs=features_in, outputs=x)
    elif mode == 'conv2':
        x = Dense(units=10,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(1))(features_in)
        x = Dense(units=10,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(2))(x)
        end = Dense(units=10,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(1))(x)
        end = Dense(units=10,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(2))(end)
        end_node = Dense(units=1,
                         activation='linear',
                         kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                               l2=hparams['reg_l2']),
                         name='output_layer')(end)
        x = Dense(units=80,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        x = Reshape(target_shape=(80, 1))(x)
        x = Conv1D(filters=8, kernel_size=3, strides=1, padding='same', activation='relu')(x)
        x = MaxPooling1D(pool_size=2)(x)
        x = Conv1D(filters=16, kernel_size=3, strides=1, padding='same', activation='relu')(x)
        x = MaxPooling1D(pool_size=2)(x)
        #x = Permute((2,1))(x)
        #x = GlobalAveragePooling1D()(x)
        x = TimeDistributed(Dense(1, activation='linear'))(x)
        x = Reshape(target_shape=(20,))(x)
        self.model = Model(inputs=features_in, outputs=[end_node, x])
    elif mode == 'lstm':
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(1))(features_in)
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Shared_e_' + str(2))(x)
        end = Dense(units=20,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(1))(x)
        end = Dense(units=20,
                    activation=hparams['activation'],
                    kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                          l2=hparams['reg_l2']),
                    name='Dense_e_' + str(2))(end)
        end_node = Dense(units=1,
                         activation='linear',
                         kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                               l2=hparams['reg_l2']),
                         name='output_layer')(end)
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(1))(x)
        x = Dense(units=20,
                  activation=hparams['activation'],
                  kernel_regularizer=regularizers.l1_l2(l1=hparams['reg_l1'],
                                                        l2=hparams['reg_l2']),
                  name='Pre_' + str(2))(x)
        x = RepeatVector(n=20)(x)
        x = LSTM(units=30, activation='relu', return_sequences=True)(x)
        x = LSTM(units=30, activation='relu', return_sequences=True)(x)
        x = TimeDistributed(Dense(1))(x)
        x = Reshape(target_shape=(20,))(x)
        '''
        x = Permute((2,1))(x)
        x = GlobalAveragePooling1D()(x)
        '''
        self.model = Model(inputs=features_in, outputs=[end_node, x])

    optimizer = Adam(learning_rate=hparams['learning_rate'], clipnorm=1)

    def weighted_mse(y_true, y_pred):
        loss_weights = np.sqrt(np.arange(1, 20))
        #loss_weights = np.arange(1, 20)
        return K.mean(K.square(y_pred - y_true) * loss_weights, axis=-1)

    def haitao_error(y_true, y_pred):
        diff = K.abs((y_true - y_pred) /
                     K.reshape(K.clip(K.abs(y_true[:, -1]), K.epsilon(), None), (-1, 1)))
        return 100. * K.mean(diff, axis=-1)

    if hparams['loss'] == 'mape':
        self.model.compile(optimizer=optimizer, loss=MeanAbsolutePercentageError())
    elif hparams['loss'] == 'haitao':
        self.model.compile(optimizer=optimizer, loss=haitao_error)
    elif hparams['loss'] == 'mse':
        self.model.compile(optimizer=optimizer, loss='mean_squared_error')
from math import ceil, log10

# The original imported from tensorflow.python.keras, a private namespace;
# tensorflow.keras is the supported public equivalent. Sequential and the math
# helpers were used below without being imported.
from tensorflow.keras.layers import LSTM, RepeatVector, TimeDistributed, Dense
from tensorflow.keras.models import Sequential

from src.helpers.helper_classifier import generate_data, invert

n_samples = 200000
n_numbers = 2
largest = 10000
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '+', ' ']
n_chars = len(alphabet)
n_in_seq_length = n_numbers * ceil(log10(largest + 1)) + n_numbers - 1
n_out_seq_length = ceil(log10(n_numbers * (largest + 1)))
n_batch = 100
n_epoch = 500

model = Sequential([
    LSTM(100, input_shape=(n_in_seq_length, n_chars)),
    RepeatVector(n_out_seq_length),
    LSTM(50, return_sequences=True),
    TimeDistributed(Dense(n_chars, activation='softmax'))
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

for i in range(n_epoch):
    x, y = generate_data(n_samples, largest, alphabet)
    model.fit(x, y, epochs=1, batch_size=n_batch)

model.save('training/keras_classifier.h5')

# evaluate on some new patterns
x, y = generate_data(n_samples, largest, alphabet)
def create_model(num_encorder_paragraph_tokens, max_encoder_paragraph_seq_length,
                 num_encoder_question_tokens, max_encoder_question_seq_length,
                 num_decoder_tokens):
    hidden_units = 128  # 256, 128, 64
    embed_hidden_units = 100

    context_inputs = Input(shape=(None,), name='context_inputs')
    encoded_context = Embedding(input_dim=num_encorder_paragraph_tokens,
                                output_dim=embed_hidden_units,
                                input_length=max_encoder_paragraph_seq_length,
                                name='context_embedding')(context_inputs)
    encoded_context = Dropout(0.3)(encoded_context)

    question_inputs = Input(shape=(None,), name='question_inputs')
    encoded_question = Embedding(input_dim=num_encoder_question_tokens,
                                 output_dim=embed_hidden_units,
                                 input_length=max_encoder_question_seq_length,
                                 name='question_embedding')(question_inputs)
    encoded_question = Dropout(0.3)(encoded_question)
    encoded_question = LSTM(units=embed_hidden_units, name='question_lstm')(encoded_question)
    encoded_question = RepeatVector(max_encoder_paragraph_seq_length)(encoded_question)

    merged = add([encoded_context, encoded_question])
    encoder_lstm = LSTM(units=hidden_units, return_state=True, name='encoder_lstm')
    encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(merged)
    encoder_states = [encoder_state_h, encoder_state_c]

    decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs')
    decoder_lstm = LSTM(units=hidden_units, return_state=True,
                        return_sequences=True, name='decoder_lstm')
    decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
        decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(units=num_decoder_tokens, activation='softmax',
                          name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([context_inputs, question_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['accuracy'])

    encoder_model = Model([context_inputs, question_inputs], encoder_states)

    decoder_state_inputs = [Input(shape=(hidden_units,)), Input(shape=(hidden_units,))]
    decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs,
                                                     initial_state=decoder_state_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_state_inputs,
                          [decoder_outputs] + decoder_states)

    return model, encoder_model, decoder_model
def instance_network(shape):
    input_img = Input(shape=(*shape, 1, MAX_INSTANCES), name='fg_input_img')
    input_packed = Lambda(lambda x: pack_instance(x), name='fg_pack_input')(input_img)
    input_img_3 = Lambda(lambda x: tf.tile(x, [1, 1, 1, 3]), name='fg_input_tile')(input_packed)

    # VGG16 without top layers
    VGG_model = applications.vgg16.VGG16(weights='imagenet', include_top=False,
                                         input_shape=(224, 224, 3))
    model_3 = Model(VGG_model.input, VGG_model.layers[-6].output,
                    name='fg_model_3')(input_img_3)

    # Global features
    conv2d_6 = Conv2D(512, (3, 3), padding='same', strides=(2, 2),
                      activation='relu', name='fg_conv2d_6')(model_3)
    batch_normalization_1 = BatchNormalization(name='fg_batch_normalization_1')(conv2d_6)
    conv2d_7 = Conv2D(512, (3, 3), padding='same', strides=(1, 1),
                      activation='relu', name='fg_conv2d_7')(batch_normalization_1)
    batch_normalization_2 = BatchNormalization(name='fg_batch_normalization_2')(conv2d_7)
    conv2d_8 = Conv2D(512, (3, 3), padding='same', strides=(2, 2),
                      activation='relu', name='fg_conv2d_8')(batch_normalization_2)
    batch_normalization_3 = BatchNormalization(name='fg_batch_normalization_3')(conv2d_8)
    conv2d_9 = Conv2D(512, (3, 3), padding='same', strides=(1, 1),
                      activation='relu', name='fg_conv2d_9')(batch_normalization_3)
    batch_normalization_4 = BatchNormalization(name='fg_batch_normalization_4')(conv2d_9)

    # Global feature pass back to colorization + classification
    flatten_1 = Flatten(name='fg_flatten_1')(batch_normalization_4)
    dense_1 = Dense(1024, activation='relu', name='fg_dense_1')(flatten_1)
    dense_2 = Dense(512, activation='relu', name='fg_dense_2')(dense_1)
    dense_3 = Dense(256, activation='relu', name='fg_dense_3')(dense_2)
    repeat_vector_1 = RepeatVector(28 * 28, name='fg_repeat_vector_1')(dense_3)
    reshape_1 = Reshape((28, 28, 256), name='fg_reshape_1')(repeat_vector_1)

    # Mid-level features
    conv2d_10 = Conv2D(512, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_10')(model_3)
    batch_normalization_5 = BatchNormalization(name='fg_batch_normalization_5')(conv2d_10)
    conv2d_11 = Conv2D(256, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_11')(batch_normalization_5)
    batch_normalization_6 = BatchNormalization(name='fg_batch_normalization_6')(conv2d_11)

    # Fusion of (VGG16 -> Mid-level) + (VGG16 -> Global) + Colorization
    concatenate_2 = concatenate([batch_normalization_6, reshape_1], name='fg_concatenate_2')
    conv2d_12 = Conv2D(256, (1, 1), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_12')(concatenate_2)
    conv2d_13 = Conv2D(128, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_13')(conv2d_12)
    up_sampling2d_1 = UpSampling2D(size=(2, 2), name='fg_up_sampling2d_1',
                                   interpolation='bilinear')(conv2d_13)
    # conv2dt_1 = Conv2DTranspose(64, (4, 4), padding='same', strides=(2, 2), name='fg_conv2dt_1')(conv2d_13)
    conv2d_14 = Conv2D(64, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_14')(up_sampling2d_1)
    conv2d_15 = Conv2D(64, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_15')(conv2d_14)
    up_sampling2d_2 = UpSampling2D(size=(2, 2), name='fg_up_sampling2d_2',
                                   interpolation='bilinear')(conv2d_15)
    # conv2dt_2 = Conv2DTranspose(32, (4, 4), padding='same', strides=(2, 2), name='fg_conv2dt_2')(conv2d_15)
    conv2d_16 = Conv2D(32, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='fg_conv2d_16')(up_sampling2d_2)
    conv2d_17 = Conv2D(2, (3, 3), padding='same', strides=(1, 1),
                       activation='sigmoid', name='fg_conv2d_17')(conv2d_16)
    # up_sampling2d_3 = UpSampling2D(size=(2, 2), name='fg_up_sampling2d_3')(conv2d_17)

    model_3_unpack = Lambda(lambda x: unpack_instance(x), name='fg_model_3_unpack')(model_3)
    conv2d_11_unpack = Lambda(lambda x: unpack_instance(x), name='fg_conv2d_11_unpack')(conv2d_11)
    conv2d_13_unpack = Lambda(lambda x: unpack_instance(x), name='fg_conv2d_13_unpack')(conv2d_13)
    conv2d_15_unpack = Lambda(lambda x: unpack_instance(x), name='fg_conv2d_15_unpack')(conv2d_15)
    conv2d_17_unpack = Lambda(lambda x: unpack_instance(x), name='fg_conv2d_17_unpack')(conv2d_17)

    generated = Model(inputs=input_img,
                      outputs=[model_3_unpack, conv2d_11_unpack, conv2d_13_unpack,
                               conv2d_15_unpack, conv2d_17_unpack])
    return generated
def spread_axis(input, filters, permute_dims):
    gather = Reshape((FOUR * filters,))(input)
    repeat = RepeatVector(FOUR * FOUR)(gather)
    spread = Reshape((FOUR, FOUR, FOUR, filters))(repeat)
    permute = Permute(permute_dims)(spread)
    return permute
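# Hedged sketch of spread_axis: broadcast a per-axis summary of shape
# (batch, FOUR, filters) across the other two axes of a FOUR^3 volume.
# FOUR = 4 is the assumed value of the module-level constant; filters=8 and
# the identity permutation are illustrative only.
FOUR = 4
axis_summary = Input(shape=(FOUR, 8))
volume = spread_axis(axis_summary, filters=8, permute_dims=(1, 2, 3, 4))
# volume: (batch, FOUR, FOUR, FOUR, 8)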
def fusion_network(shape, batch_size):
    input_img = Input(shape=(*shape, 1), name='input_img')
    input_img_3 = Lambda(lambda x: tf.tile(x, [1, 1, 1, 3]), name='input_tile')(input_img)
    bbox = Input(shape=(4, MAX_INSTANCES), name='bbox')
    mask = Input(shape=(*shape, MAX_INSTANCES), name='mask')

    # VGG16 without top layers
    VGG_model = applications.vgg16.VGG16(weights='imagenet', include_top=False,
                                         input_shape=(224, 224, 3))
    vgg_model_3_pre = Model(VGG_model.input, VGG_model.layers[-6].output,
                            name='model_3')(input_img_3)
    fg_model_3 = Input(shape=(*vgg_model_3_pre.get_shape().as_list()[1:], MAX_INSTANCES),
                       name='fg_model_3')  # <-
    vgg_model_3 = WeightGenerator(64, batch_size, name='weight_generator_1')(
        [fg_model_3, vgg_model_3_pre, bbox, mask])  # <-

    # Global features
    conv2d_6 = Conv2D(512, (3, 3), padding='same', strides=(2, 2),
                      activation='relu', name='conv2d_6')(vgg_model_3)
    batch_normalization_1 = BatchNormalization(name='batch_normalization_1')(conv2d_6)
    conv2d_7 = Conv2D(512, (3, 3), padding='same', strides=(1, 1),
                      activation='relu', name='conv2d_7')(batch_normalization_1)
    batch_normalization_2 = BatchNormalization(name='batch_normalization_2')(conv2d_7)
    conv2d_8 = Conv2D(512, (3, 3), padding='same', strides=(2, 2),
                      activation='relu', name='conv2d_8')(batch_normalization_2)
    batch_normalization_3 = BatchNormalization(name='batch_normalization_3')(conv2d_8)
    conv2d_9 = Conv2D(512, (3, 3), padding='same', strides=(1, 1),
                      activation='relu', name='conv2d_9')(batch_normalization_3)
    batch_normalization_4 = BatchNormalization(name='batch_normalization_4')(conv2d_9)

    # Classification
    flatten_2 = Flatten(name='flatten_2')(batch_normalization_4)
    dense_4 = Dense(4096, activation='relu', name='dense_4')(flatten_2)
    dense_5 = Dense(4096, activation='relu', name='dense_5')(dense_4)
    dense_6 = Dense(1000, activation='softmax', name='dense_6')(dense_5)

    # Global feature pass back to colorization + classification
    flatten_1 = Flatten(name='flatten_1')(batch_normalization_4)
    dense_1 = Dense(1024, activation='relu', name='dense_1')(flatten_1)
    dense_2 = Dense(512, activation='relu', name='dense_2')(dense_1)
    dense_3 = Dense(256, activation='relu', name='dense_3')(dense_2)
    repeat_vector_1 = RepeatVector(28 * 28, name='repeat_vector_1')(dense_3)
    reshape_1 = Reshape((28, 28, 256), name='reshape_1')(repeat_vector_1)

    # Mid-level features
    conv2d_10 = Conv2D(512, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='conv2d_10')(vgg_model_3)
    batch_normalization_5 = BatchNormalization(name='batch_normalization_5')(conv2d_10)
    conv2d_11_pre = Conv2D(256, (3, 3), padding='same', strides=(1, 1),
                           activation='relu', name='conv2d_11')(batch_normalization_5)
    fg_conv2d_11 = Input(shape=(*conv2d_11_pre.get_shape().as_list()[1:], MAX_INSTANCES),
                         name='fg_conv2d_11')  # <-
    conv2d_11 = WeightGenerator(32, batch_size, name='weight_generator_2')(
        [fg_conv2d_11, conv2d_11_pre, bbox, mask])  # <-
    batch_normalization_6 = BatchNormalization(name='batch_normalization_6')(conv2d_11)

    # Fusion of (VGG16 -> Mid-level) + (VGG16 -> Global) + Colorization
    concatenate_2 = concatenate([batch_normalization_6, reshape_1], name='concatenate_2')
    conv2d_12 = Conv2D(256, (1, 1), padding='same', strides=(1, 1),
                       activation='relu', name='conv2d_12')(concatenate_2)
    conv2d_13_pre = Conv2D(128, (3, 3), padding='same', strides=(1, 1),
                           activation='relu', name='conv2d_13')(conv2d_12)
    fg_conv2d_13 = Input(shape=(*conv2d_13_pre.get_shape().as_list()[1:], MAX_INSTANCES),
                         name='fg_conv2d_13')  # <-
    conv2d_13 = WeightGenerator(16, batch_size, name='weight_generator_3')(
        [fg_conv2d_13, conv2d_13_pre, bbox, mask])  # <-

    # conv2dt_1 = Conv2DTranspose(64, (4, 4), padding='same', strides=(2, 2), name='conv2dt_1')(conv2d_13)
    up_sampling2d_1 = UpSampling2D(size=(2, 2), name='up_sampling2d_1',
                                   interpolation='bilinear')(conv2d_13)
    conv2d_14 = Conv2D(64, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='conv2d_14')(up_sampling2d_1)
    conv2d_15_pre = Conv2D(64, (3, 3), padding='same', strides=(1, 1),
                           activation='relu', name='conv2d_15')(conv2d_14)
    fg_conv2d_15 = Input(shape=(*conv2d_15_pre.get_shape().as_list()[1:], MAX_INSTANCES),
                         name='fg_conv2d_15')  # <-
    conv2d_15 = WeightGenerator(16, batch_size, name='weight_generator_4')(
        [fg_conv2d_15, conv2d_15_pre, bbox, mask])  # <-

    # conv2dt_2 = Conv2DTranspose(32, (4, 4), padding='same', strides=(2, 2), name='conv2dt_2')(conv2d_15)
    up_sampling2d_2 = UpSampling2D(size=(2, 2), name='up_sampling2d_2',
                                   interpolation='bilinear')(conv2d_15)
    conv2d_16 = Conv2D(32, (3, 3), padding='same', strides=(1, 1),
                       activation='relu', name='conv2d_16')(up_sampling2d_2)
    conv2d_17_pre = Conv2D(2, (3, 3), padding='same', strides=(1, 1),
                           activation='sigmoid', name='conv2d_17')(conv2d_16)
    fg_conv2d_17 = Input(shape=(*conv2d_17_pre.get_shape().as_list()[1:], MAX_INSTANCES),
                         name='fg_conv2d_17')  # <-
    conv2d_17 = WeightGenerator(16, batch_size, name='weight_generator_5')(
        [fg_conv2d_17, conv2d_17_pre, bbox, mask])  # <-

    # conv2dt_3 = Conv2DTranspose(2, (4, 4), padding='same', strides=(2, 2), name='conv2dt_3')(conv2d_17)
    up_sampling2d_3 = UpSampling2D(size=(2, 2), name='up_sampling2d_3',
                                   interpolation='bilinear')(conv2d_17)

    return Model(inputs=[input_img, fg_model_3, fg_conv2d_11, fg_conv2d_13,
                         fg_conv2d_15, fg_conv2d_17, bbox, mask],
                 outputs=[up_sampling2d_3, dense_6])
def __init__(self, lr=0.00017654, lat_input_shape=(64,),
             screen_input_shape=(64, 64,), structured_input_shape=(2,),
             verbose=False):
    """
    https://keras.io/getting-started/functional-api-guide/#multi-input-and-multi-output-models
    https://keras.io/getting-started/functional-api-guide/#shared-layers
    https://blog.keras.io/building-autoencoders-in-keras.html
    """
    # Gross hack, change later?
    self.lr = lr

    # Create the two state encoding legs
    structured_input_a = Input(shape=structured_input_shape)
    lat_input_a = Input(shape=lat_input_shape)
    screen_input_a = Input(shape=screen_input_shape)
    structured_input_b = Input(shape=structured_input_shape)
    lat_input_b = Input(shape=lat_input_shape)
    screen_input_b = Input(shape=screen_input_shape)

    # The verbose block originally referenced the inputs before they were
    # defined; report on the first leg's inputs instead.
    if verbose:
        print("Network structured input shape is", structured_input_a.get_shape())
        print("Network screen input shape is", screen_input_a.get_shape())
        print("Network latent input shape is", lat_input_a.get_shape())

    eng_state_a = [structured_input_a, lat_input_a, screen_input_a]
    eng_state_b = [structured_input_b, lat_input_b, screen_input_b]

    # We want to broadcast the structured input (x, y) into their own
    # channels, each with the same dimension as the screen input
    # We can then concatenate, then convolve over the whole tensor
    x = RepeatVector(64 * 64)(structured_input_a)
    x = Reshape((64, 64, 2))(x)
    structured_output_a = x
    x = RepeatVector(64 * 64)(structured_input_b)
    x = Reshape((64, 64, 2))(x)
    structured_output_b = x

    # Similar with the latent vector, except it will simply be repeated
    # column wise
    x = RepeatVector(64)(lat_input_a)
    x = Reshape((64, 64, 1))(x)
    lat_output_a = x
    x = RepeatVector(64)(lat_input_b)
    x = Reshape((64, 64, 1))(x)
    lat_output_b = x

    # The screen is the correct shape, just add a channel dimension
    x = Reshape((64, 64, 1))(screen_input_a)
    screen_output_a = x
    x = Reshape((64, 64, 1))(screen_input_b)
    screen_output_b = x

    x = concatenate([screen_output_a, structured_output_a, lat_output_a,
                     screen_output_b, structured_output_b, lat_output_b], axis=-1)
    print("Hello, World!", x.shape)

    x = Conv2D(16, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    print("1", x.shape)
    x = Conv2D(32, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(2)(x)
    print("2", x.shape)
    x = Conv2D(64, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    print("3", x.shape)
    x = Conv2D(128, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(2)(x)
    print("4", x.shape)
    x = Conv2D(256, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    print("5", x.shape)
    x = Conv2D(512, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(2)(x)
    print("6", x.shape)
    x = Conv2D(1024, (3, 3))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    print("7", x.shape)
    x = Conv2D(2, (1, 1))(x)
    x = Activation('linear')(x)
    x = AveragePooling2D()(x)
    print("8", x.shape)
    x = Activation("softmax")(x)
    print("9", x.shape)
    prob_output = Reshape((2,))(x)
    print("10", prob_output.shape)

    self.probabilityNetwork = Model(inputs=eng_state_a + eng_state_b,
                                    outputs=[prob_output])
def one_hot(x):
    x = K.argmax(x)
    x = tf.one_hot(x, 78)
    x = RepeatVector(1)(x)
    return x
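# Sketch of one_hot wrapped in a Lambda layer so the argmax/one-hot round trip
# stays inside the graph; the (None, 78)-shaped softmax input is a hypothetical
# stand-in, not a tensor from the original code.
soft_predictions = Input(shape=(78,))
hard_tokens = Lambda(one_hot, name='hard_one_hot')(soft_predictions)  # (batch, 1, 78)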
def layers(self):
    mean_input = Input(self.latent_size, self.batch_size, name="mean_input")
    stddev_input = Input(self.latent_size, self.batch_size, name="stddev_input")
    mask_input = Input(self.mask_input_shape, self.batch_size, name="mask_input")
    detail_input = Input(self.input_shape, self.batch_size, name="detail_input")

    ##########################
    # Detail encoder network #
    ##########################
    # 128x128x3
    detail_net = Conv2D(filters=16, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_input)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=16, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail128x128x16 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail128x128x16)
    detail_net = Dropout(self.dropout)(detail_net)

    # 64x64x16
    detail_net = Conv2D(filters=32, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=32, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail64x64x32 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail64x64x32)
    detail_net = Dropout(self.dropout)(detail_net)

    # 32x32x32
    detail_net = Conv2D(filters=64, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=64, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail32x32x64 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail32x32x64)
    detail_net = Dropout(self.dropout)(detail_net)

    # 16x16x64
    detail_net = Conv2D(filters=128, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=128, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail16x16x128 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail16x16x128)
    detail_net = Dropout(self.dropout)(detail_net)

    # 8x8x128
    detail_net = Conv2D(filters=256, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=256, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail8x8x256 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail8x8x256)
    detail_net = Dropout(self.dropout)(detail_net)

    # 4x4x256
    detail_net = Conv2D(filters=512, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=512, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail4x4x512 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail4x4x512)
    detail_net = Dropout(self.dropout)(detail_net)

    # 2x2x512
    detail_net = Conv2D(filters=1024, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail_net = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = Conv2D(filters=1024, kernel_size=3, use_bias=True,
                        data_format='channels_last', padding='same')(detail_net)
    detail_net = BatchNormalization()(detail_net)
    detail2x2x1024 = LeakyReLU(alpha=self.leak)(detail_net)
    detail_net = MaxPooling2D(pool_size=2)(detail2x2x1024)
    # 1x1x1024
    detail_net = Dropout(self.dropout)(detail_net)

    ########################
    # Mask encoder network #
    ########################
    # {frames}x128x128x3
    mask_net = TimeDistributed(Conv2D(filters=16, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_input)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=16, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask128x128x16 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask128x128x16)
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)

    # {frames}x64x64x16
    mask_net = TimeDistributed(Conv2D(filters=32, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=32, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask64x64x32 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask64x64x32)
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)

    # {frames}x32x32x32
    mask_net = TimeDistributed(Conv2D(filters=64, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=64, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask32x32x64 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask32x32x64)
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)

    # {frames}x16x16x64
    mask_net = TimeDistributed(Conv2D(filters=128, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=128, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask16x16x128 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask16x16x128)
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)

    # {frames}x8x8x128
    mask_net = TimeDistributed(Conv2D(filters=256, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=256, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask8x8x256 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask8x8x256)
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)

    # {frames}x4x4x256
    mask_net = TimeDistributed(Conv2D(filters=512, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=512, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask4x4x512 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask4x4x512)
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)

    # {frames}x2x2x512
    mask_net = TimeDistributed(Conv2D(filters=1024, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask_net = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(Conv2D(filters=1024, kernel_size=3, use_bias=True,
                                      data_format='channels_last', padding='same'))(mask_net)
    mask_net = BatchNormalization()(mask_net)
    mask2x2x1024 = TimeDistributed(LeakyReLU(alpha=self.leak))(mask_net)
    mask_net = TimeDistributed(MaxPooling2D(pool_size=2))(mask2x2x1024)
    # {frames}x1x1x1024
    mask_net = TimeDistributed(Dropout(self.dropout))(mask_net)
    mask_net = TimeDistributed(Flatten(name="mask_flatten"))(mask_net)
    epsilon = TimeDistributed(Dense(self.latent_size), name="epsilon")(mask_net)
    samples = SampleLayer(beta=self.beta,
                          capacity=self.latent_size,
                          epsilon_sequence=True,
                          name="sampling_layer")([mean_input, stddev_input, epsilon])

    ###################
    # Decoder network #
    ###################
    net = TimeDistributed(Dense(self.latent_size, activation='relu'))(samples)
    # reexpand the input from flat:
    net = TimeDistributed(Reshape((1, 1, self.latent_size)))(net)
    net = TimeDistributed(LeakyReLU(alpha=self.leak))(net)
    # {frames}x1x1x1024
    net = TimeDistributed(Conv2DTranspose(1024, (3, 3), strides=(2, 2), padding='same'))(net)
    # {frames}x2x2x1024
    net = concatenate([
        net,
        Reshape((-1, 2, 2, 1024))(RepeatVector(net.shape[1])(Flatten()(detail2x2x1024)))
    ])
    net = TimeDistributed(Dropout(self.dropout))(net)
    # {frames}x2x2x2048
    net = TimeDistributed(Conv2D(filters=1024, kernel_size=3, use_bias=True,
                                 data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(LeakyReLU(alpha=self.leak))(net)
    # {frames}x2x2x1024
    net = concatenate([net, mask2x2x1024])
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(Conv2D(filters=1024, kernel_size=3, use_bias=True,
                                 data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(LeakyReLU(alpha=self.leak))(net)
    # {frames}x2x2x1024
    net = TimeDistributed(Conv2DTranspose(512, (3, 3), strides=(2, 2), padding='same'))(net)
    # {frames}x4x4x512
    net = concatenate([
        net,
        Reshape((-1, 4, 4, 512))(RepeatVector(net.shape[1])(Flatten()(detail4x4x512)))
    ])
    # {frames}x4x4x1024
    net = TimeDistributed(Dropout(self.dropout))(net)
    # The source snippet is cut off mid-call below; the trailing Conv2D is closed
    # with the same data_format/padding arguments used throughout this method,
    # and the rest of the decoder is not reconstructed.
    net = TimeDistributed(Conv2D(filters=512, kernel_size=3, use_bias=True,
                                 data_format='channels_last', padding='same'))(net)
data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x4x4x512 net = concatenate([net, mask4x4x512]) # {frames}x4x4x1024 net = TimeDistributed(Dropout(self.dropout))(net) net = ConvLSTM2D(filters=512, kernel_size=3, return_sequences=True, use_bias=True, data_format='channels_last', padding='same')(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x4x4x512 net = TimeDistributed( Conv2DTranspose(256, (3, 3), strides=(2, 2), padding='same'))(net) # {frames}x8x8x256 net = concatenate([ net, Reshape( (-1, 8, 8, 256))(RepeatVector(net.shape[1])(Flatten()(detail8x8x256))) ]) # {frames}x8x8x512 net = TimeDistributed(Dropout(self.dropout))(net) net = TimeDistributed( Conv2D(filters=256, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x8x8x256 net = concatenate([net, mask8x8x256]) net = TimeDistributed(Dropout(self.dropout))(net) net = ConvLSTM2D(filters=256, kernel_size=3, return_sequences=True, use_bias=True, data_format='channels_last', padding='same')(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x8x8x256 net = TimeDistributed( Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same'))(net) # {frames}x16x16x128 net = concatenate([ net, Reshape( (-1, 16, 16, 128))(RepeatVector(net.shape[1])(Flatten()(detail16x16x128))) ]) # {frames}x16x16x256 net = TimeDistributed(Dropout(self.dropout))(net) net = TimeDistributed( Conv2D(filters=128, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x16x16x128 net = concatenate([net, mask16x16x128]) # {frames}x16x16x256 net = TimeDistributed(Dropout(self.dropout))(net) net = ConvLSTM2D(filters=128, kernel_size=3, return_sequences=True, use_bias=True, data_format='channels_last', padding='same')(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x16x16x128 net = TimeDistributed( Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same'))(net) # {frames}x32x32x64 net = concatenate([ net, Reshape( (-1, 32, 32, 64))(RepeatVector(net.shape[1])(Flatten()(detail32x32x64))) ]) # {frames}x32x32x128 net = TimeDistributed(Dropout(self.dropout))(net) net = TimeDistributed( Conv2D(filters=64, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x32x32x64 net = concatenate([net, mask32x32x64]) net = TimeDistributed(Dropout(self.dropout))(net) # {frames}x32x32x128 net = TimeDistributed( Conv2D(filters=64, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x32x32x64 net = TimeDistributed( Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same'))(net) # {frames}x64x64x32 net = concatenate([ net, Reshape( (-1, 64, 64, 32))(RepeatVector(net.shape[1])(Flatten()(detail64x64x32))) ]) # {frames}x64x64x64 net = TimeDistributed(Dropout(self.dropout))(net) net = TimeDistributed( Conv2D(filters=32, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = 
TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x64x64x32 net = concatenate([net, mask64x64x32]) net = TimeDistributed(Dropout(self.dropout))(net) # {frames}x64x64x64 net = TimeDistributed( Conv2D(filters=32, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x64x64x32 net = TimeDistributed( Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same'))(net) # {frames}x128x128x16 net = concatenate([ net, Reshape( (-1, 128, 128, 16))(RepeatVector(net.shape[1])(Flatten()(detail128x128x16))) ]) # {frames}x128x128x32 net = TimeDistributed(Dropout(self.dropout))(net) net = TimeDistributed( Conv2D(filters=16, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x128x128x16 net = concatenate([net, mask128x128x16]) # {frames}x128x128x32 net = TimeDistributed( Conv2D(filters=16, kernel_size=3, use_bias=True, data_format='channels_last', padding='same'))(net) net = BatchNormalization()(net) net = TimeDistributed(LeakyReLU(alpha=self.leak))(net) # {frames}x128x128x16 net = ConvLSTM2D(filters=self.input_shape[-1], kernel_size=(1, 1), return_sequences=False, padding='same')(net) # 128x128x3 return [mean_input, stddev_input, detail_input, mask_input], net
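# SampleLayer is a custom layer defined elsewhere in the project and not shown
# here. A minimal sketch consistent with how it is called above
# ([mean, stddev, epsilon] -> per-frame samples, with epsilon_sequence=True
# broadcasting a per-clip distribution over the frame axis) might look like the
# following. Everything in it is an assumption: the beta/capacity KL weighting
# and the log-variance reading of the stddev input are illustrative, not the
# project's confirmed implementation.
from keras import backend as K
from keras.layers import Layer
from keras.models import Model

class SampleLayerSketch(Layer):
    def __init__(self, beta=1.0, capacity=0.0, epsilon_sequence=False, **kwargs):
        super(SampleLayerSketch, self).__init__(**kwargs)
        self.beta = beta
        self.capacity = capacity
        self.epsilon_sequence = epsilon_sequence

    def call(self, inputs):
        mean, log_var, epsilon = inputs
        if self.epsilon_sequence:
            # Broadcast the per-clip mean/log-variance across the frame axis so
            # they combine with the per-frame epsilon sequence.
            mean = K.expand_dims(mean, axis=1)
            log_var = K.expand_dims(log_var, axis=1)
        # beta-VAE-style penalty: weighted distance of the KL term from a target capacity.
        kl = -0.5 * K.mean(1.0 + log_var - K.square(mean) - K.exp(log_var))
        self.add_loss(self.beta * K.abs(kl - self.capacity))
        # Reparameterization: sample = mean + sigma * epsilon.
        return mean + K.exp(0.5 * log_var) * epsilon

# Assembly sketch (also an assumption, not from the original source): layers()
# returns the four input tensors and the final 128x128x3 output, so a caller
# presumably wires them into a Model. demo_build_model and the compile settings
# below are illustrative only.
def demo_build_model(builder):
    inputs, output = builder.layers()
    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer='adam', loss='mse')
    return model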