def ModelShare():
    tweet_a = Input(shape=(280, 256))
    tweet_b = Input(shape=(280, 256))

    # This layer takes a matrix as input and, because return_sequences=True,
    # returns a sequence of 64-dimensional vectors (one per timestep).
    shared_lstm = LSTM(64, return_sequences=True, name='lstm')

    # When we reuse the same layer instance multiple times,
    # the weights of the layer are also being reused
    # (it is effectively *the same* layer).
    encoded_a = shared_lstm(tweet_a)
    encoded_b = shared_lstm(tweet_b)

    # We can then concatenate the two sequences:
    merged_vector = concatenate([encoded_a, encoded_b], axis=-1)

    # And add a logistic regression on top
    predictions = Dense(1, activation='sigmoid')(merged_vector)

    # We define a trainable model linking the tweet inputs to the predictions
    model = Model(inputs=[tweet_a, tweet_b], outputs=predictions)

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
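# Hedged usage sketch (added, not part of the original snippet): builds ModelShare()
# and fits it on random placeholder data. Because the shared LSTM uses
# return_sequences=True, the sigmoid output is per-timestep, so the labels below
# have shape (batch, 280, 1). The demo function name and shapes are assumptions.
def demo_model_share():
    import numpy as np
    model = ModelShare()
    data_a = np.random.random((32, 280, 256))
    data_b = np.random.random((32, 280, 256))
    labels = np.random.randint(0, 2, size=(32, 280, 1))
    model.fit([data_a, data_b], labels, epochs=1, batch_size=8)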
def modelDemoStandardConvLSTMInception(input_shape, parameter=None):
    # define LSTM
    input = Input(shape=input_shape, name='main_input')

    I_1 = TimeDistributed(Conv2D(16, (1, 1), activation='relu', padding='same', name='C_1'),
                          name='I_11')(input)
    I_1 = TimeDistributed(Conv2D(16, (5, 5), activation='relu', padding='same', name='C_2'),
                          name='I_12')(I_1)

    I_2 = TimeDistributed(MaxPooling2D((3, 3), strides=(1, 1), padding='same', name='C_3'),
                          name='I_21')(input)
    I_2 = TimeDistributed(Conv2D(16, (1, 1), activation='relu', padding='same', name='C_4'),
                          name='I_22')(I_2)

    concatenate_output = concatenate([I_1, I_2], axis=-1)

    # x = TimeDistributed(Flatten())(x)
    x = ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',
                   return_sequences=False)(concatenate_output)
    # x = MaxPooling2D((3, 3), strides=(1, 1), padding='same', name='M_1')(x)
    x = Flatten()(x)
    x = RepeatVector(8)(x)
    x = LSTM(50, return_sequences=True)(x)
    output = TimeDistributed(Dense(8, activation='softmax'), name='main_output')(x)

    # with tensorflow.device('/cpu'):
    model = Model(inputs=[input], outputs=[output])

    # compile the model with gpu
    # parallel_model = multi_gpu_model(model, gpus=2)
    # parallel_model.compile(loss={'main_output': 'categorical_crossentropy'},
    #                        loss_weights={'main_output': 1.},
    #                        optimizer='adam', metrics=['accuracy'])
    # model = multi_gpu(model, gpus=[1, 2])
    model.compile(loss={'main_output': 'categorical_crossentropy'},
                  loss_weights={'main_output': 1.},
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def modelB(row, col, parameter=None):
    # define LSTM
    input = Input(shape=(None, row, col, 1), name='main_input')
    '''
    x = TimeDistributed(Conv2D(16, (2, 2)))(input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.25)(x)
    '''
    # tower_1 = TimeDistributed(Conv2D(16, (1, 1), padding='same', activation='relu'))(input)
    # tower_1 = TimeDistributed(Conv2D(16, (3, 3), padding='same', activation='relu'))(tower_1)

    tower_2 = TimeDistributed(Conv2D(16, (1, 1), padding='same'))(input)
    x = BatchNormalization()(tower_2)
    x = Activation('relu')(x)
    x = Dropout(0.25)(x)
    tower_2 = TimeDistributed(Conv2D(16, (5, 5), padding='same'))(x)
    x = BatchNormalization()(tower_2)
    x = Activation('relu')(x)
    tower_2 = Dropout(0.25)(x)

    tower_3 = TimeDistributed(MaxPooling2D((3, 3), strides=(1, 1), padding='same'))(input)
    tower_3 = TimeDistributed(Conv2D(16, (1, 1), padding='same'))(tower_3)
    x = BatchNormalization()(tower_3)
    x = Activation('relu')(x)
    tower_3 = Dropout(0.25)(x)

    concatenate_output = concatenate([tower_2, tower_3], axis=-1)

    x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(concatenate_output)
    x = Dropout(0.25)(x)
    x = TimeDistributed(Flatten())(x)

    # convLstm = ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same',
    #                       return_sequences=False)(x)
    lstm_output = LSTM(75)(x)
    lstm_output = BatchNormalization()(lstm_output)
    # lstm_output = BatchNormalization()(convLstm)

    # auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_output)
    # auxiliary_input = Input(shape=(4,), name='aux_input')
    # x = concatenate([lstm_output, auxiliary_input])

    x = RepeatVector(4)(lstm_output)
    x = LSTM(50, return_sequences=True)(x)
    # model.add(Dropout(0.25))
    x = BatchNormalization()(x)
    output = TimeDistributed(Dense(4, activation='softmax'), name='main_output')(x)

    model = Model(inputs=[input], outputs=[output])

    model.compile(loss={'main_output': 'categorical_crossentropy'},
                  loss_weights={'main_output': 1.},
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def ModelVisualQuestionAnswering():
    # First, let's define a vision model using a Sequential model.
    # This model will encode an image into a vector.
    vision_model = Sequential()
    vision_model.add(Conv2D(64, (3, 3), activation='relu', padding='same',
                            input_shape=(224, 224, 3)))
    vision_model.add(Conv2D(64, (3, 3), activation='relu'))
    vision_model.add(MaxPooling2D((2, 2)))
    vision_model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    vision_model.add(Conv2D(128, (3, 3), activation='relu'))
    vision_model.add(MaxPooling2D((2, 2)))
    vision_model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    vision_model.add(Conv2D(256, (3, 3), activation='relu'))
    vision_model.add(Conv2D(256, (3, 3), activation='relu'))
    vision_model.add(MaxPooling2D((2, 2)))
    vision_model.add(Flatten())

    # Now let's get a tensor with the output of our vision model:
    image_input = Input(shape=(224, 224, 3))
    encoded_image = vision_model(image_input)

    # Next, let's define a language model to encode the question into a vector.
    # Each question will be at most 100 words long,
    # and we will index words as integers from 1 to 9999.
    question_input = Input(shape=(100,), dtype='int32')
    embedded_question = Embedding(input_dim=10000, output_dim=256,
                                  input_length=100)(question_input)
    encoded_question = LSTM(256)(embedded_question)

    # Let's concatenate the question vector and the image vector:
    merged = concatenate([encoded_question, encoded_image])

    # And let's train a logistic regression over 1000 words on top:
    output = Dense(1000, activation='softmax')(merged)

    # This is our final model:
    vqa_model = Model(inputs=[image_input, question_input], outputs=output)
    return vqa_model
def ModelInception():
    input_img = Input(shape=(256, 256, 3))

    tower_1 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img)
    tower_1 = Conv2D(64, (3, 3), padding='same', activation='relu')(tower_1)

    tower_2 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img)
    tower_2 = Conv2D(64, (5, 5), padding='same', activation='relu')(tower_2)

    tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_img)
    tower_3 = Conv2D(64, (1, 1), padding='same', activation='relu')(tower_3)

    # Note: with the default channels_last data format, axis=1 concatenates the
    # towers along the height dimension; axis=-1 would stack them along channels.
    output = concatenate([tower_1, tower_2, tower_3], axis=1)

    model = Model(inputs=[input_img], outputs=output)
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
def ModelSharedVision():
    # First, define the vision modules
    digit_input = Input(shape=(27, 27, 1))
    x = Conv2D(64, (3, 3))(digit_input)
    x = Conv2D(64, (3, 3))(x)
    x = MaxPooling2D((2, 2))(x)
    out = Flatten()(x)

    vision_model = Model(digit_input, out)

    # Then define the tell-digits-apart model
    digit_a = Input(shape=(27, 27, 1))
    digit_b = Input(shape=(27, 27, 1))

    # The vision model will be shared, weights and all
    out_a = vision_model(digit_a)
    out_b = vision_model(digit_b)

    concatenated = concatenate([out_a, out_b])
    out = Dense(1, activation='sigmoid')(concatenated)

    classification_model = Model([digit_a, digit_b], out)
    return classification_model
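# Hedged usage sketch (added, not in the original): exercises ModelSharedVision() on
# random 27x27 grayscale "digit" pairs. The compile settings here are assumptions,
# since the original function returns an uncompiled model.
def demo_shared_vision():
    import numpy as np
    pair_model = ModelSharedVision()
    pair_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    digits_a = np.random.random((8, 27, 27, 1))
    digits_b = np.random.random((8, 27, 27, 1))
    same_digit = np.random.randint(0, 2, size=(8, 1))
    pair_model.fit([digits_a, digits_b], same_digit, epochs=1, batch_size=4)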
def modelA(row, col):
    # define LSTM
    input = Input(shape=(None, row, col, 1), name='main_input')
    x = TimeDistributed(Conv2D(16, (2, 2), activation='relu'))(input)
    x = Dropout(0.25)(x)
    x = BatchNormalization()(x)
    x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=2))(x)
    x = Dropout(0.25)(x)
    x = TimeDistributed(Flatten())(x)

    lstm_output = LSTM(75)(x)
    lstm_output = BatchNormalization()(lstm_output)

    auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_output)

    auxiliary_input = Input(shape=(4,), name='aux_input')
    x = concatenate([lstm_output, auxiliary_input])

    x = RepeatVector(8)(x)
    x = LSTM(50, return_sequences=True)(x)
    # model.add(Dropout(0.25))
    x = BatchNormalization()(x)
    output = TimeDistributed(Dense(5, activation='softmax'), name='main_output')(x)

    model = Model(inputs=[input, auxiliary_input],
                  outputs=[output, auxiliary_output])

    model.compile(loss={'main_output': 'categorical_crossentropy',
                        'aux_output': 'binary_crossentropy'},
                  loss_weights={'main_output': 1., 'aux_output': 0.2},
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
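# Hedged sketch (added for illustration) of feeding modelA's two inputs and two outputs
# by name, assuming a tf.keras environment. The frame count, batch size, and random
# data are assumptions chosen to match the layer definitions: frames of row x col x 1,
# a 4-dim auxiliary vector, 8-step / 5-class main targets, and a binary auxiliary target.
def demo_model_a(row=32, col=32):
    import numpy as np
    from tensorflow.keras.utils import to_categorical
    model = modelA(row, col)
    frames = np.random.random((16, 10, row, col, 1))   # 16 clips of 10 frames
    aux_in = np.random.random((16, 4))                 # 4-dim auxiliary features
    main_y = to_categorical(np.random.randint(0, 5, size=(16, 8)), num_classes=5)  # (16, 8, 5)
    aux_y = np.random.randint(0, 2, size=(16, 1))
    model.fit({'main_input': frames, 'aux_input': aux_in},
              {'main_output': main_y, 'aux_output': aux_y},
              epochs=1, batch_size=4)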
def ModelVideoQuestionAnswering():
    # First, let's define a vision model using a Sequential model.
    # This model will encode an image into a vector.
    vision_model = Sequential()
    vision_model.add(Conv2D(64, (3, 3), activation='relu', padding='same',
                            input_shape=(224, 224, 3)))
    vision_model.add(Conv2D(64, (3, 3), activation='relu'))
    vision_model.add(MaxPooling2D((2, 2)))
    vision_model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    vision_model.add(Conv2D(128, (3, 3), activation='relu'))
    vision_model.add(MaxPooling2D((2, 2)))
    vision_model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
    vision_model.add(Conv2D(256, (3, 3), activation='relu'))
    vision_model.add(Conv2D(256, (3, 3), activation='relu'))
    vision_model.add(MaxPooling2D((2, 2)))
    vision_model.add(Flatten())

    # Now let's get a tensor with the output of our vision model:
    image_input = Input(shape=(224, 224, 3))
    encoded_image = vision_model(image_input)

    # Next, let's define a language model to encode the question into a vector.
    # Each question will be at most 100 words long,
    # and we will index words as integers from 1 to 9999.
    question_input = Input(shape=(100,), dtype='int32')
    embedded_question = Embedding(input_dim=10000, output_dim=256,
                                  input_length=100)(question_input)
    encoded_question = LSTM(256)(embedded_question)

    # Let's concatenate the question vector and the image vector:
    merged = concatenate([encoded_question, encoded_image])

    # And let's train a logistic regression over 1000 words on top:
    output = Dense(1000, activation='softmax')(merged)

    # This is our final model:
    # vqa_model = Model(inputs=[image_input, question_input], outputs=output)

    video_input = Input(shape=(100, 224, 224, 3))
    # This is our video encoded via the previously trained vision_model (weights are reused)
    encoded_frame_sequence = TimeDistributed(vision_model)(video_input)  # a sequence of vectors
    encoded_video = LSTM(256)(encoded_frame_sequence)  # a single vector

    # This is a model-level representation of the question encoder, reusing the same weights as before:
    question_encoder = Model(inputs=question_input, outputs=encoded_question)

    # Let's use it to encode the question:
    video_question_input = Input(shape=(100,), dtype='int32')
    encoded_video_question = question_encoder(video_question_input)

    # And this is our video question answering model:
    merged = concatenate([encoded_video, encoded_video_question])
    output = Dense(1000, activation='softmax')(merged)
    video_qa_model = Model(inputs=[video_input, video_question_input], outputs=output)
    return video_qa_model
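# Hedged smoke-test sketch for the video-QA model (added; the compile settings, batch
# size, and random inputs are assumptions). Questions are integer word indices in
# [1, 9999] and answers are one-hot vectors over the 1000-way softmax.
def demo_video_qa():
    import numpy as np
    model = ModelVideoQuestionAnswering()
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    video = np.random.random((2, 100, 224, 224, 3)).astype('float32')
    question = np.random.randint(1, 10000, size=(2, 100))
    answers = np.eye(1000)[np.random.randint(0, 1000, size=2)]
    model.fit([video, question], answers, epochs=1, batch_size=1)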
def construct_keras_api_model(embedding_weights):
    # input_no_time_no_repeat = Input(shape=max_len, dtype='int32')
    # embedded_no_time_no_repeat = Embedding(
    #     creative_id_window, embedding_size, weights=[embedding_weights], trainable=False
    # )(input_no_time_no_repeat)
    # ==================================================================================
    Input_fix_creative_id = Input(
        shape=(math.ceil(time_id_max / period_days) * period_length, ),
        dtype='int32',
        name='input_fix_creative_id')
    Embedded_fix_creative_id = Embedding(
        creative_id_window,
        embedding_size,
        weights=[embedding_weights],
        trainable=False)(Input_fix_creative_id)
    # ==================================================================================
    # input_no_time_with_repeat = Input(shape=max_len, dtype='int32')
    # embedded_no_time_with_repeat = Embedding(
    #     creative_id_window, embedding_size, weights=[embedding_weights], trainable=False
    # )(input_no_time_with_repeat)

    # ----------------------------------------------------------------------
    GM_x = keras.layers.GlobalMaxPooling1D()(Embedded_fix_creative_id)
    GM_x = Dropout(0.5)(GM_x)
    GM_x = Dense(embedding_size // 2, kernel_regularizer=l2(0.001))(GM_x)
    GM_x = BatchNormalization()(GM_x)
    GM_x = Activation('relu')(GM_x)
    GM_x = Dropout(0.5)(GM_x)
    GM_x = Dense(embedding_size // 4, kernel_regularizer=l2(0.001))(GM_x)
    GM_x = BatchNormalization()(GM_x)
    GM_x = Activation('relu')(GM_x)
    GM_x = Dense(1, 'sigmoid')(GM_x)

    # ----------------------------------------------------------------------
    GA_x = GlobalAveragePooling1D()(Embedded_fix_creative_id)
    GA_x = Dropout(0.5)(GA_x)
    GA_x = Dense(embedding_size // 2, kernel_regularizer=l2(0.001))(GA_x)
    GA_x = BatchNormalization()(GA_x)
    GA_x = Activation('relu')(GA_x)
    GA_x = Dropout(0.5)(GA_x)
    GA_x = Dense(embedding_size // 4, kernel_regularizer=l2(0.001))(GA_x)
    GA_x = BatchNormalization()(GA_x)
    GA_x = Activation('relu')(GA_x)
    GA_x = Dense(1, 'sigmoid')(GA_x)

    # ==================================================================================
    Conv_creative_id = Conv1D(embedding_size, 15, 5,
                              activation='relu')(Embedded_fix_creative_id)
    # ----------------------------------------------------------------------
    Conv_GM_x = MaxPooling1D(7)(Conv_creative_id)
    Conv_GM_x = Conv1D(embedding_size, 2, 1, activation='relu')(Conv_GM_x)
    Conv_GM_x = GlobalMaxPooling1D()(Conv_GM_x)
    Conv_GM_x = Dropout(0.5)(Conv_GM_x)
    Conv_GM_x = Dense(embedding_size // 2, kernel_regularizer=l2(0.001))(Conv_GM_x)
    Conv_GM_x = BatchNormalization()(Conv_GM_x)
    Conv_GM_x = Activation('relu')(Conv_GM_x)
    Conv_GM_x = Dropout(0.5)(Conv_GM_x)
    Conv_GM_x = Dense(embedding_size // 4, kernel_regularizer=l2(0.001))(Conv_GM_x)
    Conv_GM_x = BatchNormalization()(Conv_GM_x)
    Conv_GM_x = Activation('relu')(Conv_GM_x)
    Conv_GM_x = Dense(1, 'sigmoid')(Conv_GM_x)

    # ----------------------------------------------------------------------
    Conv_GA_x = AveragePooling1D(7)(Conv_creative_id)
    Conv_GA_x = Conv1D(embedding_size, 2, 1, activation='relu')(Conv_GA_x)
    Conv_GA_x = GlobalAveragePooling1D()(Conv_GA_x)
    Conv_GA_x = Dropout(0.5)(Conv_GA_x)
    Conv_GA_x = Dense(embedding_size // 2, kernel_regularizer=l2(0.001))(Conv_GA_x)
    Conv_GA_x = BatchNormalization()(Conv_GA_x)
    Conv_GA_x = Activation('relu')(Conv_GA_x)
    Conv_GA_x = Dropout(0.5)(Conv_GA_x)
    Conv_GA_x = Dense(embedding_size // 4, kernel_regularizer=l2(0.001))(Conv_GA_x)
    Conv_GA_x = BatchNormalization()(Conv_GA_x)
    Conv_GA_x = Activation('relu')(Conv_GA_x)
    Conv_GA_x = Dense(1, 'sigmoid')(Conv_GA_x)

    # ----------------------------------------------------------------------
    LSTM_x = Conv1D(embedding_size, 14, 7, activation='relu')(Conv_creative_id)
    LSTM_x = LSTM(embedding_size, return_sequences=True)(LSTM_x)
    LSTM_x = LSTM(embedding_size, return_sequences=True)(LSTM_x)
    LSTM_x = LSTM(embedding_size)(LSTM_x)
    LSTM_x = Dropout(0.5)(LSTM_x)
    LSTM_x = Dense(embedding_size // 2, kernel_regularizer=l2(0.001))(LSTM_x)
    LSTM_x = BatchNormalization()(LSTM_x)
    LSTM_x = Activation('relu')(LSTM_x)
    LSTM_x = Dropout(0.5)(LSTM_x)
    LSTM_x = Dense(embedding_size // 4, kernel_regularizer=l2(0.001))(LSTM_x)
    LSTM_x = BatchNormalization()(LSTM_x)
    LSTM_x = Activation('relu')(LSTM_x)
    LSTM_x = Dense(1, 'sigmoid')(LSTM_x)

    # ----------------------------------------------------------------------
    concatenated = concatenate([
        GM_x,
        GA_x,
        Conv_GM_x,
        Conv_GA_x,
        LSTM_x,
    ], axis=-1)
    output_tensor = Dense(1, 'sigmoid')(concatenated)

    keras_api_model = Model(
        [
            # input_no_time_no_repeat,
            Input_fix_creative_id,
            # input_no_time_with_repeat,
        ],
        output_tensor)
    keras_api_model.summary()
    plot_model(keras_api_model, to_file='model/keras_api_word2vec_model.png')
    print('-' * 5 + ' ' * 3 + "Compile the model" + ' ' * 3 + '-' * 5)
    keras_api_model.compile(optimizer=optimizers.RMSprop(lr=RMSProp_lr),
                            loss=losses.binary_crossentropy,
                            metrics=[metrics.binary_accuracy])
    return keras_api_model
def call(self, query_input, source_input, bias, training,
         cache=None, decode_loop_step=None):
    """Apply attention mechanism to query_input and source_input.

    Args:
        query_input: [B, len_query, hidden_size]
        source_input: [B, len_source, hidden_size]
        bias: [B, 1, len_query, len_source]
        training: bool
        cache: (Used during prediction) A dictionary with tensors containing
            results of previous attentions. The dictionary must have the items:
                {'k': tensor with shape [B, i, heads, dim_per_head],
                 'v': tensor with shape [B, i, heads, dim_per_head]}
            where i is the current decoded length for non-padded decode, or max
            sequence length for padded decode.
        decode_loop_step: An integer, step number of the decoding loop. Used
            only for autoregressive inference on TPU.

    Returns:
        Attention layer output with shape [B, len_query, hidden_size]
    """
    # Linearly project query, key and value using different learned
    # projections. Splitting heads is automatically done during the linear
    # projections --> [B, len, num_heads, dim_per_head]
    query = self.query_dense_layer(query_input)
    key = self.key_dense_layer(source_input)
    value = self.value_dense_layer(source_input)

    if cache is not None:
        # Combine cached keys and values with new keys and values.
        if decode_loop_step is not None:
            cache_k_shape = cache['k'].shape.as_list()
            indices = tf.reshape(
                tf.one_hot(decode_loop_step, cache_k_shape[1], dtype=key.dtype),
                [1, cache_k_shape[1], 1, 1])
            key = cache['k'] + key * indices
            cache_v_shape = cache['v'].shape.as_list()
            indices = tf.reshape(
                tf.one_hot(decode_loop_step, cache_v_shape[1], dtype=value.dtype),
                [1, cache_v_shape[1], 1, 1])
            value = cache['v'] + value * indices
        else:
            key = layers.concatenate([tf.cast(cache['k'], key.dtype), key], axis=1)
            value = layers.concatenate([tf.cast(cache['v'], value.dtype), value], axis=1)

        # Update cache
        cache['k'] = key
        cache['v'] = value

    # Scale query to prevent the dot product between query and key from
    # growing too large.
    depth = (self.hidden_size // self.num_heads)
    query *= depth ** -0.5

    # Calculate dot product attention
    logits = tf.einsum('BTNH,BFNH->BNFT', key, query)
    logits += bias
    # Note that softmax internally performs math operations using float32
    # for numeric stability. When training with float16, we keep the input
    # and output in float16 for better performance.
    weights = layers.Softmax(name='attention_weights')(logits)

    if training:
        weights = layers.Dropout(self.attention_dropout)(weights, training=training)
    attention_output = tf.einsum('BNFT,BTNH->BFNH', weights, value)

    # Run the outputs through another linear projection layer. Recombining heads
    # is automatically done --> [batch_size, length, hidden_size]
    attention_output = self.output_dense_layer(attention_output)
    return attention_output
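# Hedged sketch (added) of the cache layout described in the docstring above; the
# helper name and sizes are assumptions. For incremental decoding, 'k' and 'v' start
# with zero decoded length and are grown/overwritten by each call(); for padded TPU
# decode they would instead be pre-allocated to the maximum sequence length.
def make_empty_attention_cache(batch_size=4, num_heads=8, dim_per_head=64):
    import tensorflow as tf
    return {
        'k': tf.zeros([batch_size, 0, num_heads, dim_per_head]),
        'v': tf.zeros([batch_size, 0, num_heads, dim_per_head]),
    }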
def init_model(self, mode="resnet50"):
    """
    Initialize the model.
    :param mode: model type
    :return: checkpoint file name and the assembled model
    """
    if mode == "resnet50v2":
        from tensorflow.keras.applications.resnet_v2 import ResNet50V2
        model_name = 'rotnet_v3_resnet50v2_{epoch:02d}_{val_acc:.4f}.hdf5'
        base_model = ResNet50V2(weights='imagenet', include_top=False,
                                input_shape=self.input_shape)
    elif mode == "resnet50":
        from tensorflow.keras.applications.resnet import ResNet50
        model_name = 'rotnet_v3_resnet50_{epoch:02d}_{val_acc:.4f}.hdf5'
        base_model = ResNet50(weights='imagenet', include_top=False,
                              input_shape=self.input_shape)
    elif mode == "mobilenetv2":
        from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
        model_name = 'rotnet_v3_mobilenetv2_{epoch:02d}_{val_acc:.4f}.hdf5'
        base_model = MobileNetV2(weights='imagenet', include_top=False,
                                 input_shape=self.input_shape)
    elif mode == "densenet121":
        from tensorflow.python.keras.applications.densenet import DenseNet121
        model_name = 'rotnet_v3_densenet121_{epoch:02d}_{val_acc:.4f}.hdf5'
        base_model = DenseNet121(weights='imagenet', include_top=False,
                                 input_shape=self.input_shape)
    else:
        raise Exception("[Exception] mode {} is not supported!".format(mode))

    # Keep all base-model layers trainable (set trainable = False here to freeze them).
    for layer in base_model.layers:
        layer.trainable = True

    x = base_model.output
    # if mode == "mobilenetv2":
    #     x = Dense(128, activation="relu")(x)
    x = Flatten()(x)

    model_inputs = [base_model.input]
    if self.is_hw_ratio:  # whether to use the width/height ratio as an extra input
        x1 = base_model.output
        x1 = Flatten()(x1)
        input_ratio = Input(shape=(1,), name='ratio')
        x2 = Dense(1, activation='relu')(input_ratio)
        x = concatenate([x1, x2])
        model_inputs.append(input_ratio)  # the ratio input must be part of the model inputs

    final_output = Dense(self.nb_classes, activation='softmax', name='fc360')(x)
    model = Model(inputs=model_inputs, outputs=final_output)
    # model.summary()

    # Optimizer and metrics
    if self.nb_classes == 360:
        metrics = ["acc", angle_error]
    else:
        metrics = ["acc"]
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.0004, momentum=0.9),
                  metrics=metrics)

    if self.model_path:
        model.load_weights(self.model_path)
        print('[Info] Loaded model weights from: {}'.format(self.model_path))

    return model_name, model