def __init__(self, num_splits=2, axis=-1):
    """Initialise the layer as a Lambda that splits its input with ``tf.split``.

    :param num_splits: second positional argument handed to ``tf.split``.
    :param axis: axis along which the input tensor is split.
    """

    def _split(tensor):
        # Closure over the constructor arguments; evaluated when the layer is called.
        return tf.split(tensor, num_splits, axis=axis)

    Lambda.__init__(self, _split)
def __init__(self):
    """Create the layer and its inner ``cross_layer`` Lambda.

    The Lambda wraps ``all_vs_all_pairs_4d`` and is stored on ``self.cross``.
    """
    super(CrossLayer4D, self).__init__()
    pairing_layer = Lambda(all_vs_all_pairs_4d, name="cross_layer")
    self.cross = pairing_layer
def build_model(self):
    """Build the VGG-style network with a selective head and an auxiliary head.

    The trunk is 13 3x3 convolutions (each followed by ReLU and batch
    normalisation) with L2 weight decay taken from ``self.weight_decay``;
    every convolution is followed by either dropout or 2x2 max-pooling.
    A 512-unit dense layer and a Lambda applying ``K.dropout`` with
    ``self.mc_dropout_rate`` produce the shared feature vector.

    Side effect: the network input tensor is stored on ``self.input``.

    :return: Keras ``Model`` with outputs ``[selective_head, classification_head]``,
        where ``selective_head`` is the class softmax concatenated (axis 1)
        with the sigmoid selection score.
    """
    weight_decay = self.weight_decay
    basic_dropout_rate = 0.3
    pool = None  # marker: finish the stage with max-pooling instead of dropout

    # One entry per convolution: (filters, dropout rate or `pool`).
    trunk_plan = (
        (64, basic_dropout_rate),
        (64, pool),
        (128, basic_dropout_rate + 0.1),
        (128, pool),
        (256, basic_dropout_rate + 0.1),
        (256, basic_dropout_rate + 0.1),
        (256, pool),
        (512, basic_dropout_rate + 0.1),
        (512, basic_dropout_rate + 0.1),
        (512, pool),
        (512, basic_dropout_rate + 0.1),
        (512, basic_dropout_rate + 0.1),
        (512, pool),
    )

    net_input = Input(shape=self.x_shape)
    features = net_input
    for filters, drop_rate in trunk_plan:
        features = Conv2D(filters, (3, 3), padding='same',
                          kernel_regularizer=regularizers.l2(weight_decay))(features)
        features = Activation('relu')(features)
        features = BatchNormalization()(features)
        if drop_rate is pool:
            features = MaxPooling2D(pool_size=(2, 2))(features)
        else:
            features = Dropout(drop_rate)(features)

    # Dense feature layer shared by all heads.
    features = Dropout(basic_dropout_rate + 0.2)(features)
    features = Flatten()(features)
    features = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(features)
    features = Activation('relu')(features)
    features = BatchNormalization()(features)
    features = Dropout(basic_dropout_rate + 0.2)(features)
    features = Lambda(lambda x: K.dropout(x, level=self.mc_dropout_rate))(features)

    # classification head (f)
    class_probs = Dense(self.num_classes, activation='softmax')(features)

    # selection head (g)
    selection = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(features)
    selection = Activation('relu')(selection)
    selection = BatchNormalization()(selection)
    # this normalization is identical to initialization of batchnorm gamma to 1/10
    selection = Lambda(lambda x: x / 10)(selection)
    selection = Dense(1, activation='sigmoid')(selection)

    selective_output = Concatenate(axis=1, name="selective_head")([class_probs, selection])

    # auxiliary head (h)
    auxiliary_output = Dense(self.num_classes, activation='softmax',
                             name="classification_head")(features)

    model = Model(inputs=net_input, outputs=[selective_output, auxiliary_output])
    self.input = net_input
    return model
def get_yolo3_train_model(model_type, anchors, num_classes, weights_path=None,
                          freeze_level=1, optimizer=None, label_smoothing=0,
                          model_pruning=False, pruning_end_step=10000):
    """Create the training model for YOLOv3.

    The returned model takes the image input plus one ``y_true`` tensor per
    feature layer and outputs the value of the ``yolo_loss`` Lambda layer;
    it is compiled so that this output is used directly as the loss.

    :param model_type: forwarded to ``get_yolo3_model`` to select the network.
    :param anchors: anchor collection; ``len(anchors) // 3`` is the number of
        feature layers (9 anchors -> 3 layers, 6 anchors -> 2 layers / Tiny).
    :param num_classes: number of object classes.
    :param weights_path: optional weights file loaded with ``by_name=True``.
    :param freeze_level: 1 freezes the first ``backbone_len`` layers, 2 freezes
        all but the last 3 layers, 0 marks every layer trainable. Any other
        value leaves the ``trainable`` flags untouched.
    :param optimizer: optimizer passed to ``compile``. ``None`` (default)
        creates a fresh ``Adam(lr=1e-3, decay=0)`` for this call. The previous
        default was a single ``Adam`` instance created at definition time and
        therefore shared by every model built with the default.
    :param label_smoothing: forwarded to ``yolo3_loss``.
    :param model_pruning: forwarded to ``get_yolo3_model``.
    :param pruning_end_step: forwarded to ``get_yolo3_model``.
    :return: the compiled Keras training ``Model``.
    """
    if optimizer is None:
        # Build the default per call so optimizer state is never shared
        # between independently created models.
        optimizer = Adam(lr=1e-3, decay=0)

    num_anchors = len(anchors)
    # YOLOv3 model has 9 anchors and 3 feature layers but
    # Tiny YOLOv3 model has 6 anchors and 2 feature layers,
    # so we can calculate feature layers number to get model type
    num_feature_layers = num_anchors // 3

    # feature map target value, so its shape should be like:
    # [
    #  (image_height/32, image_width/32, 3, num_classes+5),
    #  (image_height/16, image_width/16, 3, num_classes+5),
    #  (image_height/8, image_width/8, 3, num_classes+5)
    # ]
    y_true = [
        Input(shape=(None, None, 3, num_classes + 5),
              name='y_true_{}'.format(layer_idx))
        for layer_idx in range(num_feature_layers)
    ]

    model_body, backbone_len = get_yolo3_model(
        model_type, num_feature_layers, num_anchors, num_classes,
        model_pruning=model_pruning, pruning_end_step=pruning_end_step)
    print('Create {} YOLOv3 {} model with {} anchors and {} classes.'.format(
        'Tiny' if num_feature_layers == 2 else '', model_type, num_anchors,
        num_classes))
    print('model layer number:', len(model_body.layers))

    if weights_path:
        model_body.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    if freeze_level in [1, 2]:
        # Freeze the backbone part or freeze all but final feature map & input layers.
        num = (backbone_len, len(model_body.layers) - 3)[freeze_level - 1]
        for i in range(num):
            model_body.layers[i].trainable = False
        print('Freeze the first {} layers of total {} layers.'.format(
            num, len(model_body.layers)))
    elif freeze_level == 0:
        # Unfreeze all layers.
        for layer in model_body.layers:
            layer.trainable = True
        print('Unfreeze all of the layers.')

    model_loss, location_loss, confidence_loss, class_loss = Lambda(
        yolo3_loss,
        name='yolo_loss',
        arguments={
            'anchors': anchors,
            'num_classes': num_classes,
            'ignore_thresh': 0.5,
            'label_smoothing': label_smoothing
        })([*model_body.output, *y_true])

    model = Model([model_body.input, *y_true], model_loss)

    model.compile(
        optimizer=optimizer,
        loss={
            # use custom yolo_loss Lambda layer.
            'yolo_loss': lambda y_true, y_pred: y_pred
        })

    # Expose the individual loss terms as metrics.
    loss_dict = {
        'location_loss': location_loss,
        'confidence_loss': confidence_loss,
        'class_loss': class_loss
    }
    add_metrics(model, loss_dict)

    return model
def fcn_8s(num_classes, input_shape, lr_init, lr_decay):
    """Build and compile an FCN-8s style segmentation model.

    The encoder is five blocks of 3x3 conv + batch-norm + ReLU, each ended by
    max-pooling. The pooled outputs of blocks 3 and 4 are kept as skip
    connections. After two 1024-filter convolutions, the main path and both
    skips are reduced to ``num_classes`` channels, fused by two x2 upsampling
    + add steps, upsampled x8 and passed through a softmax.

    :param num_classes: number of output channels / classes.
    :param input_shape: shape passed to ``Input``.
    :param lr_init: ``lr`` of the Adam optimizer.
    :param lr_decay: ``decay`` of the Adam optimizer.
    :return: model compiled with categorical cross-entropy and ``dice_coef``.
    """

    def _conv_bn_relu(tensor, filters, layer_name):
        tensor = Conv2D(filters, (3, 3), padding='same', name=layer_name)(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    def _class_scores(tensor):
        tensor = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(tensor)
        return BatchNormalization()(tensor)

    def _upsample(tensor, factor):
        return Lambda(lambda t: tf.image.resize_images(
            t, (t.shape[1] * factor, t.shape[2] * factor)))(tensor)

    img_input = Input(input_shape)

    # (filters, number of convolutions) for blocks 1..5.
    encoder_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    pooled_outputs = []
    x = img_input
    for block_no, (filters, conv_count) in enumerate(encoder_plan, start=1):
        for conv_no in range(1, conv_count + 1):
            x = _conv_bn_relu(x, filters, 'block{}_conv{}'.format(block_no, conv_no))
        x = MaxPooling2D()(x)
        pooled_outputs.append(x)
    block_3_out = pooled_outputs[2]
    block_4_out = pooled_outputs[3]

    # Convolutionalized fully connected layers.
    # NOTE(review): ReLU is applied inside the conv and again after batch-norm.
    for kernel in ((7, 7), (1, 1)):
        x = Conv2D(1024, kernel, activation='relu', padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    # Classifying layers.
    x = _class_scores(x)
    block_3_out = _class_scores(block_3_out)
    block_4_out = _class_scores(block_4_out)

    # Fuse with the skip connections, deepest first.
    for skip in (block_4_out, block_3_out):
        x = _upsample(x, 2)
        x = Add()([x, skip])
        x = Activation('relu')(x)

    x = _upsample(x, 8)
    x = Activation('softmax')(x)

    model = Model(img_input, x)
    model.compile(optimizer=Adam(lr=lr_init, decay=lr_decay),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])
    return model
def __init__(self, config, output_size, latent_vector_input, data_vector_input, name_scope):
    """
    Stack ``depth`` coupling layers in both directions and build the
    log-likelihood tensor.

    Sets ``self._data_vector`` (output of the feed-forward stack),
    ``self._latent_vector`` (output of the feed-backward stack),
    ``self._log_det_jac``, ``self._det_jac_test`` and ``self._llh``.

    :param config: dictionary; keys read are 'depth' (default 4),
        'width_network' and 'translation_network' (default ``{}``)
    :param output_size: int
    :param latent_vector_input: generalized tensorflow input
    :param data_vector_input: input fed to the feed-backward stack
        (presumably the same kind of object as ``latent_vector_input``)
    :param name_scope: string
    """
    logger = logging.getLogger(self.__class__.__name__ + ":" + str(name_scope))

    # get config
    depth = get_or_default(config, 'depth', 4, logger)
    width_network_config = get_or_default(config, 'width_network', {}, logger)
    translation_network_config = get_or_default(config, 'translation_network', {}, logger)

    def _network_args(network_name, network_config):
        return {
            'output_size': output_size,
            'name_scope': name_scope,
            'name': network_name,
            'config': network_config
        }

    width_network_args = _network_args('width_network', width_network_config)
    translation_networks_args = _network_args('translation_network', translation_network_config)

    # generate masks, width and translation networks
    masks, w_model, t_model = [], [], []
    for layer_idx in range(depth):
        # even / odd masks alternate between consecutive coupling layers
        mask_cls = EvenMask if layer_idx % 2 == 0 else OddMask
        mask = mask_cls(output_size)
        with tf.name_scope(name_scope):
            masks.append(
                tf.Variable(mask(), dtype=tf.float32, trainable=False, name='mask'))

        # create w and t
        w_model.append(
            create_model(MultiLayerPerceptron,
                         input_size=output_size,
                         model_args=width_network_args))
        t_model.append(
            create_model(MultiLayerPerceptron,
                         input_size=output_size,
                         model_args=translation_networks_args))

    # feed-forward: layers applied in order 0 .. depth-1
    forward_layers = []
    for layer_idx in range(depth):
        if layer_idx == 0:
            source = latent_vector_input
        else:
            source = forward_layers[layer_idx - 1].output
        forward_layers.append(
            CouplingLayer(mode='feed_forward',
                          input_tensor=source,
                          mask=masks[layer_idx],
                          width_model=w_model[layer_idx],
                          translation_model=t_model[layer_idx]))
    self._data_vector = forward_layers[-1].output

    # feed backward: the same masks/networks, visited in reverse order
    backward_layers = []
    for step in range(depth):
        layer_idx = depth - 1 - step
        if step == 0:
            source = data_vector_input
        else:
            source = backward_layers[step - 1].output
        backward_layers.append(
            CouplingLayer(mode='feed_backward',
                          input_tensor=source,
                          mask=masks[layer_idx],
                          width_model=w_model[layer_idx],
                          translation_model=t_model[layer_idx]))
    self._latent_vector = backward_layers[-1].output

    # build the likelihood model
    concat_log_det_jac = Concatenate(axis=1)(
        [layer.log_det_jac for layer in backward_layers])
    self._log_det_jac = Lambda(lambda x: tf.reduce_sum(x, axis=1))(concat_log_det_jac)

    # Determinant of the stacked gradients of each latent component
    # with respect to the data input.
    latent_gradients = [
        tf.gradients(self._latent_vector[:, idx], data_vector_input)[0]
        for idx in range(output_size)
    ]
    self._det_jac_test = tf.matrix_determinant(tf.stack(latent_gradients, axis=1))

    # x[0] is the latent vector, x[1] the summed log-det-Jacobian.
    self._llh = Lambda(
        lambda x: -0.5 * output_size * np.log(2 * np.pi)
        - 0.5 * tf.reduce_sum(tf.square(x[0]), axis=1)
        + x[1])([self._latent_vector, self._log_det_jac])
padding='same', name=name_fn('conv', 2))(x) if batch_norm: x = BatchNormalization(name=name_fn('bn', 2))(x) x = LeakyReLU(alpha=0.3, name=name_fn('act', 2))(x) return x # As the dimensions of our images may not be normalized/even, it is possible that after # downsampling and upsampling, we do not reobtain the original size (with a difference # of +/- 1px). # To avoid the problems this may cause, we define a layer to slightly resize the generated # image to the dimensions of the target one: ResizeToSame = lambda name: Lambda( lambda images: tf.image.resize(images[0], tf.shape(images[1])[1:3]), # `images` is a tuple of 2 tensors. # We resize the first image tensor to the shape of the 2nd name=name) def unet(x, out_channels=3, layer_depth=4, filters_orig=32, kernel_size=4,activation_intermediate=None, batch_norm=True, final_activation='sigmoid'): """ Pass the tensor through a trainable UNet. :param x: Input tensor. :param out_channels: Number of output channels. :param layer_depth: Number of convolutional blocks vertically stacked. :param filters_orig: Number of filters for the 1st block (then multiplied by 2 every block). :param kernel_size: Kernel size for layers. :param batch_norm: Flag to apply batch normalization. :param final_activation: Name/function for the last activation.
def build_model(self):
    """Build the attention seq2seq training model and the inference models.

    Three Keras models are created and stored on the instance (nothing is
    returned):

    * ``self.e2d_model`` -- teacher-forced encoder/decoder, inputs
      ``[encoder_inputs, decoder_inputs, s0, c0]``, compiled with rmsprop and
      categorical cross-entropy.
    * ``self.encoder_model`` -- encoder inputs -> bidirectional LSTM outputs.
    * ``self.decoder_model`` -- single-step decoder, inputs
      ``[last word, encoder outputs, s, c]``, outputs ``[prediction, s, c]``.

    Hyper-parameters are read from ``self.args``: ``MAX_LEN_INPUT``,
    ``MAX_LEN_TARGET``, ``LEN_WORD2IDX_INPUTS``, ``LEN_WORD2IDX_OUTPUTS``,
    ``EMBEDDING_DIM``, ``LATENT_DIM`` and ``LATENT_DIM_DECODER``.
    """
    ###################
    ### Encoder
    ###################
    # definition
    encoder_inputs = Input(shape=(self.args['MAX_LEN_INPUT'],))
    encoder_embed = Embedding(self.args['LEN_WORD2IDX_INPUTS'] + 1,
                              self.args['EMBEDDING_DIM'],
                              input_length=self.args['MAX_LEN_INPUT'])
    encoder_bilstm = Bidirectional(
        LSTM(self.args['LATENT_DIM'], return_sequences=True))

    # pipeline
    encoder_x = encoder_embed(encoder_inputs)
    encoder_outputs = encoder_bilstm(encoder_x)

    ###################
    ### Decoder
    ###################
    # definition
    decoder_inputs = Input(shape=(self.args['MAX_LEN_TARGET'],))  # teacher forcing input
    decoder_embed = Embedding(self.args['LEN_WORD2IDX_OUTPUTS'] + 1,
                              self.args['EMBEDDING_DIM'])

    # pipeline
    decoder_x = decoder_embed(decoder_inputs)

    def _softmax_over_time(x):
        # make sure we do softmax over the time axis
        # expected shape is N x T x D
        if K.ndim(x) <= 2:
            # Explicit check instead of `assert`, which is stripped under -O.
            raise ValueError('softmax over time expects a tensor of rank > 2')
        e = K.exp(x - K.max(x, axis=1, keepdims=True))  # note: axis=1 is the time axis
        s = K.sum(e, axis=1, keepdims=True)
        return e / s

    # ATTENTION
    # Attention layers are created once, outside the loop, because the same
    # layers are reused Ty times by the decoder.
    attn_repeat_layer = RepeatVector(self.args['MAX_LEN_INPUT'])
    attn_concat_layer = Concatenate(axis=-1)
    attn_dense1 = Dense(10, activation='tanh')
    attn_dense2 = Dense(1, activation=_softmax_over_time)
    attn_dot = Dot(axes=1)  # to perform the weighted sum of alpha[t] * h[t]

    # define the rest of the decoder (after attention)
    decoder_lstm = LSTM(self.args['LATENT_DIM_DECODER'], return_state=True)
    decoder_dense = Dense(self.args['LEN_WORD2IDX_OUTPUTS'] + 1, activation='softmax')

    initial_s = Input(shape=(self.args['LATENT_DIM_DECODER'],), name='s0')
    initial_c = Input(shape=(self.args['LATENT_DIM_DECODER'],), name='c0')
    context_last_word_concat_layer = Concatenate(axis=2)  # for teacher forcing

    # Unlike previous seq2seq, we cannot get the output all in one step.
    # Instead we need to do Ty steps, and in each of those steps we need to
    # consider all Tx h's.

    # s, c will be re-assigned in each iteration of the loop
    s = initial_s
    c = initial_c

    def _one_step_attention(h, st_1):
        # h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2)
        # st_1 = s(t-1), shape = (LATENT_DIM_DECODER,)

        # copy s(t-1) Tx times
        # now shape = (Tx, LATENT_DIM_DECODER)
        st_1 = attn_repeat_layer(st_1)

        # Concatenate all h(t)'s with s(t-1)
        # Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2)
        x = attn_concat_layer([h, st_1])

        # Neural net first layer
        x = attn_dense1(x)

        # Neural net second layer with special softmax over time
        alphas = attn_dense2(x)

        # "Dot" the alphas and the h's
        # Remember a.dot(b) = sum over a[t] * b[t]
        context = attn_dot([alphas, h])

        return context

    # collect outputs in a list at first
    outputs = []
    # An LSTM normally runs this loop internally; here the loop is written by
    # hand so that a context vector can be computed at every step.
    for t in range(self.args['MAX_LEN_TARGET']):  # Ty times
        # get the context using attention
        context = _one_step_attention(encoder_outputs, s)

        # We need a different layer for each time step. `t` is bound as a
        # default argument so each Lambda keeps its own step index; a plain
        # closure would see the final value of `t` if the layer function is
        # ever executed again after the loop has finished.
        selector = Lambda(lambda x, t=t: x[:, t:t + 1])  # keep only the vector for step t
        xt = selector(decoder_x)

        # combine
        decoder_lstm_input = context_last_word_concat_layer([context, xt])

        # pass the combined [context, last word] into the LSTM
        # along with [s, c]
        # get the new [s, c] and output
        o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[s, c])

        # final dense layer to get next word prediction
        decoder_outputs = decoder_dense(o)
        outputs.append(decoder_outputs)

    def _stack_and_transpose(x):
        # Restore the batch-major layout.
        # 'outputs' is a list of length Ty; each element is of shape
        # (batch size, output vocab size). Stacking gives T x N x D and we
        # want N x T x D.
        x = K.stack(x)  # is now T x batch_size x output_vocab_size tensor
        x = K.permute_dimensions(x, pattern=(1, 0, 2))  # is now batch_size x T x output_vocab_size
        return x

    # pipeline
    stacker = Lambda(_stack_and_transpose)
    decoder_outputs = stacker(outputs)

    #########
    ### Encoder&Decoder Model
    self.e2d_model = Model(
        inputs=[
            encoder_inputs,
            decoder_inputs,
            initial_s,
            initial_c,
        ],
        outputs=decoder_outputs)

    # compile the model
    self.e2d_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    ########################### For Prediction ###########################
    ###################
    ### t1 Encoder
    ###################
    self.encoder_model = Model(encoder_inputs, encoder_outputs)

    ###################
    ### t1 Decoder
    ###################
    # next we define a T=1 decoder model
    encoder_outputs_as_input = Input(shape=(self.args['MAX_LEN_INPUT'], self.args['LATENT_DIM'] * 2,))
    decoder_inputs_single = Input(shape=(1,))
    decoder_inputs_single_x = decoder_embed(decoder_inputs_single)

    # no need to loop over attention steps this time because there is only one step
    context = _one_step_attention(encoder_outputs_as_input, initial_s)

    # combine context with last word
    decoder_lstm_input = context_last_word_concat_layer([context, decoder_inputs_single_x])

    # lstm and final dense
    o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[initial_s, initial_c])
    decoder_outputs = decoder_dense(o)

    # note: we don't really need the final stack and transpose because
    # there's only 1 output; it is already of size N x D, so there is no need
    # to make it 1 x N x D --> N x 1 x D.

    # create the model object
    self.decoder_model = Model(
        inputs=[
            decoder_inputs_single,
            encoder_outputs_as_input,
            initial_s,
            initial_c
        ],
        outputs=[decoder_outputs, s, c]
    )
def model_unet_kaggle(img_hieght, img_width, img_channel, num_classes):
    """
    Return a U-Net model for binary fire segmentation images.

    The input is divided by 255, passed through four encoder stages
    (16/32/64/128 filters), a 256-filter bottleneck and four decoder stages
    that concatenate the matching encoder output. The final layer is a
    single-channel 1x1 convolution with a sigmoid activation.

    :param img_hieght: Image Height
    :param img_width: Image Width
    :param img_channel: Number of channels in each image
    :param num_classes: Number of classes based on the Ground Truth Masks
        (not used by this function; the output always has one channel)
    :return: A convolutional NN based on Tensorflow and Keras
    """

    def _double_conv(tensor, filters, drop_rate):
        # conv -> dropout -> conv, all 3x3 / elu / he_normal / same padding
        tensor = Conv2D(filters, (3, 3), activation='elu',
                        kernel_initializer='he_normal', padding='same')(tensor)
        tensor = Dropout(drop_rate)(tensor)
        return Conv2D(filters, (3, 3), activation='elu',
                      kernel_initializer='he_normal', padding='same')(tensor)

    inputs = Input((img_hieght, img_width, img_channel))
    current = Lambda(lambda x: x / 255)(inputs)

    # Contracting path: remember each stage's output for the skip connections.
    skips = []
    for filters, drop_rate in ((16, 0.1), (32, 0.1), (64, 0.2), (128, 0.2)):
        stage_out = _double_conv(current, filters, drop_rate)
        skips.append(stage_out)
        current = MaxPooling2D((2, 2))(stage_out)

    # Bottleneck.
    current = _double_conv(current, 256, 0.3)

    # Expanding path: (filters, dropout, concatenation axis) per stage.
    decoder_plan = ((128, 0.2, -1), (64, 0.2, -1), (32, 0.1, -1), (16, 0.1, 3))
    for (filters, drop_rate, concat_axis), skip in zip(decoder_plan, reversed(skips)):
        upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(current)
        merged = concatenate([upsampled, skip], axis=concat_axis)
        current = _double_conv(merged, filters, drop_rate)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(current)

    model = Model(inputs=[inputs], outputs=[outputs])
    return model
def dual_path_block(x, block_type, cardinality, filter_increment,
                    pointwise_filters_a, grouped_conv_filters_b,
                    pointwise_filters_c):
    """Build one dual-path block and return its two output paths.

    :param x: a tensor, or a list of tensors. A list is concatenated along the
        channel axis to form the block input; for ``block_type == 'normal'``
        ``x[0]`` and ``x[1]`` are used as the incoming residual and dense paths.
    :param block_type: 'projection' (stride 1, projected input), 'downsample'
        (stride 2, projected input) or 'normal' (stride 1, no projection).
    :param cardinality: number of groups of the grouped convolution.
    :param filter_increment: channels added to the dense path by this block.
    :param pointwise_filters_a: filters of the first 1x1 convolution.
    :param grouped_conv_filters_b: total filters of the grouped convolution.
    :param pointwise_filters_c: filters of the residual path; channels beyond
        this index belong to the dense path.
    :return: ``[residual_path, dense_path]``.
    :raises ValueError: if ``block_type`` is not one of the three known values.
    """
    channels_first = tf.keras.backend.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else -1

    grouped_channels = int(grouped_conv_filters_b / cardinality)

    init = concatenate(x, axis=channel_axis) if isinstance(x, list) else x

    # (block type, stride, whether the input goes through a projection)
    known_block_types = (
        ('projection', (1, 1), True),
        ('downsample', (2, 2), True),
        ('normal', (1, 1), False),
    )
    for known_type, known_stride, needs_projection in known_block_types:
        if block_type == known_type:
            stride, projection = known_stride, needs_projection
            break
    else:
        raise ValueError(
            '`block_type` must be one of ["projection", "downsample", "normal"]. Given %s'
            % block_type)

    def _residual_channels(z):
        # first `pointwise_filters_c` channels
        if K.image_data_format() == 'channels_last':
            return z[:, :, :, :pointwise_filters_c]
        return z[:, :pointwise_filters_c, :, :]

    def _dense_channels(z):
        # everything after the first `pointwise_filters_c` channels
        if K.image_data_format() == 'channels_last':
            return z[:, :, :, pointwise_filters_c:]
        return z[:, pointwise_filters_c:, :, :]

    if projection:
        projection_path = relu_block(init,
                                     filters=pointwise_filters_c + 2 * filter_increment,
                                     kernel=(1, 1),
                                     stride=stride)
        input_residual_path = Lambda(_residual_channels)(projection_path)
        input_dense_path = Lambda(_dense_channels)(projection_path)
    else:
        input_residual_path = x[0]
        input_dense_path = x[1]

    # 1x1 -> grouped conv -> 1x1 bottleneck
    x = relu_block(init, filters=pointwise_filters_a, kernel=(1, 1))
    x = grouped_block(x,
                      grouped_channels=grouped_channels,
                      cardinality=cardinality,
                      strides=stride)
    x = relu_block(x, filters=pointwise_filters_c + filter_increment, kernel=(1, 1))

    output_residual_path = Lambda(_residual_channels)(x)
    output_dense_path = Lambda(_dense_channels)(x)

    residual_path = add([input_residual_path, output_residual_path])
    dense_path = concatenate([input_dense_path, output_dense_path], axis=channel_axis)

    return [residual_path, dense_path]
def pointnet_cls(include_top=True, weights=None, input_tensor=None,
                 input_shape=(2048, 3), pooling=None, classes=40,
                 activation=None, use_tnet=True):
    """
    PointNet model for object classification
    :param include_top: whether to include the stack of fully connected layers
    :param weights: one of `None` (random initialization),
                    'modelnet' (pre-training on ModelNet),
                    or the path to the weights file to be loaded.
    :param input_tensor: optional tensor of size BxNxK
    :param input_shape: shape of the input point clouds (NxK)
    :param pooling: Optional pooling mode for feature extraction
                    when `include_top` is `False`.
                    - `None` means that the output of the model
                        will be the 2D tensor output of the last convolutional block (Nx1024).
                    - `avg` means that global average pooling
                        will be applied to the output of the
                        last convolutional block, and thus
                        the output of the model will be a 1D tensor of size 1024.
                    - `max` means that global max pooling will
                        be applied.
    :param classes: number of classes in the classification problem; if dict, construct multiple disjoint top layers
    :param activation: activation of the last layer (default None).
    :param use_tnet: whether to use the transformation subnets or not.
    :return: Keras model of the classification network
    """

    assert K.image_data_format() == 'channels_last'

    # Generate input tensor and get base network
    if input_tensor is None:
        input_tensor = Input(input_shape, name='Input_cloud')

    num_point = input_tensor.shape[-2]
    net = pointnet_base(input_tensor, use_tnet)

    # Top layers
    if include_top:
        # Symmetric function: max pooling
        # Done in 2D since 1D is painfully slow
        net = MaxPooling2D(pool_size=(num_point, 1), padding='valid',
                           name='maxpool')(Lambda(K.expand_dims)(net))
        net = Reshape((1024, ))(net)
        if isinstance(classes, dict):
            # Disjoint stacks of fc layers, one per value in dict
            net = [
                dense_bn(net, units=512, scope=r + '_fc1', activation='relu')
                for r in classes
            ]
            net = [
                Dropout(0.3, name=r + '_dp1')(n) for r, n in zip(classes, net)
            ]
            net = [
                dense_bn(n, units=256, scope=r + '_fc2', activation='relu')
                for r, n in zip(classes, net)
            ]
            net = [
                Dropout(0.3, name=r + '_dp2')(n) for r, n in zip(classes, net)
            ]
            net = [
                Dense(units=classes[r], activation=activation, name=r)(n)
                for r, n in zip(classes, net)
            ]
        else:
            # Fully connected layers for a single classification task
            net = dense_bn(net, units=512, scope='fc1', activation='relu')
            net = Dropout(0.3, name='dp1')(net)
            net = dense_bn(net, units=256, scope='fc2', activation='relu')
            net = Dropout(0.3, name='dp2')(net)
            net = Dense(units=classes, name='fc3', activation=activation)(net)
    else:
        # Pooling over the point axis. The layer types were previously
        # swapped ('avg' built a max-pool and 'max' an average-pool),
        # contradicting the documented behaviour of `pooling`.
        if pooling == 'avg':
            net = AveragePooling2D(pool_size=(num_point, 1), padding='valid',
                                   name='avgpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)
        elif pooling == 'max':
            net = MaxPooling2D(pool_size=(num_point, 1), padding='valid',
                               name='maxpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)

    model = Model(input_tensor, net, name='pointnet_cls')

    # Load weights.
    if weights == 'modelnet':
        weights_path = keras_utils.get_file(
            'pointnet_modelnet_weights_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
        if K.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        model.load_weights(weights, by_name=True)

    return model
def ElmoEmbedding(x):
    """Return the "elmo" output of ``elmo_model`` for a batch of tokens.

    ``x`` is cast to ``tf.string`` and squeezed before being passed as
    "tokens". Every sequence is declared to have length ``max_len``, and the
    length vector has exactly ``batch_size`` entries, so the function relies
    on the module-level ``batch_size`` and ``max_len``.
    """
    return elmo_model(inputs={"tokens": tf.squeeze(tf.cast(x, tf.string)),"sequence_len": tf.constant(batch_size*[max_len]) }, signature="tokens", as_dict=True)["elmo"]
from tensorflow.keras.layers import Dense, Flatten, Embedding, LSTM, TimeDistributed, Dropout, Bidirectional, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.layers import concatenate
from tensorflow.keras import Input
# String-typed token input of length max_len.
# NOTE(review): `input` shadows the builtin of the same name at module level.
input = Input(shape=(max_len,), dtype=tf.string)
# ELMo embeddings computed inside the graph; declared output shape is (max_len, 1024).
embeddings = Lambda(ElmoEmbedding, output_shape=(max_len, 1024))(input)
# Two stacked bidirectional LSTMs; the output of the first is concatenated
# with the output of the second before the per-token classifier.
x = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(embeddings)
x_rnn = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(x)
x = concatenate([x, x_rnn])
# Softmax over n_tags applied at every time step.
out = TimeDistributed(Dense(n_tags, activation='softmax'))(x)
model = Model(input, out)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train on the first 1213 batches and validate on the last 135 batches;
# both sizes are whole multiples of batch_size.
X_tr, X_val = X_train[:1213*batch_size], X_train[-135*batch_size:]
y_tr, y_val = y_train[:1213*batch_size], y_train[-135*batch_size:]
# Add a trailing axis of size 1 to the label arrays.
y_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)
y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1)
m = np.array(X_tr)
print(m.shape)
0: 32, 1: 16 }[l], w // { 0: 32, 1: 16 }[l], num_anchors // 2, num_classes + 5)) for l in range(2) ] # 输入为*model_body.input, *y_true # 输出为model_loss loss_input = [*model_body.output, *y_true] model_loss = Lambda(yolo_loss, output_shape=(1, ), name='yolo_loss', arguments={ 'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing })(loss_input) model = Model([model_body.input, *y_true], model_loss) # 训练参数设置 logging = TensorBoard(log_dir=log_dir) checkpoint = ModelCheckpoint( log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
def build_transformer(source_vocabulary_size,
                      target_vocabulary_size,
                      max_length,
                      share_word_embedding=False,
                      n=6,
                      h=8,
                      d_k=64,
                      d_v=64,
                      d_model=512,
                      optimizer="adam",
                      null_token_value=0):
    """Assemble the training and inference Keras models of the transformer.

    Both models take the same two inputs, ``source_input`` and
    ``target_input`` (variable-length sequences). The training model outputs
    the loss tensor and registers it through ``add_loss``; the inference model
    outputs the un-activated projection onto the target vocabulary.

    Args:
        source_vocabulary_size: Number of rows of the source embedding.
        target_vocabulary_size: Number of rows of the target embedding and
            width of the final projection.
        max_length: Number of rows of the frozen positional embedding.
        share_word_embedding: When true, the decoder reuses the source
            embedding layer instead of creating its own.
        n, h, d_k, d_v, d_model: Forwarded unchanged to ``Encoder`` and
            ``Decoder``; ``d_model`` is also the embedding width.
        optimizer: Optimizer handed to ``compile``.
        null_token_value: Forwarded to ``_get_accuracy`` and ``_get_loss``
            (presumably the padding id to ignore -- confirm in those helpers).

    Returns:
        Tuple ``(train_model, inference_model)``.
    """
    src_tokens = Input(shape=(None, ), name="source_input")
    tgt_tokens = Input(shape=(None, ), name="target_input")

    # The encoder sees the source without its first position; the decoder is
    # fed the target without its last position and is scored against the
    # target without its first position.
    encoder_tokens = Lambda(lambda seq: seq[:, 1:])(src_tokens)
    decoder_tokens = Lambda(lambda seq: seq[:, :-1])(tgt_tokens)
    expected_tokens = Lambda(lambda seq: seq[:, 1:])(tgt_tokens)

    # Word embeddings: the layer is called "source_embedding" whether or not
    # it ends up shared with the decoder.
    src_embedding = Embedding(source_vocabulary_size,
                              d_model,
                              name="source_embedding")
    tgt_embedding = (src_embedding if share_word_embedding else Embedding(
        target_vocabulary_size, d_model, name="target_embedding"))

    # Frozen positional table, shared by encoder and decoder.
    positions = Embedding(
        max_length,
        d_model,
        trainable=False,
        weights=[_get_positional_encoding_matrix(max_length, d_model)],
        name="position_embedding")

    stack_kwargs = dict(n=n,
                        h=h,
                        d_k=d_k,
                        d_v=d_v,
                        d_model=d_model,
                        d_inner_hid=512)
    encoder = Encoder(src_embedding, positions, **stack_kwargs)
    decoder = Decoder(tgt_embedding, positions, **stack_kwargs)

    memory = encoder(encoder_tokens)
    decoded = decoder(decoder_tokens, memory)

    # Linear, bias-free projection to vocabulary size at every timestep.
    projection = TimeDistributed(Dense(target_vocabulary_size,
                                       activation=None,
                                       use_bias=False),
                                 name="output")
    logits = projection(decoded)

    scored_pair = [logits, expected_tokens]
    accuracy = Lambda(_get_accuracy,
                      arguments={"null_token_value":
                                 null_token_value})(scored_pair)
    loss = Lambda(_get_loss,
                  arguments={"null_token_value":
                             null_token_value})([logits, expected_tokens])

    # The loss is a graph tensor attached via add_loss, so compile() gets no
    # loss function of its own.
    train_model = Model(inputs=[src_tokens, tgt_tokens], outputs=loss)
    train_model.add_loss([loss])
    train_model.compile(optimizer, None)
    train_model.metrics_names.append('accuracy')
    train_model.metrics.append(accuracy)

    inference_model = Model([src_tokens, tgt_tokens], logits)
    return train_model, inference_model
def __init__(self):
    """Initialise the layer as a Lambda computing an element-wise natural log.

    A constant ``1e-10`` is added to the input before taking the logarithm so
    that an input of exactly zero does not produce ``-inf``.
    """
    # `tf.log` was dropped from the top-level namespace in TensorFlow 2.x;
    # `tf.math.log` is the same op and is available in both 1.x and 2.x.
    Lambda.__init__(self, lambda x: tf.math.log(x + 1e-10))
def __initialize_poison_tensorflow(self, x_trigger: np.ndarray, y_trigger: np.ndarray, x_poison: np.ndarray, y_poison: np.ndarray):
    """
    Initialize poison noises to be optimized.

    Builds `self.backdoor_model`, a Keras model that adds a learnable, bounded perturbation (one embedding row per
    poison sample) to its input and whose loss is one minus the dot product of two L2-normalised weight-gradient
    vectors: the one computed on the triggers and the one computed on the perturbed samples. Also stores
    `self.grad_ws_norm`, `self.model_trainable`, `self.optimizer` and `self.lr_schedule`.

    :param x_trigger: A list of samples to use as triggers.
    :param y_trigger: A list of target classes to classify the triggers into.
    :param x_poison: A list of training data to poison a portion of.
    :param y_poison: A list of labels for x_poison.
    :raises Exception: If `self.substitute_classifier` is not a `TensorFlowV2Classifier`.
    """
    # pylint: disable=no-name-in-module
    from tensorflow.keras import backend as K
    import tensorflow as tf
    from tensorflow.keras.layers import Input, Embedding, Add, Lambda
    from art.estimators.classification.tensorflow import TensorFlowV2Classifier

    if isinstance(self.substitute_classifier, TensorFlowV2Classifier):
        classifier = self.substitute_classifier
    else:
        raise Exception(
            "This method requires `TensorFlowV2Classifier` as `substitute_classifier`'s type"
        )

    # Remember the original flag so it can be restored elsewhere, then freeze the substitute model.
    self.model_trainable = classifier.model.trainable
    classifier.model.trainable = False  # This value gets revert back later.

    def _weight_grad(classifier: TensorFlowV2Classifier, x: tf.Tensor, target: tf.Tensor) -> tf.Tensor:
        # Get the target gradient vector.
        # Returns the gradient of the compiled loss w.r.t. all model weights, flattened into a single
        # vector and divided by its L2 norm. Weights with no gradient (None) are dropped.
        import tensorflow as tf

        with tf.GradientTape() as t:  # pylint: disable=C0103
            t.watch(classifier.model.weights)
            output = classifier.model(x, training=False)
            loss = classifier.model.compiled_loss(target, output)
        d_w = t.gradient(loss, classifier.model.weights)
        d_w = [w for w in d_w if w is not None]
        d_w = tf.concat([tf.reshape(d, [-1]) for d in d_w], 0)
        d_w_norm = d_w / tf.sqrt(tf.reduce_sum(tf.square(d_w)))
        return d_w_norm

    # Normalised gradient direction induced by the trigger samples; computed once and reused as the target.
    self.grad_ws_norm = _weight_grad(classifier, tf.constant(x_trigger), tf.constant(y_trigger))

    # Define the model to apply and optimize the poison.
    input_poison = Input(batch_shape=classifier.model.input.shape)
    input_indices = Input(shape=())
    y_true_poison = Input(shape=np.shape(y_poison)[1:])
    # One embedding row per poison sample, with as many entries as a single (flattened) input sample.
    embedding_layer = Embedding(
        len(x_poison),
        np.prod(input_poison.shape[1:]),
        embeddings_initializer=tf.keras.initializers.RandomNormal(
            stddev=self.epsilon * 0.01),
    )
    embeddings = embedding_layer(input_indices)
    # tanh keeps the noise inside (-epsilon, epsilon).
    embeddings = tf.tanh(embeddings) * self.epsilon
    embeddings = tf.reshape(embeddings, tf.shape(input_poison))
    input_noised = Add()([input_poison, embeddings])
    input_noised = Lambda(
        lambda x: K.clip(x, self.clip_values[0], self.clip_values[1])
    )(input_noised)  # Make sure the poisoned samples are in a valid range.

    def loss_fn(input_noised: tf.Tensor, target: tf.Tensor, grad_ws_norm: tf.Tensor):
        # Both vectors are unit-normalised, so the summed product is their cosine similarity;
        # the loss is one minus that value.
        d_w2_norm = _weight_grad(classifier, input_noised, target)
        B = 1 - tf.reduce_sum(grad_ws_norm * d_w2_norm)  # pylint: disable=C0103
        return B

    B = tf.keras.layers.Lambda(lambda x: loss_fn(x[0], x[1], x[2]))(  # pylint: disable=C0103
        [input_noised, y_true_poison, self.grad_ws_norm])

    # The model outputs both the perturbed samples and the loss value; the loss is attached via add_loss.
    self.backdoor_model = tf.keras.models.Model(
        [input_poison, y_true_poison, input_indices], [input_noised, B])
    self.backdoor_model.add_loss(B)

    class PredefinedLRSchedule(
            tf.keras.optimizers.schedules.LearningRateSchedule):
        """
        Use a preset learning rate based on the current training epoch.
        """

        def __init__(self, learning_rates: List[float], milestones: List[int]):
            # Stored as (milestone, learning_rate) pairs in the order given.
            self.schedule = list(zip(milestones, learning_rates))

        def __call__(self, step: int) -> float:
            # Walk the milestones in order: before the first milestone the first rate is returned;
            # once `step` has reached a milestone, that milestone's rate applies until the next one.
            lr_prev = self.schedule[0][1]
            for m, learning_rate in self.schedule:
                if step < m:
                    return lr_prev
                lr_prev = learning_rate
            return lr_prev

        def get_config(self) -> Dict:
            """
            Returns the parameters.
            """
            return {"schedule": self.schedule}

    # Adam is fed only the sign of each gradient (the transformer replaces g with tf.sign(g)).
    self.optimizer = tf.keras.optimizers.Adam(gradient_transformers=[
        lambda grads_and_vars: [(tf.sign(g), v) for (g, v) in grads_and_vars]
    ])
    # `self.learning_rate_schedule` is unpacked as (learning_rates, milestones).
    self.lr_schedule = tf.keras.callbacks.LearningRateScheduler(
        PredefinedLRSchedule(*self.learning_rate_schedule))
def __init__(self, size):
    """Initialise the layer as a Lambda that tiles its input over two new axes.

    The wrapped function inserts singleton axes at positions 2 and 3 and then
    repeats the tensor ``size`` times along each of them; axes 0 and 1 are
    left as they are.

    :param size: Repeat count used for both inserted axes.
    """
    Lambda.__init__(
        self,
        lambda t: tf.tile(
            tf.expand_dims(tf.expand_dims(t, axis=2), axis=3),
            multiples=[1, 1, size, size],
        ),
    )
def sub_pixel_conv2d(scale=2, **kwargs):
    """Return a Lambda layer applying ``depth_to_space`` with block size ``scale``.

    :param scale: Block size handed to ``tf.compat.v1.depth_to_space``.
    :param kwargs: Extra keyword arguments forwarded to the ``Lambda``
        constructor (for example ``name``).
    :return: The configured ``Lambda`` layer.
    """
    return Lambda(
        lambda tensor: tf.compat.v1.depth_to_space(tensor, block_size=scale),
        **kwargs,
    )
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14):
    """Build the sliding-window CNN + LSTM line recogniser trained with CTC.

    The line image is cut into windows by ``slide_window``; each window is
    passed through the LeNet feature extractor (its final layer removed), the
    resulting sequence goes through one LSTM and a per-timestep softmax.

    Args:
        input_shape: ``(image_height, image_width)`` of a line image.
        output_shape: ``(output_length, num_classes)``.
        window_width: Width of each sliding window.
        window_stride: Step between consecutive windows.

    Returns:
        A Keras model with inputs ``[image, y_true, input_length,
        label_length]`` and outputs ``[ctc_loss, ctc_decoded]``.

    Raises:
        ValueError: If the window settings yield fewer windows than
            ``output_length``.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    # Number of windows that fit when sliding across the image width.
    num_windows = 1 + int((image_width - window_width) / window_stride)
    if output_length > num_windows:
        raise ValueError(
            f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})'
        )

    img_in = Input(shape=input_shape, name='image')
    labels_in = Input(shape=(output_length, ), name='y_true')
    input_len_in = Input(shape=(1, ), name='input_length')
    label_len_in = Input(shape=(1, ), name='label_length')

    # More than one local device is taken to mean a GPU is available, in
    # which case the CuDNN-backed LSTM is used.
    if len(device_lib.list_local_devices()) > 1:
        recurrent_cls = CuDNNLSTM
    else:
        recurrent_cls = LSTM

    # Add a channel axis, then split the image into windows.
    img_with_channel = Reshape((image_height, image_width, 1))(img_in)
    window_args = {
        'window_width': window_width,
        'window_stride': window_stride,
    }
    patches = Lambda(slide_window, arguments=window_args)(img_with_channel)

    # LeNet truncated at its second-to-last layer serves as the per-window
    # feature extractor.
    full_lenet = lenet((image_height, window_width, 1), (num_classes, ))
    feature_net = KerasModel(inputs=full_lenet.inputs,
                             outputs=full_lenet.layers[-2].output)
    window_features = TimeDistributed(feature_net)(patches)

    sequence = recurrent_cls(200, return_sequences=True)(window_features)
    class_probs = Dense(num_classes,
                        activation='softmax',
                        name='softmax_output')(sequence)

    # Scale the supplied input_length by the number of windows.
    scaled_input_len = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_len_in)

    ctc_loss = Lambda(
        lambda parts: K.ctc_batch_cost(parts[0], parts[1], parts[2], parts[3]),
        name='ctc_loss')(
            [labels_in, class_probs, scaled_input_len, label_len_in])

    ctc_decoded = Lambda(
        lambda parts: ctc_decode(parts[0], parts[1], output_length),
        name='ctc_decoded')([class_probs, scaled_input_len])

    return KerasModel(
        inputs=[img_in, labels_in, input_len_in, label_len_in],
        outputs=[ctc_loss, ctc_decoded])
# Two stacked bidirectional GRUs followed by a per-timestep softmax classifier.
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Dense(num_classes, activation='softmax')(x)
# Define the prediction model
model = Model(image_input, x)

# In[6]:

# Define the label input
labels = Input(shape=(max_len), name='max_len')
# Input length
input_len = Input(shape=(1), name='input_len')
# Label length
label_len = Input(shape=(1), name='label_len')
# A Lambda layer wraps a custom function into the network so that custom
# computations can be part of the graph
ctc_out = Lambda(ctc_lambda_func, name='ctc')([x, labels, input_len, label_len])
# Define the CTC training model
ctc_model = Model(inputs=[image_input, labels, input_len, label_len], outputs=ctc_out)

# In[7]:

# Note that load_weights is used here to load the weights; load_model cannot be
# used directly to load the model.
# Keras does not ship the CTC loss; it is defined in TensorFlow and is therefore
# a custom loss from Keras' point of view.
# If a saved model contains a custom loss, that loss must also be declared when
# calling load_model.
# In this application it is simpler to rebuild the model and load the weights
# with load_weights.
model.load_weights('Best_Captcha_ctc.h5')

# In[34]:

# The character set used for prediction has one extra blank symbol
def UpSampling2DBilinear(size):
    """Return a Lambda layer that bilinearly resizes its input to ``size``.

    :param size: Target size passed straight to ``resize_bilinear``.
    :return: A ``Lambda`` layer performing the resize with
        ``align_corners=True``.
    """
    # `tf.image.resize_bilinear` was removed from the top-level API in
    # TensorFlow 2.x; the `tf.compat.v1` alias is the same op with the same
    # `align_corners` semantics and exists in both 1.x and 2.x.
    return Lambda(lambda x: tf.compat.v1.image.resize_bilinear(
        x, size, align_corners=True))
def CreateModel(self):
    '''
    Define the CNN + GRU + CTC model with the Keras functional API.

    Structure as built below:
      * input of shape (AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, 1);
      * four pairs of 3x3 convolutions (32, 64, 128, 128 filters), each pair
        followed by max pooling, with dropout in between;
      * reshape to a sequence, a dense layer, then two bidirectional GRU
        stages assembled by hand;
      * dense layers ending in a softmax over MS_OUTPUT_SIZE classes;
      * a Lambda layer that computes the CTC loss.

    Returns a tuple (model, model_data): `model` is the compiled training
    model whose output is the CTC loss, `model_data` maps the audio input to
    the softmax output.
    '''
    # Original note: each frame is represented by 13-dim MFCC features plus their
    # 13-dim first- and second-order deltas, with a maximum signal length of 1500.
    # NOTE(review): this does not obviously match the hard-coded Reshape below; confirm.
    input_data = Input(name='the_input',
                       shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    layer_h1 = Conv2D(32, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(input_data)  # convolution layer
    layer_h1 = Dropout(0.1)(layer_h1)
    layer_h2 = Conv2D(32, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(layer_h1)  # convolution layer
    layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                            padding="valid")(layer_h2)  # pooling layer
    #layer_h3 = Dropout(0.2)(layer_h2) # randomly drop some connections to prevent overfitting
    layer_h3 = Dropout(0.2)(layer_h3)
    layer_h4 = Conv2D(64, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(layer_h3)  # convolution layer
    layer_h4 = Dropout(0.2)(layer_h4)
    layer_h5 = Conv2D(64, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(layer_h4)  # convolution layer
    layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                            padding="valid")(layer_h5)  # pooling layer

    layer_h6 = Dropout(0.3)(layer_h6)
    layer_h7 = Conv2D(128, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(layer_h6)  # convolution layer
    layer_h7 = Dropout(0.3)(layer_h7)
    layer_h8 = Conv2D(128, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(layer_h7)  # convolution layer
    layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                            padding="valid")(layer_h8)  # pooling layer

    layer_h9 = Dropout(0.3)(layer_h9)
    layer_h10 = Conv2D(128, (3, 3),
                       use_bias=True,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(layer_h9)  # convolution layer
    layer_h10 = Dropout(0.4)(layer_h10)
    layer_h11 = Conv2D(128, (3, 3),
                       use_bias=True,
                       activation='relu',
                       padding='same',
                       kernel_initializer='he_normal')(layer_h10)  # convolution layer
    # pool_size=1 leaves the spatial dimensions unchanged.
    layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                             padding="valid")(layer_h11)  # pooling layer

    #test=Model(inputs = input_data, outputs = layer_h6)
    #test.summary()

    # The three 2x poolings above divide both spatial axes by 8 and the last
    # convolution has 128 channels, so this hard-coded target only fits
    # AUDIO_LENGTH == 1600 and AUDIO_FEATURE_LENGTH == 200 (200 x 25*128).
    layer_h13 = Reshape((200, 3200))(layer_h12)  # Reshape layer
    layer_h13 = Dropout(0.4)(layer_h13)
    layer_h14 = Dense(128,
                      activation="relu",
                      use_bias=True,
                      kernel_initializer='he_normal')(layer_h13)  # fully connected layer
    layer_h14 = Dropout(0.4)(layer_h14)
    inner = layer_h14
    #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM layer

    rnn_size = 128
    # First recurrent stage: forward and backward GRU outputs are summed.
    # NOTE(review): per the Keras docs, go_backwards=True returns the sequence in
    # reversed time order; confirm that merging it with the forward output
    # without reversing it back is intended.
    gru_1 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru1')(inner)
    gru_1b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    # Second recurrent stage: forward and backward GRU outputs are concatenated.
    gru_2 = GRU(rnn_size,
                return_sequences=True,
                kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size,
                 return_sequences=True,
                 go_backwards=True,
                 kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)

    gru2 = concatenate([gru_2, gru_2b])
    #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

    layer_h15 = Dropout(0.4)(gru2)
    layer_h16 = Dense(128,
                      activation="relu",
                      use_bias=True,
                      kernel_initializer='he_normal')(layer_h15)  # fully connected layer

    layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop some connections to prevent overfitting
    layer_h17 = Dense(self.MS_OUTPUT_SIZE,
                      use_bias=True,
                      kernel_initializer='he_normal')(layer_h16)  # fully connected layer

    y_pred = Activation('softmax', name='Activation0')(layer_h17)
    # Prediction-only model: audio features in, per-timestep softmax out.
    model_data = Model(inputs=input_data, outputs=y_pred)
    #model_data.summary()

    labels = Input(name='the_labels',
                   shape=[self.label_max_string_length],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer

    #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)

    model.summary()

    # clipnorm seems to speeds up convergence
    #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)

    #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    # The 'ctc' layer already outputs the loss value, so the compiled loss
    # simply passes the model output through.
    model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                  optimizer=ada_d)

    # captures output of softmax so we can decode the output during visualization
    # NOTE(review): test_func is never used or returned within this method.
    test_func = K.function([input_data], [y_pred])

    print('[*提示] 创建模型成功,模型编译成功')
    return model, model_data
def build_model(image_size, n_classes, mode='training', l2_regularization=0.0, min_scale=0.1, max_scale=0.9, scales=None, aspect_ratios_global=[0.5, 1.0, 2.0], aspect_ratios_per_layer=None, two_boxes_for_ar1=True, steps=None, offsets=None, clip_boxes=False, variances=[1.0, 1.0, 1.0, 1.0], coords='centroids', normalize_coords=False, subtract_mean=None, divide_by_stddev=None, swap_channels=False, confidence_thresh=0.01, iou_threshold=0.45, top_k=200, nms_max_output_size=400, return_predictor_sizes=False): ''' Build a Keras model with SSD architecture, see references. The model consists of convolutional feature layers and a number of convolutional predictor layers that take their input from different feature layers. The model is fully convolutional. The implementation found here is a smaller version of the original architecture used in the paper (where the base network consists of a modified VGG-16 extended by a few convolutional feature layers), but of course it could easily be changed to an arbitrarily large SSD architecture by following the general design pattern used here. This implementation has 7 convolutional layers and 4 convolutional predictor layers that take their input from layers 4, 5, 6, and 7, respectively. Most of the arguments that this function takes are only needed for the anchor box layers. In case you're training the network, the parameters passed here must be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading trained weights, the parameters passed here must be the same as the ones used to produce the trained weights. Some of these arguments are explained in more detail in the documentation of the `SSDBoxEncoder` class. Note: Requires Keras v2.0 or later. Training currently works only with the TensorFlow backend (v1.0 or later). Arguments: image_size (tuple): The input image size in the format `(height, width, channels)`. n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO. 
mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode, the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes, the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding, non-maximum suppression, and top-k filtering. The difference between latter two modes is that 'inference' follows the exact procedure of the original Caffe implementation, while 'inference_fast' uses a faster prediction decoding procedure. l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers. min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction of the shorter side of the input images. max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction of the shorter side of the input images. All scaling factors between the smallest and the largest will be linearly interpolated. Note that the second to last of the linearly interpolated scaling factors will actually be the scaling factor for the last predictor layer, while the last scaling factor is used for the second box for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer. This list must be one element longer than the number of predictor layers. The first `k` elements are the scaling factors for the `k` predictor layers, while the last element is used for the second box for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional last scaling factor must be passed either way, even if it is not being used. If a list is passed, this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero. aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be generated. 
This list is valid for all predictor layers. The original implementation uses more aspect ratios for some predictor layers and fewer for others. If you want to do that, too, then use the next argument instead. aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer. This allows you to set the aspect ratios for each predictor layer individually. If a list is passed, it overrides `aspect_ratios_global`. two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise. If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated using the scaling factor for the respective layer, the second one will be generated using geometric mean of said scaling factor and next bigger scaling factor. steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over the image. If the list contains ints/floats, then that value will be used for both spatial dimensions. If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`. If no steps are provided, then they will be computed such that the anchor box center points will form an equidistant grid within the image dimensions. offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be either floats or tuples of two floats. These numbers represent for each predictor layer how many pixels from the top and left boarders of the image the top-most and left-most anchor box center points should be as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions of the step size specified in the `steps` argument. 
If the list contains floats, then that value will be used for both spatial dimensions. If the list contains tuples of two floats, then they represent `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size, which is also the recommended setting. clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries. variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by its respective variance value. coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`. normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates, i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates. subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values of any shape that is broadcast-compatible with the image shape. The elements of this array will be subtracted from the image pixel intensity values. For example, pass a list of three integers to perform per-channel mean normalization for color images. divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or floating point values of any shape that is broadcast-compatible with the image shape. The image pixel intensity values will be divided by the elements of this array. For example, pass a list of three integers to perform per-channel standard deviation normalization for color images. 
swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input image channels should be swapped. confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific positive class in order to be considered for the non-maximum suppression stage for the respective class. A lower value will result in a larger part of the selection process being done by the non-maximum suppression stage, while a larger value will result in a larger part of the selection process happening in the confidence thresholding stage. iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold` with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers to the box's confidence score. top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the non-maximum suppression stage. nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage. return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since you can always get their sizes easily via the Keras API, but it's convenient and less error-prone to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the spatial dimensions of the predictor layers), for inference you don't need them. Returns: model: The Keras SSD model. predictor_sizes (optional): A Numpy array containing the `(height, width)` portion of the output tensor shape for each convolutional predictor layer. 
During training, the generator function needs this in order to transform the ground truth labels into tensors of identical structure as the output tensors of the model, which is in turn needed for the cost function. References: https://arxiv.org/abs/1512.02325v5 ''' n_predictor_layers = 4 # The number of predictor conv layers in the network n_classes += 1 # Account for the background class. l2_reg = l2_regularization # Make the internal name shorter. img_height, img_width, img_channels = image_size[0], image_size[ 1], image_size[2] ############################################################################ # Get a few exceptions out of the way. ############################################################################ if aspect_ratios_global is None and aspect_ratios_per_layer is None: raise ValueError( "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified." ) if aspect_ratios_per_layer: if len(aspect_ratios_per_layer) != n_predictor_layers: raise ValueError( "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}." .format(n_predictor_layers, len(aspect_ratios_per_layer))) if (min_scale is None or max_scale is None) and scales is None: raise ValueError( "Either `min_scale` and `max_scale` or `scales` need to be specified." ) if scales: if len(scales) != n_predictor_layers + 1: raise ValueError( "It must be either scales is None or len(scales) == {}, but len(scales) == {}." .format(n_predictor_layers + 1, len(scales))) else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale` scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1) if len( variances ) != 4: # We need one variance value for each of the four box coordinates raise ValueError( "4 variance values must be pased, but {} values were received.". 
format(len(variances))) variances = np.array(variances) if np.any(variances <= 0): raise ValueError( "All variances must be >0, but the variances given are {}".format( variances)) if (not (steps is None)) and (len(steps) != n_predictor_layers): raise ValueError( "You must provide at least one step value per predictor layer.") if (not (offsets is None)) and (len(offsets) != n_predictor_layers): raise ValueError( "You must provide at least one offset value per predictor layer.") ############################################################################ # Compute the anchor box parameters. ############################################################################ # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers. if aspect_ratios_per_layer: aspect_ratios = aspect_ratios_per_layer else: aspect_ratios = [aspect_ratios_global] * n_predictor_layers # Compute the number of boxes to be predicted per cell for each predictor layer. # We need this so that we know how many channels the predictor layers need to have. if aspect_ratios_per_layer: n_boxes = [] for ar in aspect_ratios_per_layer: if (1 in ar) & two_boxes_for_ar1: n_boxes.append(len(ar) + 1) # +1 for the second box for aspect ratio 1 else: n_boxes.append(len(ar)) else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer if (1 in aspect_ratios_global) & two_boxes_for_ar1: n_boxes = len(aspect_ratios_global) + 1 else: n_boxes = len(aspect_ratios_global) n_boxes = [n_boxes] * n_predictor_layers if steps is None: steps = [None] * n_predictor_layers if offsets is None: offsets = [None] * n_predictor_layers ############################################################################ # Define functions for the Lambda layers below. 
############################################################################ def identity_layer(tensor): return tensor def input_mean_normalization(tensor): return tensor - np.array(subtract_mean) def input_stddev_normalization(tensor): return tensor / np.array(divide_by_stddev) #def input_channel_swap(tensor): # if len(swap_channels) == 3: # return K.stack([tensor[...,swap_channels[0]], tensor[...,swap_channels[1]], tensor[...,swap_channels[2]]], axis=-1) # elif len(swap_channels) == 4: # return K.stack([tensor[...,swap_channels[0]], tensor[...,swap_channels[1]], tensor[...,swap_channels[2]], tensor[...,swap_channels[3]]], axis=-1) ############################################################################ # Build the network. ############################################################################ x = Input(shape=(img_height, img_width, img_channels)) # The following identity layer is only needed so that the subsequent lambda layers can be optional. x1 = Lambda(identity_layer, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x) if not (subtract_mean is None): x1 = Lambda(input_mean_normalization, output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1) if not (divide_by_stddev is None): x1 = Lambda(input_stddev_normalization, output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1) #if swap_channels: #REMOVED FOR TFLITE # x1 = Lambda(input_channel_swap, output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1) conv1 = Conv2D(32, (5, 5), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1')(x1) conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')( conv1 ) # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3 conv1 = ELU(name='elu1')(conv1) pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1) conv2 = Conv2D(48, (3, 3), 
strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2')(pool1) conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2) conv2 = ELU(name='elu2')(conv2) pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2) conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3')(pool2) conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3) conv3 = ELU(name='elu3')(conv3) pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3) conv4 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4')(pool3) conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4) conv4 = ELU(name='elu4')(conv4) pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4) conv5 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5')(pool4) conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5) conv5 = ELU(name='elu5')(conv5) pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5) conv6 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6')(pool5) conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6) conv6 = ELU(name='elu6')(conv6) pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6) conv7 = Conv2D(32, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7')(pool6) conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7) conv7 = ELU(name='elu7')(conv7) # The next part is to add the convolutional predictor layers on top of the base network # that we defined above. Note that I use the term "base network" differently than the paper does. 
# To me, the base network is everything that is not convolutional predictor layers or anchor # box layers. In this case we'll have four predictor layers, but of course you could # easily rewrite this into an arbitrarily deep base network and add an arbitrary number of # predictor layers on top of the base network by simply following the pattern shown here. # Build the convolutional predictor layers on top of conv layers 4, 5, 6, and 7. # We build two predictor layers on top of each of these layers: One for class prediction (classification), one for box coordinate prediction (localization) # We precidt `n_classes` confidence values for each box, hence the `classes` predictors have depth `n_boxes * n_classes` # We predict 4 box coordinates for each box, hence the `boxes` predictors have depth `n_boxes * 4` # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)` classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes4')(conv4) classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes5')(conv5) classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes6')(conv6) classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes7')(conv7) # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)` boxes4 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes4')(conv4) boxes5 = Conv2D(n_boxes[1] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes5')(conv5) boxes6 = Conv2D(n_boxes[2] * 4, (3, 3), strides=(1, 
1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes6')(conv6) boxes7 = Conv2D(n_boxes[3] * 4, (3, 3), strides=(1, 1), padding="same", kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes7')(conv7) # Generate the anchor boxes # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)` anchors4 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors4')(boxes4) anchors5 = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors5')(boxes5) anchors6 = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors6')(boxes6) anchors7 = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors7')(boxes7) # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)` # We want the classes isolated in the last axis to perform softmax on them #Shape inference is different for keras.layers and tf.keras.layers (-1), check documentation #So I had to manually input the intended shape of the reshape #cba = 
classes, boxes, anchors SHAPE: shape will be reused later anyways cba_4 = classes4.shape[1] * classes4.shape[2] * n_boxes[0] cba_5 = classes5.shape[1] * classes5.shape[2] * n_boxes[1] cba_6 = classes6.shape[1] * classes6.shape[2] * n_boxes[2] cba_7 = classes7.shape[1] * classes7.shape[2] * n_boxes[3] classes4_reshaped = Reshape((cba_4, n_classes), name='classes4_reshape')(classes4) classes5_reshaped = Reshape((cba_5, n_classes), name='classes5_reshape')(classes5) classes6_reshaped = Reshape((cba_6, n_classes), name='classes6_reshape')(classes6) classes7_reshaped = Reshape((cba_7, n_classes), name='classes7_reshape')(classes7) # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)` #shape for classes_reshaped is SAME with boxes EXCEPT for n_classes and anchors so NO NEED TO RECOMPUTE # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss boxes4_reshaped = Reshape((cba_4, 4), name='boxes4_reshape')(boxes4) boxes5_reshaped = Reshape((cba_5, 4), name='boxes5_reshape')(boxes5) boxes6_reshaped = Reshape((cba_6, 4), name='boxes6_reshape')(boxes6) boxes7_reshaped = Reshape((cba_7, 4), name='boxes7_reshape')(boxes7) # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)` anchors4_reshaped = Reshape((cba_4, 8), name='anchors4_reshape')(anchors4) anchors5_reshaped = Reshape((cba_5, 8), name='anchors5_reshape')(anchors5) anchors6_reshaped = Reshape((cba_6, 8), name='anchors6_reshape')(anchors6) anchors7_reshaped = Reshape((cba_7, 8), name='anchors7_reshape')(anchors7) # Concatenate the predictions from the different layers and the assosciated anchor box tensors # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions, # so we want to concatenate along axis 1 # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes) classes_concat = Concatenate(axis=1, name='classes_concat')([ 
classes4_reshaped, classes5_reshaped, classes6_reshaped, classes7_reshaped ]) # Output shape of `boxes_concat`: (batch, n_boxes_total, 4) boxes_concat = Concatenate(axis=1, name='boxes_concat')( [boxes4_reshaped, boxes5_reshaped, boxes6_reshaped, boxes7_reshaped]) # Output shape of `anchors_concat`: (batch, n_boxes_total, 8) anchors_concat = Concatenate(axis=1, name='anchors_concat')([ anchors4_reshaped, anchors5_reshaped, anchors6_reshaped, anchors7_reshaped ]) # The box coordinate predictions will go into the loss function just the way they are, # but for the class predictions, we'll apply a softmax activation layer first classes_softmax = Activation('softmax', name='classes_softmax')(classes_concat) # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8) predictions = Concatenate(axis=2, name='predictions')( [classes_softmax, boxes_concat, anchors_concat]) if mode == 'training': model = Model(inputs=x, outputs=predictions) elif mode == 'inference': decoded_predictions = DecodeDetections( confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) elif mode == 'inference_fast': decoded_predictions = DecodeDetectionsFast( confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) else: raise ValueError( "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'." 
.format(mode)) if return_predictor_sizes: # The spatial dimensions are the same for the `classes` and `boxes` predictor layers. predictor_sizes = np.array([ classes4._keras_shape[1:3], classes5._keras_shape[1:3], classes6._keras_shape[1:3], classes7._keras_shape[1:3] ]) return model, predictor_sizes else: return model
def build_stage2_generator():
    """
    Assemble the Stage-II generator.

    The model takes two inputs, a 1024-dimensional vector and a 64x64x3
    image, and is made of three parts: the CA augmentation network, the
    image encoder, and the generator network (joint block, residual
    blocks and upsampling blocks).

    Returns a Keras ``Model`` whose outputs are the tanh-activated image
    tensor and ``mean_logsigma``.
    """
    embedding_in = Input(shape=(1024, ))
    lr_image_in = Input(shape=(64, 64, 3))

    # 1. CA augmentation network
    dense_out = Dense(256)(embedding_in)
    mean_logsigma = LeakyReLU(alpha=0.2)(dense_out)
    c = Lambda(generate_c)(mean_logsigma)

    # 2. Image encoder: one 3x3 stem followed by two strided 4x4 stages
    feat = ZeroPadding2D(padding=(1, 1))(lr_image_in)
    feat = Conv2D(128, kernel_size=(3, 3), strides=1, use_bias=False)(feat)
    feat = ReLU()(feat)
    for n_filters in (256, 512):
        feat = ZeroPadding2D(padding=(1, 1))(feat)
        feat = Conv2D(n_filters, kernel_size=(4, 4), strides=2,
                      use_bias=False)(feat)
        feat = BatchNormalization()(feat)
        feat = ReLU()(feat)

    # 3. Joint block: combine the conditioning code with the image features
    c_code = Lambda(joint_block)([c, feat])
    feat = ZeroPadding2D(padding=(1, 1))(c_code)
    feat = Conv2D(512, kernel_size=(3, 3), strides=1, use_bias=False)(feat)
    feat = BatchNormalization()(feat)
    feat = ReLU()(feat)

    # 4. Four residual blocks
    for _ in range(4):
        feat = residual_block(feat)

    # 5. Four upsampling blocks with a decreasing number of filters
    for n_filters in (512, 256, 128, 64):
        feat = UpSampling2D(size=(2, 2))(feat)
        feat = Conv2D(n_filters, kernel_size=3, padding="same", strides=1,
                      use_bias=False)(feat)
        feat = BatchNormalization()(feat)
        feat = ReLU()(feat)

    # Project to 3 channels and squash with tanh
    feat = Conv2D(3, kernel_size=3, padding="same", strides=1,
                  use_bias=False)(feat)
    generated = Activation('tanh')(feat)

    return Model(inputs=[embedding_in, lr_image_in],
                 outputs=[generated, mean_logsigma])
def define_generator(image_shape, probe_light_shape, latent_dim):
    """
    Build the two-input generator model.

    An image branch and a probe-light branch are each encoded by three
    convolution stages, flattened, concatenated and squeezed through a
    dense bottleneck of size ``latent_dim``. The bottleneck is reshaped to
    16x16x256 and decoded with depth-to-space + transposed convolutions,
    using skip connections to ``conv2`` and ``conv1`` of the image branch.

    Parameters
    ----------
    image_shape : tuple
        Shape passed to the image ``Input`` layer.
    probe_light_shape : tuple
        Shape passed to the probe-light ``Input`` layer.
    latent_dim : int
        Number of units in the first dense bottleneck layer.

    Returns
    -------
    Model
        Keras model mapping ``[in_image, probe_image_target]`` to a
        3-channel, tanh-activated image tensor.

    Notes
    -----
    The skip connections require the decoder feature maps to match the
    spatial sizes of ``conv2`` / ``conv1``; this presumably constrains
    ``image_shape`` -- TODO confirm against callers.
    """
    init = RandomNormal(stddev=0.02)
    in_image = Input(shape=image_shape)
    probe_image_target = Input(shape=probe_light_shape)

    # --- Image encoder
    conv1 = Conv2D(64, (7, 7), padding='same',
                   kernel_initializer=init)(in_image)
    conv1 = BatchNormalization(axis=-1)(conv1)
    # Bug fix: this was `conv1 - LeakyReLU(alpha=0.2)(conv1)`, a subtraction
    # whose result was discarded, so the activation was never applied to
    # conv1 (which feeds both pool1 and the merge2 skip connection).
    conv1 = LeakyReLU(alpha=0.2)(conv1)
    pool1 = AveragePooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(128, (3, 3), strides=(2, 2), padding='same',
                   kernel_initializer=init)(pool1)
    conv2 = BatchNormalization(axis=-1)(conv2)
    conv2 = LeakyReLU(alpha=0.2)(conv2)
    pool2 = AveragePooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(256, (3, 3), strides=(2, 2), padding='same',
                   kernel_initializer=init)(pool2)
    conv3 = BatchNormalization(axis=-1)(conv3)
    conv3 = LeakyReLU(alpha=0.2)(conv3)

    # --- Probe-light encoder (same layer pattern as the image encoder)
    pn = Conv2D(64, (7, 7), padding='same',
                kernel_initializer=init)(probe_image_target)
    pn = BatchNormalization(axis=-1)(pn)
    pn = LeakyReLU(alpha=0.2)(pn)
    pn = AveragePooling2D(pool_size=(2, 2))(pn)

    pn = Conv2D(128, (3, 3), strides=(2, 2), padding='same',
                kernel_initializer=init)(pn)
    pn = BatchNormalization(axis=-1)(pn)
    pn = LeakyReLU(alpha=0.2)(pn)
    pn = AveragePooling2D(pool_size=(2, 2))(pn)

    pn = Conv2D(256, (3, 3), strides=(2, 2), padding='same',
                kernel_initializer=init)(pn)
    pn = BatchNormalization(axis=-1)(pn)
    pn = LeakyReLU(alpha=0.2)(pn)

    # --- Bottleneck: fuse both branches and map back to a 16x16x256 volume
    g = Flatten()(conv3)
    pn = Flatten()(pn)
    g = Concatenate()([g, pn])
    g = Dense(latent_dim, activation='relu', kernel_initializer=init)(g)
    g = Dense(16 * 16 * 256, activation='relu', kernel_initializer=init)(g)
    g = Reshape((16, 16, 256))(g)

    # --- Decoder: depth_to_space (block size 2) before each transposed conv
    sub_layer1 = Lambda(lambda x: tf.nn.depth_to_space(x, 2))
    sub_layer2 = Lambda(lambda x: tf.nn.depth_to_space(x, 2))

    up1 = Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
                          kernel_initializer=init)(sub_layer1(inputs=g))
    up1 = BatchNormalization(axis=-1)(up1)
    up1 = LeakyReLU(alpha=0.2)(up1)
    merge1 = Concatenate()([up1, conv2])

    up2 = Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
                          kernel_initializer=init)(sub_layer2(inputs=merge1))
    up2 = BatchNormalization(axis=-1)(up2)
    up2 = LeakyReLU(alpha=0.2)(up2)
    merge2 = Concatenate()([up2, conv1])

    # --- Output head
    final = Conv2D(3, (7, 7), padding='same',
                   kernel_initializer=init)(merge2)
    final = BatchNormalization(axis=-1)(final)
    out_image = Activation('tanh')(final)

    model = Model([in_image, probe_image_target], out_image)
    return model
def build_model(self):
    """
    Build and compile the projection model.

    Three inputs (hyponym, negative samples, hypernym) are embedded through
    ``self.embeddings_layer``. The hyponym embedding is passed through
    ``self.phi_k`` bias-free Dense projections; each projection is compared
    with the hypernym and negative embeddings via normalized dot products,
    and a single sigmoid unit produces the prediction.

    The model is compiled with Adam and the loss returned by
    ``self.custom_loss(phi_negative, self.lambda_c)``.

    Returns the compiled Keras ``Model``.
    """
    # NOTE(review): block nesting below was reconstructed from a source
    # without reliable indentation -- confirm the extent of each `if` body.
    hypo_input = Input(shape=(1, ), name='Hyponym')
    neg_input = Input(shape=(self.synonym_sample_n, ), name='Negative')
    hyper_input = Input(shape=(1, ), name='Hypernym')

    hypo_embedding, neg_embedding, hyper_embedding = self.embeddings_layer(
        [hypo_input, neg_input, hyper_input])

    # Same dropout rate on each of the three embedded inputs
    hypo_embedding = Dropout(rate=0.3, name='Dropout_Hypo')(hypo_embedding)
    hyper_embedding = Dropout(rate=0.3,
                              name='Dropout_Hyper')(hyper_embedding)
    neg_embedding = Dropout(rate=0.3, name='Dropout_Neg')(neg_embedding)

    # One linear projection (no bias, no activation) per phi; the output
    # width is the second dimension of the embeddings matrix
    phi_layer = []
    for i in range(self.phi_k):
        phi_layer.append(
            Dense(self.data.embeddings_matrix.shape[1],
                  activation=None,
                  use_bias=False,
                  kernel_initializer=RandomIdentity(),
                  name='Phi%d' % (i))(hypo_embedding))

    # either merge phi layers in 1 or flatten single phi projection
    if self.phi_k == 1:
        # flatten tensors
        phi = Flatten(name='Flatten_Phi')(phi_layer[0])
        #hyper_embedding = Flatten(name='Flatten_Hyper')(hyper_embedding)
    else:
        phi = Concatenate(axis=1)(phi_layer)

    phi = Dropout(rate=0.3, name='Dropout_Phi')(phi)

    # compute mean phi projection
    #phi_mean = Lambda(lambda x: K.mean(x, axis=1, keepdims=True))(phi)

    # compute synonymy similarity to each projection
    phi_negative = Dot(axes=-1, normalize=True,
                       name='SimNeg')([phi, neg_embedding])

    # compute hypernym similarity to each projection
    phi_hyper = Dot(axes=-1, normalize=True,
                    name='SimHyper')([phi, hyper_embedding])

    if self.phi_k > 1:
        # find projection which yields highest projection
        phi_hyper = Flatten(name='Flatten_PhiHyper')(phi_hyper)

        # in the case of multiple phi, calculate the mean similarity between each projection
        # and negative case
        phi_negative = Lambda(lambda x: K.mean(x, axis=1),
                              name='MeanPhiNeg')(phi_negative)

    # zero_neg is identically zero (x * 0.), so the subtraction leaves
    # phi_hyper's values unchanged; presumably it exists only to tie the
    # negative branch into the prediction graph -- TODO confirm.
    zero_neg = Lambda(lambda x: K.mean(x * 0., axis=-1),
                      name='ZeroPhiNeg')(phi_negative)
    phi_hyper = Subtract(name='DummySub')([phi_hyper, zero_neg])

    prediction = Dense(
        1,
        activation="sigmoid",
        name='Prediction',
        use_bias=True,
        kernel_initializer='random_normal',
        bias_initializer=Zeros(),
    )(phi_hyper)

    model = Model(inputs=[hypo_input, neg_input, hyper_input],
                  outputs=prediction)

    # The loss is built from the negative-similarity tensor and lambda_c
    regul_loss = self.custom_loss(phi_negative, self.lambda_c)

    adam = Adam(lr=self.lr,
                beta_1=self.beta1,
                beta_2=self.beta2,
                clipnorm=self.clip_value)
    #adam = Adadelta()
    model.compile(optimizer=adam, loss=regul_loss, metrics=['accuracy'])
    return model
""" def return_pad_me(padding): def pad_me(x): #FRANK# x[:,:,:y,:] slice x off from y at the given axis. return (tf.concat((x, x[:, :, :padding, :]), 2)) # return(tf.concat((2,x,x[:,:,:padding,:]))) return (pad_me) input_shape = x_train_eve[0].shape print("image shape", input_shape) model_event = Sequential(name='Sequential_for_event') model_event.add( Lambda(return_pad_me(4), input_shape=input_shape, name='event')) model_event.add( Conv2D(32, kernel_size=(5, 5), strides=(1, 1), activation='relu', data_format='channels_first', name='event_2D_1')) model_event.add( Lambda(return_pad_me(1), input_shape=input_shape, name='event_padding_1')) model_event.add( MaxPooling2D(pool_size=(2, 2), strides=(2, 2), data_format='channels_first',
# Small convolutional classifier: two conv layers, one dense layer and a
# softmax over the label set, with an identity Lambda named 'colors_prob'
# appended so the output op has a stable name.
model = Sequential()
model.add(Convolution2D(16, 3, 3, activation='relu', input_shape=(128, 128, 3)))
model.add(BatchNormalization())
model.add(Convolution2D(16, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.5))
# model.add(BatchNormalization())
# One output unit per entry of the label decoding table
model.add(Dense(len(trainSet.hotEncodeReverse), activation='softmax'))
model.add(Lambda(lambda x: x, name='colors_prob'))
model.summary()

# NOTE(review): binary_crossentropy is paired with a softmax output here;
# the original author left 'categorical_crossentropy' as a commented
# alternative -- confirm which one is intended.
#categorical_crossentropy
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Kept although the name is unused: constructing the Saver happens before
# the graph definition is serialized below.
saver = tf.train.Saver()

# Best-effort export of the graph definition. Only ordinary errors are
# swallowed (a bare `except:` would also hide KeyboardInterrupt/SystemExit),
# and the cause is reported instead of being dropped.
try:
    with open(os.path.join(model_n_ckpt_dir, 'model.pb'), 'wb') as f:
        f.write(tf.keras.backend.get_session().graph_def.SerializeToString())
except Exception as err:
    print("failed model n ckpt ", err)

model.fit(trainSet.allData['images'], trainSet.allData['labels'], batch_size=256, nb_epoch=5, verbose=1)
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """
    Train the autoencoder on a graph and return the learned embedding.

    Parameters
    ----------
    graph : optional
        Graph to embed. If falsy, the graph is loaded from ``edge_f``.
    edge_f : optional
        Edge-list file passed to ``graph_util.loadGraphFromEdgeListTxt``
        when ``graph`` is not given.
    is_weighted, no_python :
        Not referenced in this method.

    Returns
    -------
    tuple
        ``(self._Y, elapsed)`` where ``elapsed`` is ``t2 - t1``, measured
        from just after the sparse matrix is built until prediction ends.

    Raises
    ------
    Exception
        If neither ``graph`` nor ``edge_f`` is provided.
    """
    # NOTE(review): block nesting was reconstructed from a source without
    # reliable indentation; in particular np.savetxt is assumed to sit
    # inside the `_savefilesuffix` branch -- confirm.
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    t1 = time()
    # Symmetrize the matrix by averaging it with its transpose
    S = (S + S.T) / 2
    self._node_num = len(graph.nodes)

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    # If cannot use previous step information, initialize new models
    self._encoder = get_encoder(self._node_num, self._d, self._K,
                                self._n_units, self._nu1, self._nu2,
                                self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._K,
                                self._n_units, self._nu1, self._nu2,
                                self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input: one vector of width 2 * node_num, split into two halves
    x_in = Input(shape=(2 * self._node_num, ), name='x_in')
    x1 = Lambda(lambda x: x[:, 0:self._node_num],
                output_shape=(self._node_num, ))(x_in)
    x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                output_shape=(self._node_num, ))(x_in)
    # Process inputs: both halves go through the same autoencoder
    [x_hat1, y1] = self._autoencoder(x1)
    [x_hat2, y2] = self._autoencoder(x2)
    # Outputs
    x_diff1 = Subtract()([x_hat1, x1])
    x_diff2 = Subtract()([x_hat2, x2])
    y_diff = Subtract()([y2, y1])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''
        Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.

        y_pred: Contains x_hat - x
        y_true: Contains [b, deg]
        '''
        # First node_num columns of y_true weight the error; the column at
        # index node_num divides the summed result
        return KBack.sum(KBack.square(
            y_pred * y_true[:, 0:self._node_num]),
            axis=-1) / y_true[:, self._node_num]

    def weighted_mse_y(y_true, y_pred):
        '''
        Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.

        y_pred: Contains y2 - y1
        y_true: Contains s12
        '''
        min_batch_size = KBack.shape(y_true)[0]
        return KBack.reshape(KBack.sum(KBack.square(y_pred), axis=-1),
                             [min_batch_size, 1]) * y_true

    # Model
    self._model = Model(inputs=x_in, outputs=[x_diff1, x_diff2, y_diff])
    sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True,
              clipnorm=1.0)
    # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(
        optimizer=sgd,
        loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
        loss_weights=[1, 1, self._alpha])

    # One epoch covers (number of nonzero entries of S) // batch size steps
    self._model.fit_generator(
        generator=batch_generator_Teammate(S, self._beta, self._n_batch,
                                           True),
        epochs=self._num_iter,
        steps_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
        verbose=1)

    # Get embedding for all points
    self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
    t2 = time()

    # Save the autoencoder and its weights
    if (self._weightfile is not None):
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if (self._modelfile is not None):
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if (self._savefilesuffix is not None):
        saveweights(self._encoder,
                    'T_encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'T_decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'T_encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'T_decoder_model_' + self._savefilesuffix + '.json')
        # Save the embedding
        np.savetxt('T_embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)
def __call__(self, inputs, *args, **kwargs):
    """Pad ``inputs`` with ``self._pad`` and delegate to the parent call.

    Any extra positional and keyword arguments are forwarded unchanged.
    """
    # Unfortunate workaround necessary for TF < 1.13
    pad_layer = Lambda(self._pad)
    return super().__call__(pad_layer(inputs), *args, **kwargs)
def __init__(self, dims):
    """Set up a Lambda layer that transposes its input.

    ``dims`` is passed to ``tf.transpose`` as the permutation.
    """
    transpose_fn = lambda tensor: tf.transpose(tensor, perm=dims)
    Lambda.__init__(self, transpose_fn)
def __init__(self, num_classes=19, output_stride=16, backbonetype='mobilenetv2', weights='imagenet', dl_input_shape=(None, 483, 769, 3), weight_decay=0.00004, pooling='global', residual_shortcut=False):
    """
    Build the CMSNet segmentation model.

    Steps performed here: create the backbone via ``self._createBackbone``,
    attach one of three pooling heads (``'aspp'``, ``'spp'`` or
    ``'global'``), project the concatenated features with a 1x1 convolution,
    optionally add a residual shortcut from the backbone layer named by
    ``self.strideOutput8LayerName``, apply dropout, predict ``num_classes``
    channels with a 1x1 convolution, bilinearly resize to
    ``dl_input_shape[1:3]`` and apply softmax. The parent class is then
    initialized a second time with the resulting inputs/outputs.

    :param num_classes: number of filters of the final 1x1 convolution.
    :param output_stride: forwarded to ``_createBackbone``; also selects
        the dilation rates of the ``'aspp'`` head (16 or 8).
    :param backbonetype: forwarded to ``_createBackbone``.
    :param weights: not referenced in this method.
    :param dl_input_shape: input shape used to compute the backbone output
        shape and the final resize target.
    :param weight_decay: L2 regularization factor for the head convolutions.
    :param pooling: one of ``'aspp'``, ``'spp'``, ``'global'``.
    :param residual_shortcut: if true, add the stride-8 shortcut branch
        (asserted to require ``output_stride == 16``).
    """
    # NOTE(review): block nesting below was reconstructed from a source
    # without reliable indentation -- confirm the extent of each branch.
    super(CMSNet, self).__init__(name='cmsnet')
    """ :param num_classes: (Default value = 19) :param output_stride: (Default value = 16) if strid count is 4 remove stride from block 13 and inser atrous in 14, 15 and 16 if strid count is 3 remove stride from block 6/13 and inser atrous rate 2 in 7-13/ and rate 4 14-16 :param backbonetype: (Default value = 'mobilenetv2') :param weights: (Default value = 'imagenet') :param input_shape: (Default value = (None, 483,769,3) :param weight_decay: use 0.00004 for MobileNet-V2 or Xcpetion model backbonetype. Use 0.0001 for ResNet backbonetype. """
    self.logger = logging.getLogger('perception.models.CMSNet')
    self.logger.info('creating an instance of CMSNet with backbone ' +
                     backbonetype + ', OS' + str(output_stride) +
                     ', nclass=' + str(num_classes) + ', input=' +
                     str(dl_input_shape) + ', pooling=' + pooling +
                     ', residual=' + str(residual_shortcut))

    self.num_classes = num_classes
    self.output_stride = output_stride
    self.dl_input_shape = dl_input_shape

    self._createBackbone(backbonetype=backbonetype,
                         output_stride=output_stride)

    # All with 256 filters and batch normalization.
    # one 1x1 convolution and three 3x3 convolutions with rates = (6, 12, 18) when output stride = 16.
    # Rates are doubled when output stride = 8.

    # Create Spatial Pyramid Pooling
    x = self.backbone.output

    # pooling_shape is used below with slicing and `.concatenate`, so it is
    # presumably a TensorShape -- TODO confirm.
    pooling_shape = self.backbone.compute_output_shape(self.dl_input_shape)
    pooling_shape_float = tf.cast(pooling_shape[1:3], tf.float32)

    assert pooling in [
        'aspp', 'spp', 'global'
    ], "Only suported pooling= 'aspp', 'spp' or 'global'."

    if pooling == 'aspp':
        # NOTE(review): `rates` stays unbound when output_stride is neither
        # 16 nor 8, which would fail at rates[0] below.
        if output_stride == 16:
            rates = (6, 12, 18)
        elif output_stride == 8:
            rates = (12, 24, 36)

        # grid level: pooling (three dilated 3x3 branches)
        x0 = Conv2D(filters=256, kernel_size=3, name='aspp_0_expand',
                    padding="same", dilation_rate=rates[0],
                    kernel_regularizer=l2(weight_decay))(x)
        x0 = BatchNormalization(name='aspp_0_expand_BN')(x0)  # epsilon=1e-5
        x0 = ReLU(name='aspp_0_expand_relu')(x0)

        x1 = Conv2D(filters=256, kernel_size=3, name='aspp_1_expand',
                    padding="same", dilation_rate=rates[1],
                    kernel_regularizer=l2(weight_decay))(x)
        x1 = BatchNormalization(name='aspp_1_expand_BN')(x1)  # epsilon=1e-5
        x1 = ReLU(name='aspp_1_expand_relu')(x1)

        x2 = Conv2D(filters=256, kernel_size=3, name='aspp_2_expand',
                    padding="same", dilation_rate=rates[2],
                    kernel_regularizer=l2(weight_decay))(x)
        x2 = BatchNormalization(name='aspp_2_expand_BN')(x2)  # epsilon=1e-5
        x2 = ReLU(name='aspp_2_expand_relu')(x2)

        # grid level: all (1x1 branch)
        xn = Conv2D(filters=256, kernel_size=1, name='aspp_n_expand',
                    kernel_regularizer=l2(weight_decay))(x)
        xn = BatchNormalization(name='aspp_n_expand_BN')(xn)  # epsilon=1e-5
        xn = ReLU(name='aspp_n_expand_relu')(xn)

        # Concatenate spatial pyramid pooling; the static shapes are pinned
        # to the backbone output's leading dims before concatenation
        x0.set_shape(pooling_shape[0:3].concatenate(x0.get_shape()[-1]))
        x1.set_shape(pooling_shape[0:3].concatenate(x1.get_shape()[-1]))
        x2.set_shape(pooling_shape[0:3].concatenate(x2.get_shape()[-1]))
        xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
        x = Concatenate(name='aspp_concatenate')([x0, x1, x2, xn])

    elif pooling == 'spp':
        rates = (1, 2, 3, 6)

        # grid level: pooling. Each branch average-pools with a window of
        # (spatial size / rate), applies a 1x1 conv, BN and ReLU, then
        # resizes back to the backbone output's spatial size.
        x0 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[0],
                                         tf.int32),
                       padding="valid",
                       name='spp_0_average_pooling2d')(x)
        x0 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                    kernel_size=1, name='spp_0_expand',
                    kernel_regularizer=l2(weight_decay))(x0)
        x0 = BatchNormalization(name='spp_0_expand_BN')(x0)  # epsilon=1e-5
        x0 = ReLU(name='spp_0_expand_relu')(x0)
        # The resize API differs between TF major versions 1 and 2
        if tf.__version__.split('.')[0] == '1':
            x0 = Lambda(lambda x0: tf.image.resize_bilinear(
                x0, pooling_shape[1:3], align_corners=True),
                name='spp_0_resize_bilinear')(x0)
        else:
            x0 = Lambda(lambda x0: tf.image.resize(
                x0, pooling_shape[1:3],
                method=tf.image.ResizeMethod.BILINEAR),
                name='spp_0_resize_bilinear')(x0)

        x1 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[1],
                                         tf.int32),
                       padding="valid",
                       name='spp_1_average_pooling2d')(x)
        x1 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                    kernel_size=1, name='spp_1_expand',
                    kernel_regularizer=l2(weight_decay))(x1)
        x1 = BatchNormalization(name='spp_1_expand_BN')(x1)  # epsilon=1e-5
        x1 = ReLU(name='spp_1_expand_relu')(x1)
        if tf.__version__.split('.')[0] == '1':
            x1 = Lambda(lambda x1: tf.image.resize_bilinear(
                x1, pooling_shape[1:3], align_corners=True),
                name='spp_1_resize_bilinear')(x1)
        else:
            x1 = Lambda(lambda x1: tf.image.resize(
                x1, pooling_shape[1:3],
                method=tf.image.ResizeMethod.BILINEAR),
                name='spp_1_resize_bilinear')(x1)

        x2 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[2],
                                         tf.int32),
                       padding="valid",
                       name='spp_2_average_pooling2d')(x)
        x2 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                    kernel_size=1, name='spp_2_expand',
                    kernel_regularizer=l2(weight_decay))(x2)
        x2 = BatchNormalization(name='spp_2_expand_BN')(x2)  # epsilon=1e-5
        x2 = ReLU(name='spp_2_expand_relu')(x2)
        if tf.__version__.split('.')[0] == '1':
            x2 = Lambda(lambda x2: tf.image.resize_bilinear(
                x2, pooling_shape[1:3], align_corners=True),
                name='spp_2_resize_bilinear')(x2)
        else:
            x2 = Lambda(lambda x2: tf.image.resize(
                x2, pooling_shape[1:3],
                method=tf.image.ResizeMethod.BILINEAR),
                name='spp_2_resize_bilinear')(x2)

        x3 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[3],
                                         tf.int32),
                       padding="valid",
                       name='spp_3_average_pooling2d')(x)
        x3 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                    kernel_size=1, name='spp_3_expand',
                    kernel_regularizer=l2(weight_decay))(x3)
        x3 = BatchNormalization(name='spp_3_expand_BN')(x3)  # epsilon=1e-5
        x3 = ReLU(name='spp_3_expand_relu')(x3)
        if tf.__version__.split('.')[0] == '1':
            x3 = Lambda(lambda x3: tf.image.resize_bilinear(
                x3, pooling_shape[1:3], align_corners=True),
                name='spp_3_resize_bilinear')(x3)
        else:
            x3 = Lambda(lambda x3: tf.image.resize(
                x3, pooling_shape[1:3],
                method=tf.image.ResizeMethod.BILINEAR),
                name='spp_3_resize_bilinear')(x3)

        # grid level: all (1x1 branch)
        xn = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                    kernel_size=1, name='spp_n_expand',
                    kernel_regularizer=l2(weight_decay))(x)
        xn = BatchNormalization(name='spp_n_expand_BN')(xn)  # epsilon=1e-5
        xn = ReLU(name='spp_n_expand_relu')(xn)

        # Concatenate spatial pyramid pooling
        # NOTE(review): x3 is built above but is not part of this
        # concatenation -- confirm whether that is intentional before
        # changing it (adding it would alter the layer's input width).
        xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
        x = Concatenate(name='spp_concatenate')([x0, x1, x2, xn])

    elif pooling == 'global':
        # grid level: pooling over the whole backbone output
        x0 = AvgPool2D(pool_size=pooling_shape[1:3], padding="valid",
                       name='spp_0_average_pooling2d')(x)
        x0 = Conv2D(filters=256, kernel_size=1, name='spp_0_expand',
                    kernel_regularizer=l2(weight_decay))(x0)
        x0 = BatchNormalization(name='spp_0_expand_BN')(x0)  # epsilon=1e-5
        x0 = ReLU(name='spp_0_expand_relu')(x0)
        # x0 = tf.image.resize(x0,
        #     size=pooling_shape[1:3],
        #     method=tf.image.ResizeMethod.BILINEAR, name='spp_0_resize_bilinear')
        if tf.__version__.split('.')[0] == '1':
            x0 = Lambda(lambda x0: tf.image.resize_bilinear(
                x0, pooling_shape[1:3], align_corners=True),
                name='spp_0_resize_bilinear')(x0)
        else:
            x0 = Lambda(lambda x0: tf.image.resize(
                x0, pooling_shape[1:3],
                method=tf.image.ResizeMethod.BILINEAR),
                name='spp_0_resize_bilinear')(x0)

        # grid level: all (1x1 branch)
        xn = Conv2D(filters=256, kernel_size=1, name='spp_1_expand',
                    kernel_regularizer=l2(weight_decay))(x)
        xn = BatchNormalization(name='spp_1_expand_BN')(xn)  # epsilon=1e-5
        xn = ReLU(name='spp_1_expand_relu')(xn)

        # Concatenate spatial pyramid pooling
        xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
        x = Concatenate(name='spp_concatenate')([x0, xn])

    # Concatenation projection: 1x1 conv down to 256 channels
    x = Conv2D(filters=256, kernel_size=1, name='spp_concat_project',
               kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization(name='spp_concat_project_BN')(x)  # epsilon=1e-5
    x = ReLU(name='spp_concat_project_relu')(x)

    if residual_shortcut:
        assert output_stride == 16, "For while residual shotcut is available for atous with os16."

        # self.strideOutput8LayerName
        # block_6_project_BN (BatchNormal (None, 61, 97, 64)
        os8_shape = self.backbone.get_layer(
            self.strideOutput8LayerName).output_shape
        os8_output = self.backbone.get_layer(
            self.strideOutput8LayerName).output

        # Match the shortcut's channel count and spatial size, then add
        x = Conv2D(filters=os8_shape[-1], kernel_size=1,
                   name='shotcut_2x_conv',
                   kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(name='shotcut_2x_BN')(x)  # epsilon=1e-5
        if tf.__version__.split('.')[0] == '1':
            x = Lambda(lambda x: tf.image.resize_bilinear(
                x, os8_shape[1:3], align_corners=True),
                name='shotcut_2x_bilinear')(x)
        else:
            x = Lambda(lambda x: tf.image.resize(
                x, os8_shape[1:3], method=tf.image.ResizeMethod.BILINEAR),
                name='shotcut_2x_bilinear')(x)
        x = ReLU(name='shotcut_2x_relu')(x)
        x = Add(name='shotcut_2x_add')([x, os8_output])

    x = Dropout(rate=0.1, name='dropout')(x)

    # Semantic Segmentation: one output channel per class
    x = Conv2D(filters=num_classes, kernel_size=1, name='segmentation',
               kernel_regularizer=l2(weight_decay))(x)
    #x = BatchNormalization(name='segmentation_BN')(x)
    # x = tf.image.resize(x, size=self.dl_input_shape[1:3],
    #     method=tf.image.ResizeMethod.BILINEAR, name='segmentation_bilinear')
    # Resize the prediction back to the input's spatial size
    if tf.__version__.split('.')[0] == '1':
        x = Lambda(lambda x: tf.image.resize_bilinear(
            x, self.dl_input_shape[1:3], align_corners=True),
            name='segmentation_bilinear')(x)
    else:
        x = Lambda(lambda x: tf.image.resize(
            x, self.dl_input_shape[1:3],
            method=tf.image.ResizeMethod.BILINEAR),
            name='segmentation_bilinear')(x)

    x = Softmax(name='logistic_softmax')(x)
    # logits for training
    # argmax

    # Second parent initialization, now with the built graph's endpoints
    super(CMSNet, self).__init__(inputs=self.backbone.input, outputs=x,
                                 name='cmsnet')
def __init__(self, scale):
    """Set up a Lambda layer that casts its input to float32 and
    multiplies it by ``scale``.
    """
    scale_fn = lambda tensor: tf.cast(tensor, dtype=tf.float32) * scale
    Lambda.__init__(self, scale_fn)
def bilinear_cnn(num_classes, input_shape, backbone_cnn=None, fB=None, conv1x1=None, dense_layers=None, dropout_rate=None):
    '''Combine two feature extracting CNNs into single Model with
    bilinear_pooling + FC layers.

    Both feature networks are built with `make_backbone` and should output
    4D tensors of equal shape, except (optionally) in # of channels.

    Parameters
    ----------
    num_classes : int
        Number of classes for softmax output layer.
    input_shape : tuple of int
        Shape of input images. Must be compatible with both feature
        networks.
    backbone_cnn : KerasModel or str
        Feature network A, passed to `make_backbone`. Required: an
        AssertionError is raised when it is `None`.
    fB : KerasModel or str, optional
        Feature network B, passed to `make_backbone`. If the result is
        `None`, the symmetric B-CNN (network A with itself) is built.
        NOTE(review): this relies on `make_backbone(None, ...)` returning
        `None` -- confirm against `make_backbone`.
    conv1x1 : int or iterable(int), optional
        Add a 1x1 conv to reduce number of channels in (A, B) to some
        value(s). If iterable, must be length 2; values then mapped to
        (A, B).
    dense_layers : iterable of int, optional
        Sizes for additional Dense layers between bilinear vector and
        softmax. `None` (the default) means no additional layers.
    dropout_rate : float, optional
        Specify a dropout rate for Dense layers.

    Returns
    -------
    B-CNN : KerasModel
        Single bilinear CNN composed from A & B (asymmetric) or A with
        itself (symmetric).
    '''
    assert backbone_cnn is not None

    # A fresh list per call; the former `dense_layers=[]` default was one
    # shared object across all calls.
    if dense_layers is None:
        dense_layers = []

    fA = make_backbone(backbone_cnn, input_shape)
    fB = make_backbone(fB, input_shape)

    input_image = Input(shape=input_shape)

    outA = fA(input_image)
    if fB is None:
        outB = outA  # symmetric B-CNN
    else:
        outB = fB(input_image)  # asymmetric B-CNN

    # Optional channel reduction before pooling
    if isinstance(conv1x1, int):
        outA = Conv2D(conv1x1, (1, 1), name='reduce_A')(outA)
        outB = Conv2D(conv1x1, (1, 1), name='reduce_B')(outB)
    elif hasattr(conv1x1, '__iter__'):
        assert len(conv1x1) == 2, 'if iterable, conv1x1 must have length of 2'
        outA = Conv2D(conv1x1[0], (1, 1), name='reduce_A')(outA)
        outB = Conv2D(conv1x1[1], (1, 1), name='reduce_B')(outB)

    x = Lambda(bilinear_pooling, name='bilinear_pooling')([outA, outB])
    x = make_dense_layers(dense_layers, dropout=dropout_rate)(x)
    pred = Dense(num_classes, activation='softmax')(x)

    model = KerasModel(inputs=input_image, outputs=pred)
    return model
def __init__(self, axis=-1):
    """Set up a Lambda layer that applies ``tf.squeeze`` to its input
    along ``axis`` (last axis by default).
    """
    squeeze_fn = lambda tensor: tf.squeeze(tensor, axis=axis)
    Lambda.__init__(self, squeeze_fn)
def clipped_relu(self, inputs):
    """Apply a ReLU clipped to the range [0, 20] to ``inputs``.

    Each call creates a new Lambda layer named after the current value of
    ``self.clipped_relu_count``, then increments that counter.
    """
    layer_name = f"clipped_relu_{self.clipped_relu_count}"
    clip_layer = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20),
                        name=layer_name)
    clipped = clip_layer(inputs)
    # Counter advances only after the layer was applied successfully
    self.clipped_relu_count += 1
    return clipped