def MobileNetV3(stack_fn, last_point_ch, input_shape=None, alpha=1.0, model_type='large', minimalistic=False, include_top=True, weights='imagenet', input_tensor=None, classes=1000, pooling=None, dropout_rate=0.2, **kwargs):
    """Builds the MobileNetV3 graph and returns its backbone feature tensors.

    A full Keras model (stem, stacked blocks, head) is assembled and, if
    requested, weights are loaded into it. The model itself is NOT returned:
    the caller receives tensors taken from inside the graph instead.

    # Arguments
        stack_fn: callable invoked as
            `stack_fn(x, kernel, activation, se_ratio)`; it must return a
            pair `(output_tensor, skip_feature)` for the stacked blocks.
        last_point_ch: number of channels of the last pointwise layer
            (before the top). Scaled through `_depth` when `alpha > 1.0`.
        input_shape: optional shape tuple, to be given when the input
            resolution is not (224, 224, 3), e.g. `(160, 160, 3)`.
            It is validated by `_obtain_input_shape`.
        alpha: width multiplier of the network.
            - `alpha` < 1.0 proportionally decreases the filter count.
            - `alpha` > 1.0 proportionally increases the filter count.
            - `alpha` = 1 uses the default filter counts.
        model_type: name suffix of the variant (e.g. 'large' or 'small');
            used in the model name and in the weight file name.
        minimalistic: if truthy, the stem/blocks use 3x3 kernels, `relu`
            and no squeeze-and-excite ratio; otherwise 5x5 kernels,
            `hard_swish` and `se_ratio=0.25`.
        include_top: whether to build the classification head.
        weights: one of `None` (random initialization), 'imagenet', or
            the path to a weights file to be loaded.
        input_tensor: optional tensor used directly as the image input.
        classes: number of output classes of the head; must be 1000 when
            `weights='imagenet'` and `include_top` is true.
        pooling: pooling mode applied when `include_top` is false:
            `None` (no pooling), 'avg' or 'max'.
        dropout_rate: dropout fraction in the head; skipped when not > 0.
        **kwargs: accepted but not read anywhere in this function.

    # Returns
        A tuple `(final_feature, skip_feature, n)` where `final_feature`
        is the tensor produced by `stack_fn`, `skip_feature` is the second
        value returned by `stack_fn`, and `n` is `len(model.layers) - 3`.
        NOTE(review): the `- 3` presumably discounts the trailing
        `Conv_1`, `Conv_1/BatchNorm` and activation layers, which only
        matches when `include_top=False` and `pooling=None` - confirm.

    # Raises
        ValueError: for an invalid `weights` argument, for `classes != 1000`
            with ImageNet weights and top, for inputs smaller than 32x32,
            or for an `alpha` that has no ImageNet weights.
    """
    # --- argument validation -------------------------------------------
    is_builtin_weights = weights in {'imagenet', None}
    if not is_builtin_weights and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    use_imagenet = weights == 'imagenet'
    if use_imagenet and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                         'as true, `classes` should be 1000')

    # --- input shape ---------------------------------------------------
    data_format = K.image_data_format()
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=data_format,
                                      require_flatten=include_top,
                                      weights=weights)

    # Fall back to a fully dynamic spatial shape when nothing is known.
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    row_axis, col_axis = (0, 1) if data_format == 'channels_last' else (1, 2)
    rows, cols = input_shape[row_axis], input_shape[col_axis]

    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError('Input size must be at least 32x32; got `input_shape=' +
                         str(input_shape) + '`')

    if use_imagenet:
        alpha_unsupported = ((minimalistic is False and alpha not in [0.75, 1.0])
                             or (minimalistic is True and alpha != 1.0))
        if alpha_unsupported:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.75`, `1.0` for non minimalistic'
                             ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    # A caller-supplied tensor is used as-is (no wrapping in `Input`).
    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    channel_axis = 1 if data_format == 'channels_first' else -1

    # Block hyper-parameters of the regular vs. minimalistic variants.
    kernel, activation, se_ratio = ((3, relu, None) if minimalistic
                                    else (5, hard_swish, 0.25))

    # --- stem ----------------------------------------------------------
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = Conv2D(16,
               kernel_size=3,
               strides=(2, 2),
               padding='valid',
               use_bias=False,
               name='Conv')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv/BatchNorm')(x)
    x = Activation(activation)(x)

    # --- stacked blocks ------------------------------------------------
    x, skip_feature = stack_fn(x, kernel, activation, se_ratio)
    # The end of the feature extractor is kept as the final feature map.
    final_feature = x

    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # A width multiplier above 1 also widens the last pointwise layer.
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    # --- head ----------------------------------------------------------
    x = Conv2D(last_conv_ch,
               kernel_size=1,
               padding='same',
               use_bias=False,
               name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1/BatchNorm')(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        pooled_shape = ((last_conv_ch, 1, 1) if channel_axis == 1
                        else (1, 1, last_conv_ch))
        x = Reshape(pooled_shape)(x)
        x = Conv2D(last_point_ch,
                   kernel_size=1,
                   padding='same',
                   name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = Conv2D(classes,
                   kernel_size=1,
                   padding='same',
                   name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='max_pool')(x)

    # Make sure the model accounts for any predecessors of `input_tensor`.
    inputs = img_input if input_tensor is None else get_source_inputs(input_tensor)

    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # --- weights -------------------------------------------------------
    if use_imagenet:
        variant = '_minimalistic' if minimalistic else ''
        model_name = f'{model_type}{variant}_224_{alpha!s}_float'
        file_suffix = '.h5' if include_top else '_no_top.h5'
        file_name = 'weights_mobilenet_v3_' + model_name + file_suffix
        # WEIGHTS_HASHES entries are indexed [with_top, no_top].
        file_hash = WEIGHTS_HASHES[model_name][0 if include_top else 1]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    # The model is intentionally not returned; callers get backbone tensors.
    return final_feature, skip_feature, len(model.layers) - 3
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14):
    """Build a sliding-window CNN + LSTM line recognizer trained with CTC loss.

    Args:
        input_shape: `(image_height, image_width)` of the line image.
        output_shape: `(output_length, num_classes)` of the label sequence.
        window_width: width of each window cut from the line image.
        window_stride: horizontal step between consecutive windows.

    Returns:
        A Keras model taking `[image, y_true, input_length, label_length]`
        and producing `[ctc_loss, ctc_decoded]`.

    Raises:
        ValueError: if the window settings yield fewer windows than
            `output_length`.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    # Number of windows that fit across the image width.
    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(
            f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})'
        )

    # Model inputs: the image plus the three tensors the CTC loss needs.
    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length, ), name='y_true')
    input_length = Input(shape=(1, ), name='input_length')
    label_length = Input(shape=(1, ), name='label_length')

    # More than one local device is taken to mean a GPU is available.
    lstm_fn = CuDNNLSTM if len(device_lib.list_local_devices()) > 1 else LSTM

    # Pipeline: slide a window over the image, run a convnet on every patch
    # to get one feature vector per window, feed the sequence through an LSTM
    # (which expects (batch, timesteps, features)) and finish with a softmax.

    ##### Your code below (Lab 3)
    # Add a channel axis: (image_height, image_width, 1)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)

    window_args = {
        'window_width': window_width,
        'window_stride': window_stride
    }
    image_patches = Lambda(slide_window, arguments=window_args)(image_reshaped)

    # Reuse lenet without its last layer as the per-window feature extractor.
    full_convnet = lenet((image_height, window_width, 1), (num_classes, ))
    feature_extractor = KerasModel(
        inputs=full_convnet.inputs,
        outputs=full_convnet.layers[-2].output)

    # (num_windows, 200)
    window_features = TimeDistributed(feature_extractor)(image_patches)

    lstm_output = lstm_fn(200, return_sequences=True)(window_features)
    softmax_output = Dense(
        num_classes, activation='softmax',
        name='softmax_output')(lstm_output)
    ##### Your code above (Lab 3)

    # Scale the provided input length by the number of windows.
    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss')(
            [y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded')([softmax_output, input_length_processed])

    return KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output])
def init(self, printSummary=True):
    """Build the quantized encoder, the decoder and the chained autoencoder.

    Reads the architecture from `self.pams`, creates `self.encoder`
    (QKeras layers), `self.decoder` (plain Keras layers) and
    `self.autoencoder` (decoder applied to encoder), calls
    `self.compileModels()`, derives `self.name` from the layer layout and,
    when `self.weights_f` is not the empty string, loads those weights into
    the autoencoder.

    Args:
        printSummary: if truthy, print `summary()` of each of the three models.
    """
    cfg = self.pams

    # Architecture description.
    encoded_dim = cfg['encoded_dim']
    CNN_layer_nodes = cfg['CNN_layer_nodes']
    CNN_kernel_size = cfg['CNN_kernel_size']
    CNN_pool = cfg['CNN_pool']
    Dense_layer_nodes = cfg['Dense_layer_nodes']  # encoded layer not included
    channels_first = cfg['channels_first']

    inputs = Input(shape=cfg['shape'])

    # Bit-width settings. Dense/conv widths fall back to the weight width.
    # Each entry carries its own 'keep_negative' flag (the original note says
    # 0 on inputs, default 1 for weights).
    bits_input = cfg['nBits_input']
    bits_accum = cfg['nBits_accum']
    bits_weight = cfg['nBits_weight']
    bits_encod = cfg['nBits_encod']
    bits_dense = cfg.get('nBits_dense', bits_weight)
    bits_conv = cfg.get('nBits_conv', bits_weight)

    # Weights and biases share one quantizer per layer kind for now.
    input_Qbits, accum_Qbits, dense_Qbits, conv_Qbits, encod_Qbits = (
        self.GetQbits(bits, bits['keep_negative'])
        for bits in (bits_input, bits_accum, bits_dense, bits_conv, bits_encod))

    # Only pass data_format explicitly for channels-first data; otherwise the
    # layers keep their default.
    layout = {'data_format': 'channels_first'} if channels_first else {}

    # ---- encoder ------------------------------------------------------
    x = QActivation(input_Qbits, name='input_qa')(inputs)

    for i, n_nodes in enumerate(CNN_layer_nodes):
        x = QConv2D(n_nodes, CNN_kernel_size[i], activation='relu', padding='same',
                    name=f"conv2d_{i}_m",
                    kernel_quantizer=conv_Qbits, bias_quantizer=conv_Qbits,
                    **layout)(x)
        if CNN_pool[i]:
            x = MaxPooling2D((2, 2), padding='same', name=f"mp_{i}", **layout)(x)

    # Remember the conv output shape so the decoder can rebuild it.
    shape = K.int_shape(x)

    x = QActivation(accum_Qbits, name='accum1_qa')(x)
    x = Flatten(name="flatten")(x)

    # (A variant feeding extra inputs into the dense stage is disabled.)

    for i, n_nodes in enumerate(Dense_layer_nodes):
        x = QDense(n_nodes, activation='relu', name=f"en_dense_{i}",
                   kernel_quantizer=dense_Qbits, bias_quantizer=dense_Qbits)(x)

    x = QDense(encoded_dim, activation=cfg['activation'], name='encoded_vector',
               kernel_quantizer=dense_Qbits, bias_quantizer=dense_Qbits)(x)
    encodedLayer = QActivation(encod_Qbits, name='encod_qa')(x)

    self.encoder = Model(inputs, encodedLayer, name='encoder')
    if printSummary:
        self.encoder.summary()

    # ---- decoder ------------------------------------------------------
    encoded_inputs = Input(shape=(encoded_dim,), name='decoder_input')
    x = encoded_inputs

    for i, n_nodes in enumerate(Dense_layer_nodes):
        x = Dense(n_nodes, activation='relu', name=f"de_dense_{i}")(x)

    conv_dims = (shape[1], shape[2], shape[3])
    x = Dense(shape[1] * shape[2] * shape[3], activation='relu',
              name='de_dense_final')(x)
    x = Reshape(conv_dims, name="de_reshape")(x)

    for i, n_nodes in enumerate(CNN_layer_nodes):
        if CNN_pool[i]:
            x = UpSampling2D((2, 2), name=f"up_{i}", **layout)(x)
        x = Conv2DTranspose(n_nodes, CNN_kernel_size[i], activation='relu',
                            padding='same', name=f"conv2D_t_{i}", **layout)(x)

    # Output channel count is the channel entry of the input shape:
    # index 0 for channels-first, index 2 otherwise.
    n_out_channels = cfg['shape'][0 if channels_first else 2]
    x = Conv2DTranspose(filters=n_out_channels, kernel_size=CNN_kernel_size[0],
                        padding='same', name="conv2d_t_final", **layout)(x)

    # Original note asks whether this re-quantization step is needed.
    x = QActivation(input_Qbits, name='q_decoder_output')(x)
    outputs = Activation('sigmoid', name='decoder_output')(x)

    self.decoder = Model(encoded_inputs, outputs, name='decoder')
    if printSummary:
        self.decoder.summary()

    # ---- full autoencoder ---------------------------------------------
    self.autoencoder = Model(inputs, self.decoder(self.encoder(inputs)),
                             name='autoencoder')
    if printSummary:
        self.autoencoder.summary()

    self.compileModels()

    # ---- descriptive name ---------------------------------------------
    CNN_layers = ''
    if len(CNN_layer_nodes) > 0:
        CNN_layers = '_Conv' + ''.join(
            f'_{n}x{CNN_kernel_size[i]}' + ('pooled' if CNN_pool[i] else '')
            for i, n in enumerate(CNN_layer_nodes))

    Dense_layers = ''
    if len(Dense_layer_nodes) > 0:
        Dense_layers = '_Dense' + ''.join(f'_{n}' for n in Dense_layer_nodes)

    self.name = f'Autoencoded{CNN_layers}{Dense_layers}_Encoded_{encoded_dim}'

    if self.weights_f != '':
        self.autoencoder.load_weights(self.weights_f)
def build_discriminator(self):
    """Build and compile the conditional discriminator.

    Inputs are an image of shape `self.img_shape` and three 64-dimensional
    label vectors. The labels are concatenated, projected by a Dense layer to
    128 * 128 values, reshaped to (128, 128, 1) and concatenated with the
    image. Four stride-2 3x3 convolutions (16, 32, 64, 128 filters), each
    followed by LeakyReLU(0.2), feed a single sigmoid unit.

    NOTE(review): concatenating the label map with the image presumably
    requires `self.img_shape` to be 128x128 spatially - confirm.

    Returns:
        The model, compiled with `self.loss`, `self.optimizer` and the
        'accuracy' metric.
    """
    img = Input(shape=self.img_shape)

    # An earlier variant projected each label to its own 128x128 map before
    # concatenation; the labels are now merged first and projected once.
    l1 = Input(shape=(64,))
    l2 = Input(shape=(64,))
    l3 = Input(shape=(64,))
    label_inputs = [l1, l2, l3]

    n_nodes = 128 * 128
    label_map = Concatenate()(label_inputs)
    label_map = Dense(n_nodes)(label_map)
    label_map = Reshape((128, 128, 1))(label_map)

    dis = Concatenate()([img, label_map])

    # Each stage halves the spatial resolution.
    for n_filters in (16, 32, 64, 128):
        dis = Conv2D(n_filters, kernel_size=3, strides=2, padding="same")(dis)
        dis = LeakyReLU(alpha=0.2)(dis)

    # Flattened feature representation of the (image, labels) pair.
    features = Flatten()(dis)

    # Real/fake score.
    validity = Dense(1, activation="sigmoid")(features)

    model = Model([img, *label_inputs], [validity])
    model.compile(loss=self.loss,
                  optimizer=self.optimizer,
                  metrics=['accuracy'])
    return model
# (60000, 28, 28, 1)
# (10000, 28, 28, 1)
# NOTE(review): the two shapes above are presumably the train / test arrays
# printed by an earlier part of the script - confirm.

# 2. Modeling
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.layers import Reshape
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# A Dense layer also accepts a 4-D input shape, but the result has to be
# flattened before it can be used as a flat output.
model = Sequential([
    Dense(64, input_shape=(28, 28, 1)),
    Flatten(),
    Dense(16),
    Dense(16),
    # The node count must equal the product of the Reshape dimensions
    # below (28 * 28 * 1 = 784).
    Dense(784, activation='relu'),
    # Reshape does no computation; it only re-cuts the tensor it receives.
    # The target shape is passed as a tuple, hence the double parentheses.
    Reshape((28, 28, 1)),
    # Single output unit.
    Dense(1),
])
model.summary()

# 3. Compile, train
early_stopping = EarlyStopping(monitor='loss', patience=6, mode='auto')

# Checkpoint file name pattern, e.g. k45_mnist_37_0100(0.0100).hdf5
modelpath = '../data/modelCheckpoint/k45_mnist_{epoch:02d}-{val_loss:.4f}.hdf5'

# filepath is the location where the weights are saved.
cp = ModelCheckpoint(filepath=modelpath,
                     monitor='val_loss',
                     save_best_only=True,
                     mode='auto')
def color_delta_unet_model(img_shape, n_output_chans, model_name='color_delta_unet', enc_params=None, include_aux_input=False, aux_input_shape=None, do_warp_to_target_space=False):
    """Build a U-Net that predicts an additive color delta for a source image.

    The source and target images (and optionally an auxiliary input) are
    concatenated along the channel axis and passed to `unet2D` or `unet3D`.
    A final 3x3 convolution maps the result to `n_output_chans` channels, and
    this delta is added to the source image. Optionally the sum is warped
    with a supplied flow field.

    Args:
        img_shape: shape of the source/target images, channels last
            (tuple or list). 2 spatial dims select the 2-D U-Net, anything
            else the 3-D one.
        n_output_chans: number of channels of the predicted delta.
        model_name: name given to the returned Keras model.
        enc_params: mapping with the keys 'nf_enc', 'nf_dec' and
            'n_convs_per_stage', forwarded to the U-Net builder. Required,
            despite the `None` default.
        include_aux_input: if true, the auxiliary input is also fed to the
            U-Net. The auxiliary `Input` is always part of the model inputs
            and outputs either way.
        aux_input_shape: shape of the auxiliary input; defaults to
            `img_shape`.
        do_warp_to_target_space: if true, an extra 'input_flow' input is
            added and the transformed image is warped by it with
            `SpatialTransformer(indexing='xy')`.

    Returns:
        A Keras `Model` whose outputs are
        `[transformed_out, color_delta, x_seg]`.

    Raises:
        TypeError: if `enc_params` is None.
        KeyError: if `enc_params` lacks one of the required keys.
    """
    # Fail fast with a clear message: previously a missing `enc_params`
    # surfaced late as "'NoneType' object is not subscriptable", after several
    # Input layers had already been created.
    if enc_params is None:
        raise TypeError(
            "enc_params is required: pass a mapping with the keys "
            "'nf_enc', 'nf_dec' and 'n_convs_per_stage'")
    unet_kwargs = dict(
        nf_enc=enc_params['nf_enc'],
        nf_dec=enc_params['nf_dec'],
        n_convs_per_stage=enc_params['n_convs_per_stage'],
    )

    # Shapes are sliced and concatenated with tuples below, so accept lists too.
    img_shape = tuple(img_shape)
    aux_input_shape = img_shape if aux_input_shape is None else tuple(aux_input_shape)
    n_dims = len(img_shape) - 1  # spatial dims; the last axis is channels

    x_src = Input(img_shape, name='input_src')
    x_tgt = Input(img_shape, name='input_tgt')
    x_seg = Input(aux_input_shape, name='input_src_aux')
    inputs = [x_src, x_tgt, x_seg]

    if do_warp_to_target_space:
        # The transformed volume is warped to target space at the end.
        flow_srctotgt = Input(img_shape[:-1] + (n_dims, ), name='input_flow')
        inputs += [flow_srctotgt]

    # Source and target are always stacked; the aux input only on request.
    unet_inputs = [x_src, x_tgt]
    n_stacked_chans = img_shape[-1] * 2
    if include_aux_input:
        unet_inputs.append(x_seg)
        n_stacked_chans += aux_input_shape[-1]
    unet_input_shape = img_shape[:-1] + (n_stacked_chans, )

    x_stacked = Concatenate(axis=-1)(unet_inputs)

    unet_fn, conv_fn = (unet2D, Conv2D) if n_dims == 2 else (unet3D, Conv3D)
    color_delta = unet_fn(x_stacked, unet_input_shape, n_output_chans, **unet_kwargs)

    # Last conv to get the output shape that we want.
    color_delta = conv_fn(n_output_chans, kernel_size=3, padding='same',
                          name='color_delta')(color_delta)

    transformed_out = Add(name='add_color_delta')([x_src, color_delta])
    if do_warp_to_target_space:
        transformed_out = SpatialTransformer(indexing='xy')(
            [transformed_out, flow_srctotgt])

    # Hacky, but reshape so Keras doesn't complain about returning an input.
    x_seg = Reshape(aux_input_shape, name='aux')(x_seg)

    return Model(inputs=inputs,
                 outputs=[transformed_out, color_delta, x_seg],
                 name=model_name)
def get_test_model_exhaustive():
    """Returns a exhaustive test model.

    Builds one multi-input / multi-output Keras model that applies a wide
    range of layer types and parameter combinations to a fixed set of input
    shapes. The model is compiled with MSE loss and the 'nadam' optimizer and
    fitted for 10 epochs on generated dummy data before it is returned.

    The order in which tensors are appended to `outputs` defines the order
    of the model outputs, so statements must not be reordered.
    """
    # Shapes (without batch axis) of the model inputs; layers below refer to
    # them by index into `inputs`.
    input_shapes = [
        (2, 3, 4, 5, 6),
        (2, 3, 4, 5, 6),
        (7, 8, 9, 10),
        (7, 8, 9, 10),
        (11, 12, 13),
        (11, 12, 13),
        (14, 15),
        (14, 15),
        (16,),
        (16,),
        (2,),
        (1,),
        (2,),
        (1,),
        (1, 3),
        (1, 4),
        (1, 1, 3),
        (1, 1, 4),
        (1, 1, 1, 3),
        (1, 1, 1, 4),
        (1, 1, 1, 1, 3),
        (1, 1, 1, 1, 4),
        (26, 28, 3),
        (4, 4, 3),
        (4, 4, 3),
        (4,),
        (2, 3),
        (1,),
        (1,),
        (1,),
        (2, 3),
        (9, 16, 1),
        (1, 9, 16)
    ]

    inputs = [Input(shape=s) for s in input_shapes]

    outputs = []

    # 1-D convolution, padding, cropping and pooling on the (14, 15) input.
    outputs.append(Conv1D(1, 3, padding='valid')(inputs[6]))
    outputs.append(Conv1D(2, 1, padding='same')(inputs[6]))
    outputs.append(Conv1D(3, 4, padding='causal', dilation_rate=2)(inputs[6]))
    outputs.append(ZeroPadding1D(2)(inputs[6]))
    outputs.append(Cropping1D((2, 3))(inputs[6]))
    outputs.append(MaxPooling1D(2)(inputs[6]))
    outputs.append(MaxPooling1D(2, strides=2, padding='same')(inputs[6]))
    outputs.append(MaxPooling1D(2, data_format="channels_first")(inputs[6]))
    outputs.append(AveragePooling1D(2)(inputs[6]))
    outputs.append(AveragePooling1D(2, strides=2, padding='same')(inputs[6]))
    outputs.append(AveragePooling1D(2, data_format="channels_first")(inputs[6]))
    outputs.append(GlobalMaxPooling1D()(inputs[6]))
    outputs.append(GlobalMaxPooling1D(data_format="channels_first")(inputs[6]))
    outputs.append(GlobalAveragePooling1D()(inputs[6]))
    outputs.append(GlobalAveragePooling1D(data_format="channels_first")(inputs[6]))

    # 2-D convolution and pooling variants on the (11, 12, 13) input.
    outputs.append(Conv2D(4, (3, 3))(inputs[4]))
    outputs.append(Conv2D(4, (3, 3), use_bias=False)(inputs[4]))
    outputs.append(Conv2D(4, (2, 4), strides=(2, 3), padding='same')(inputs[4]))
    outputs.append(Conv2D(4, (2, 4), padding='same', dilation_rate=(2, 3))(inputs[4]))

    outputs.append(SeparableConv2D(3, (3, 3))(inputs[4]))
    outputs.append(DepthwiseConv2D((3, 3))(inputs[4]))
    outputs.append(DepthwiseConv2D((1, 2))(inputs[4]))

    outputs.append(MaxPooling2D((2, 2))(inputs[4]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(MaxPooling2D((2, 2), data_format="channels_first")(inputs[4])) # Default MaxPoolingOp only supports NHWC on device type CPU
    outputs.append(MaxPooling2D((1, 3), strides=(2, 3), padding='same')(inputs[4]))
    outputs.append(AveragePooling2D((2, 2))(inputs[4]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(AveragePooling2D((2, 2), data_format="channels_first")(inputs[4])) # Default AvgPoolingOp only supports NHWC on device type CPU
    outputs.append(AveragePooling2D((1, 3), strides=(2, 3), padding='same')(inputs[4]))

    outputs.append(GlobalAveragePooling2D()(inputs[4]))
    outputs.append(GlobalAveragePooling2D(data_format="channels_first")(inputs[4]))
    outputs.append(GlobalMaxPooling2D()(inputs[4]))
    outputs.append(GlobalMaxPooling2D(data_format="channels_first")(inputs[4]))

    # Axis permutations for inputs of every rank from 5 down to 1.
    outputs.append(Permute((3, 4, 1, 5, 2))(inputs[0]))
    outputs.append(Permute((1, 5, 3, 2, 4))(inputs[0]))
    outputs.append(Permute((3, 4, 1, 2))(inputs[2]))
    outputs.append(Permute((2, 1, 3))(inputs[4]))
    outputs.append(Permute((2, 1))(inputs[6]))
    outputs.append(Permute((1,))(inputs[8]))

    outputs.append(Permute((3, 1, 2))(inputs[31]))
    outputs.append(Permute((3, 1, 2))(inputs[32]))
    outputs.append(BatchNormalization()(Permute((3, 1, 2))(inputs[31])))
    outputs.append(BatchNormalization()(Permute((3, 1, 2))(inputs[32])))

    # Batch normalization over the default axis and over explicit axes,
    # for inputs of different ranks.
    outputs.append(BatchNormalization()(inputs[0]))
    outputs.append(BatchNormalization(axis=1)(inputs[0]))
    outputs.append(BatchNormalization(axis=2)(inputs[0]))
    outputs.append(BatchNormalization(axis=3)(inputs[0]))
    outputs.append(BatchNormalization(axis=4)(inputs[0]))
    outputs.append(BatchNormalization(axis=5)(inputs[0]))
    outputs.append(BatchNormalization()(inputs[2]))
    outputs.append(BatchNormalization(axis=1)(inputs[2]))
    outputs.append(BatchNormalization(axis=2)(inputs[2]))
    outputs.append(BatchNormalization(axis=3)(inputs[2]))
    outputs.append(BatchNormalization(axis=4)(inputs[2]))
    outputs.append(BatchNormalization()(inputs[4]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(BatchNormalization(axis=1)(inputs[4])) # tensorflow.python.framework.errors_impl.InternalError:  The CPU implementation of FusedBatchNorm only supports NHWC tensor format for now.
    outputs.append(BatchNormalization(axis=2)(inputs[4]))
    outputs.append(BatchNormalization(axis=3)(inputs[4]))
    outputs.append(BatchNormalization()(inputs[6]))
    outputs.append(BatchNormalization(axis=1)(inputs[6]))
    outputs.append(BatchNormalization(axis=2)(inputs[6]))
    outputs.append(BatchNormalization()(inputs[8]))
    outputs.append(BatchNormalization(axis=1)(inputs[8]))
    outputs.append(BatchNormalization()(inputs[27]))
    outputs.append(BatchNormalization(axis=1)(inputs[27]))
    outputs.append(BatchNormalization()(inputs[14]))
    outputs.append(BatchNormalization(axis=1)(inputs[14]))
    outputs.append(BatchNormalization(axis=2)(inputs[14]))
    outputs.append(BatchNormalization()(inputs[16]))
    # todo: check if TensorFlow >= 2.1 supports this
    #outputs.append(BatchNormalization(axis=1)(inputs[16])) # tensorflow.python.framework.errors_impl.InternalError:  The CPU implementation of FusedBatchNorm only supports NHWC tensor format for now.
    outputs.append(BatchNormalization(axis=2)(inputs[16]))
    outputs.append(BatchNormalization(axis=3)(inputs[16]))
    outputs.append(BatchNormalization()(inputs[18]))
    outputs.append(BatchNormalization(axis=1)(inputs[18]))
    outputs.append(BatchNormalization(axis=2)(inputs[18]))
    outputs.append(BatchNormalization(axis=3)(inputs[18]))
    outputs.append(BatchNormalization(axis=4)(inputs[18]))
    outputs.append(BatchNormalization()(inputs[20]))
    outputs.append(BatchNormalization(axis=1)(inputs[20]))
    outputs.append(BatchNormalization(axis=2)(inputs[20]))
    outputs.append(BatchNormalization(axis=3)(inputs[20]))
    outputs.append(BatchNormalization(axis=4)(inputs[20]))
    outputs.append(BatchNormalization(axis=5)(inputs[20]))

    outputs.append(Dropout(0.5)(inputs[4]))

    # 2-D zero padding and cropping with scalar, pair and nested-pair arguments.
    outputs.append(ZeroPadding2D(2)(inputs[4]))
    outputs.append(ZeroPadding2D((2, 3))(inputs[4]))
    outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[4]))
    outputs.append(Cropping2D(2)(inputs[4]))
    outputs.append(Cropping2D((2, 3))(inputs[4]))
    outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[4]))

    # Dense layers applied to inputs of increasing rank.
    outputs.append(Dense(3, use_bias=True)(inputs[13]))
    outputs.append(Dense(3, use_bias=True)(inputs[14]))
    outputs.append(Dense(4, use_bias=False)(inputs[16]))
    outputs.append(Dense(4, use_bias=False, activation='tanh')(inputs[18]))
    outputs.append(Dense(4, use_bias=False)(inputs[20]))

    # Reshapes of the rank-5 input and of the (16,) vector to every rank 1..5.
    outputs.append(Reshape(((2 * 3 * 4 * 5 * 6),))(inputs[0]))
    outputs.append(Reshape((2, 3 * 4 * 5 * 6))(inputs[0]))
    outputs.append(Reshape((2, 3, 4 * 5 * 6))(inputs[0]))
    outputs.append(Reshape((2, 3, 4, 5 * 6))(inputs[0]))
    outputs.append(Reshape((2, 3, 4, 5, 6))(inputs[0]))

    outputs.append(Reshape((16,))(inputs[8]))
    outputs.append(Reshape((2, 8))(inputs[8]))
    outputs.append(Reshape((2, 2, 4))(inputs[8]))
    outputs.append(Reshape((2, 2, 2, 2))(inputs[8]))
    outputs.append(Reshape((2, 2, 1, 2, 2))(inputs[8]))

    outputs.append(UpSampling2D(size=(1, 2), interpolation='nearest')(inputs[4]))
    outputs.append(UpSampling2D(size=(5, 3), interpolation='nearest')(inputs[4]))
    outputs.append(UpSampling2D(size=(1, 2), interpolation='bilinear')(inputs[4]))
    outputs.append(UpSampling2D(size=(5, 3), interpolation='bilinear')(inputs[4]))

    # Concatenation along every valid negative and positive axis for pairs of
    # identically shaped inputs.
    for axis in [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]:
        outputs.append(Concatenate(axis=axis)([inputs[0], inputs[1]]))
    for axis in [-4, -3, -2, -1, 1, 2, 3, 4]:
        outputs.append(Concatenate(axis=axis)([inputs[2], inputs[3]]))
    for axis in [-3, -2, -1, 1, 2, 3]:
        outputs.append(Concatenate(axis=axis)([inputs[4], inputs[5]]))
    for axis in [-2, -1, 1, 2]:
        outputs.append(Concatenate(axis=axis)([inputs[6], inputs[7]]))
    for axis in [-1, 1]:
        outputs.append(Concatenate(axis=axis)([inputs[8], inputs[9]]))
    # Pairs that differ only in the last dimension: last axis only.
    for axis in [-1, 2]:
        outputs.append(Concatenate(axis=axis)([inputs[14], inputs[15]]))
    for axis in [-1, 3]:
        outputs.append(Concatenate(axis=axis)([inputs[16], inputs[17]]))
    for axis in [-1, 4]:
        outputs.append(Concatenate(axis=axis)([inputs[18], inputs[19]]))
    for axis in [-1, 5]:
        outputs.append(Concatenate(axis=axis)([inputs[20], inputs[21]]))

    outputs.append(UpSampling1D(size=2)(inputs[6]))
    # outputs.append(UpSampling1D(size=2)(inputs[8])) # ValueError: Input 0 of layer up_sampling1d_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 16]

    # Element-wise multiplication of (2,) and (1,) inputs in several orders.
    outputs.append(Multiply()([inputs[10], inputs[11]]))
    outputs.append(Multiply()([inputs[11], inputs[10]]))
    outputs.append(Multiply()([inputs[11], inputs[13]]))
    outputs.append(Multiply()([inputs[10], inputs[11], inputs[12]]))
    outputs.append(Multiply()([inputs[11], inputs[12], inputs[13]]))

    # The same layer instances are applied to more than one tensor here.
    shared_conv = Conv2D(1, (1, 1),
                         padding='valid', name='shared_conv', activation='relu')

    up_scale_2 = UpSampling2D((2, 2))
    x1 = shared_conv(up_scale_2(inputs[23]))  # (1, 8, 8)
    x2 = shared_conv(up_scale_2(inputs[24]))  # (1, 8, 8)
    x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[24]))  # (1, 8, 8)
    x = Concatenate()([x1, x2, x3])  # (3, 8, 8)
    outputs.append(x)

    x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x)  # (3, 8, 8)
    outputs.append(x)
    x = Dropout(0.5)(x)
    outputs.append(x)
    x = Concatenate()([
        MaxPooling2D((2, 2))(x),
        AveragePooling2D((2, 2))(x)])  # (6, 4, 4)
    outputs.append(x)

    x = Flatten()(x)  # (1, 1, 96)
    x = Dense(4, use_bias=False)(x)
    outputs.append(x)
    x = Dense(3)(x)  # (1, 1, 3)
    outputs.append(x)

    # Merge layers on the (2, 3) inputs; inputs[30] is passed twice on purpose
    # where three operands are used.
    outputs.append(Add()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Subtract()([inputs[26], inputs[30]]))
    outputs.append(Multiply()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Average()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Maximum()([inputs[26], inputs[30], inputs[30]]))
    outputs.append(Concatenate()([inputs[26], inputs[30], inputs[30]]))

    # A functional sub-model used as a layer inside the outer model.
    intermediate_input_shape = (3,)
    intermediate_in = Input(intermediate_input_shape)
    intermediate_x = intermediate_in
    intermediate_x = Dense(8)(intermediate_x)
    intermediate_x = Dense(5, name='duplicate_layer_name')(intermediate_x)
    intermediate_model = Model(
        inputs=[intermediate_in],
        outputs=[intermediate_x],
        name='intermediate_model')
    intermediate_model.compile(loss='mse', optimizer='nadam')

    x = intermediate_model(x)  # (1, 1, 5)

    # A Sequential sub-model; it also contains a layer named
    # 'duplicate_layer_name', the same name used in the sub-model above.
    intermediate_model_2 = Sequential()
    intermediate_model_2.add(Dense(7, input_shape=(5,)))
    intermediate_model_2.add(Dense(5, name='duplicate_layer_name'))
    intermediate_model_2.compile(optimizer='rmsprop',
                                 loss='categorical_crossentropy')

    x = intermediate_model_2(x)  # (1, 1, 5)

    x = Dense(3)(x)  # (1, 1, 3)

    # One Activation instance applied to two different tensors below.
    shared_activation = Activation('tanh')

    outputs = outputs + [
        Activation('tanh')(inputs[25]),
        Activation('hard_sigmoid')(inputs[25]),
        Activation('selu')(inputs[25]),
        Activation('sigmoid')(inputs[25]),
        Activation('softplus')(inputs[25]),
        Activation('softmax')(inputs[25]),
        Activation('softmax')(inputs[25]),
        Activation('relu')(inputs[25]),
        LeakyReLU()(inputs[25]),
        ELU()(inputs[25]),
        PReLU()(inputs[24]),
        PReLU()(inputs[25]),
        PReLU()(inputs[26]),
        shared_activation(inputs[25]),
        Activation('linear')(inputs[26]),
        Activation('linear')(inputs[23]),
        x,
        shared_activation(x),
    ]

    model = Model(inputs=inputs, outputs=outputs, name='test_model_exhaustive')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data
    training_data_size = 2
    data_in = generate_input_data(training_data_size, input_shapes)
    # The model's own initial predictions are handed to generate_output_data
    # to build the training targets.
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, epochs=10)
    return model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, LSTM, Reshape, Dense, Dropout
from load_data import *
# NOTE(review): sampled_data, labels, X_train, y_train, X_val and y_val are
# presumably provided by the star import from load_data - confirm.

batch_size = 1


def _conv_pool_dropout(tensor, n_filters):
    """Apply a 3x3 same-padded ReLU conv, 2x2 same-padded max-pool and 30% dropout."""
    tensor = Conv2D(filters=n_filters,
                    kernel_size=(3, 3),
                    strides=1,
                    padding="same",
                    activation='relu')(tensor)
    tensor = MaxPool2D(pool_size=(2, 2), padding="same")(tensor)
    return Dropout(.3)(tensor)


# Input: one sample of sampled_data (axes 1..3) with a fixed batch size.
inputs = Input(
    shape=(sampled_data.shape[1], sampled_data.shape[2], sampled_data.shape[3]),
    batch_size=batch_size)

# Convolutional feature extractor: two conv / pool / dropout stages.
conv2d_1 = _conv_pool_dropout(inputs, 32)
conv2d_2 = _conv_pool_dropout(conv2d_1, 64)

# Turn the feature map into a sequence of 222 steps for the recurrent layer;
# the remaining dimensions are folded into the feature axis.
reshape = Reshape((222, -1))(conv2d_2)
lstm = LSTM(200, return_sequences=False)(reshape)

# Classification head: one softmax unit per distinct label.
dense_1 = Dense(64, activation="relu")(lstm)
dense_2 = Dense(32, activation="relu")(dense_1)
outputs = Dense(len(set(labels)), activation="softmax")(dense_2)

rcnn = Model(inputs, outputs)
rcnn.compile(loss='sparse_categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])
rcnn.summary()

rcnn.fit(X_train,
         y_train,
         epochs=10,
         batch_size=batch_size,
         validation_data=(X_val, y_val))
def get_model_v10(num_classes, preprocessing=None, bypass_type=None):
    """Create SqueezeNet architecture as described in [1], version 1.0.

    This is v1.0 implementation, it corresponds to original paper
    description. Check [2] for more implementation information.

    Parameters
    ----------
    num_classes : int
        Number of classes to detect. Also the number of filters of the final
        1x1 convolution (`conv10`).
    preprocessing : model
        Input preprocessing module that adapts input shape to what SqueezeNet
        expects. If None, SqueezeNet uses Conv2D and MaxPool as described in
        Table 1 [1] and input shape is fixed to 227x227x3. Otherwise the
        module's `input` becomes the model input and its `output` is fed to
        the first fire module (fire2 in Table 1 [1]); it is expected to have
        size (55x55x96), which is not verified here.
        Example:
            input = Input(shape=(110, 110, 3))
            output = Conv2D(96, (3, 3), (2,2))(input)
            preprocessing = Model(input, output)
    bypass_type : {None, 'simple', 'complex'}
        Bypass type to be applied, see Fig. 2 [1] for more detail.

    Returns
    -------
    model : model
        Keras model of SqueezeNet architecture.
    """
    bypass = set_bypass(bypass_type=bypass_type)

    if preprocessing is None:
        inputs = Input(shape=(227, 227, 3))
        net = default_preprocessor(inputs, version='1.0')
    else:
        # Use the caller-supplied front end. Previously this branch was a bare
        # `pass`, leaving `inputs` and `net` undefined (NameError below).
        inputs = preprocessing.input
        net = preprocessing.output

    # NOTE(review): fire2..fire4 read bypass['fire1'..'fire3'] while every later
    # module reads the key matching its own name -- confirm against set_bypass.
    net = fire_module(net, 'fire2', 16, bypass['fire1'])
    net = fire_module(net, 'fire3', 16, bypass['fire2'])
    net = fire_module(net, 'fire4', 32, bypass['fire3'])
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(net)

    net = fire_module(net, 'fire5', 32, bypass['fire5'])
    net = fire_module(net, 'fire6', 48, bypass['fire6'])
    net = fire_module(net, 'fire7', 48, bypass['fire7'])
    net = fire_module(net, 'fire8', 64, bypass['fire8'])
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2))(net)

    net = fire_module(net, 'fire9', 64, bypass=bypass['fire9'])

    # Classification head: 1x1 conv with one filter per class, spatial average,
    # then softmax over the class axis.
    net = Conv2D(
        filters=num_classes,
        kernel_size=(1, 1),
        strides=(1, 1),
        activation='relu',
        name='conv10',
    )(net)
    net = GlobalAveragePooling2D()(net)
    net = Reshape((num_classes, ))(net)
    net = Softmax()(net)

    model = Model(inputs, net)
    return model
def build_model(self):
    """Build the convolutional-recurrent text recogniser.

    Reads `self.input_shape_hwc`, `self.class_num`, `self.max_text_len` and
    `self.ctc_func`.

    Returns:
        A `(train_model, predict_model)` pair. `train_model` takes the image,
        the labels and both length inputs and outputs the CTC loss computed by
        `self.ctc_func`; `predict_model` maps the image to the per-step
        softmax activations.
    """
    image_in = Input(name='inputs', shape=self.input_shape_hwc, dtype='float32')

    # VGG-style feature extractor: one (filters, pool_size) entry per
    # Conv -> BatchNorm -> ReLU -> MaxPool stage, in order.
    conv_stages = (
        (16, (2, 2)),
        (32, (2, 2)),
        (32, (1, 2)),
        (64, (1, 2)),
        (64, (1, 2)),
        (128, (1, 2)),
    )
    features = image_in
    for n_filters, pool in conv_stages:
        features = Conv2D(n_filters, (3, 3), padding='same',
                          kernel_initializer='he_normal')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = MaxPooling2D(pool_size=pool)(features)

    # CNN -> RNN bridge: fold the feature map into a 32-step sequence.
    sequence = Reshape(target_shape=(32, -1))(features)
    sequence = Dense(128, activation='relu', kernel_initializer='he_normal')(sequence)

    # First recurrent level: forward and backward passes are summed.
    forward_1 = LSTM(128, return_sequences=True,
                     kernel_initializer='he_normal')(sequence)
    backward_1 = LSTM(128, return_sequences=True, go_backwards=True,
                      kernel_initializer='he_normal')(sequence)
    level_1 = add([forward_1, backward_1])

    # Second recurrent level: forward and backward passes are concatenated.
    forward_2 = LSTM(128, return_sequences=True,
                     kernel_initializer='he_normal')(level_1)
    backward_2 = LSTM(128, return_sequences=True, go_backwards=True,
                      kernel_initializer='he_normal')(level_1)
    level_2 = concatenate([forward_2, backward_2])

    # Project the recurrent output onto the character classes.
    logits = Dense(self.class_num, kernel_initializer='he_normal')(level_2)
    y_pred = Activation('softmax', name='softmax')(logits)

    # Extra inputs consumed only by the CTC loss.
    labels = Input(name='labels', shape=[self.max_text_len], dtype='int64')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The loss needs more than (y_true, y_pred), so it is computed inside a
    # Lambda layer and exposed as the training model's output, shape (None, 1).
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(self.ctc_func, output_shape=(1, ), name='ctc')(ctc_inputs)

    train_model = Model(inputs=[image_in, labels, input_length, label_length],
                        outputs=loss_out)
    predict_model = Model(inputs=[image_in], outputs=y_pred)
    return train_model, predict_model
def SegNet():
    """Build, compile and return the SegNet-style encoder/decoder model.

    Uses the module-level `img_w`, `img_h` and `n_label`. The encoder is five
    stages of 3x3 Conv + BatchNorm blocks, each closed by 2x2 max pooling; the
    decoder mirrors it with 2x2 up-sampling in front of each stage. The output
    is a per-pixel softmax of shape (img_w * img_h, n_label).

    Returns:
        The compiled `Sequential` model (its summary is printed first).
    """

    def add_conv_bn(net, filters, **extra):
        # One 3x3 same-padded ReLU convolution followed by batch normalisation.
        net.add(
            Conv2D(filters, (3, 3),
                   strides=(1, 1),
                   padding='same',
                   activation='relu',
                   **extra))
        net.add(BatchNormalization())

    # (filters, number of conv blocks) per stage.
    encoder_stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    decoder_stages = ((512, 3), (512, 3), (256, 3), (128, 2), (64, 2))
    shape_kwarg = {'input_shape': (3, img_w, img_h)}

    model = Sequential()

    # encoder
    for stage_idx, (filters, depth) in enumerate(encoder_stages):
        for block_idx in range(depth):
            # Only the very first layer declares the model input shape.
            is_first_layer = stage_idx == 0 and block_idx == 0
            add_conv_bn(model, filters, **(shape_kwarg if is_first_layer else {}))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # decoder
    final_stage = len(decoder_stages) - 1
    for stage_idx, (filters, depth) in enumerate(decoder_stages):
        model.add(UpSampling2D(size=(2, 2)))
        for block_idx in range(depth):
            # The first conv of the last decoder stage is also given
            # `input_shape`, exactly as in the original layer list.
            # NOTE(review): presumably redundant on a non-first layer -- confirm.
            repeats_shape = stage_idx == final_stage and block_idx == 0
            add_conv_bn(model, filters, **(shape_kwarg if repeats_shape else {}))

    # Per-pixel classifier: 1x1 conv to n_label maps, then flatten the spatial
    # axes and move the label axis last (same as np.swapaxes(layer, 1, 2)).
    model.add(Conv2D(n_label, (1, 1), strides=(1, 1), padding='same'))
    model.add(Reshape((n_label, img_w * img_h)))
    model.add(Permute((2, 1)))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.summary()
    return model
# Shape info needed to build Decoder Model shape = K.int_shape(x) # Generate the latent vector x = Flatten()(x) latent = Dense(latent_dim, name='latent_vector')(x) # Instantiate Encoder Model encoder = Model(inputs, latent, name='encoder') encoder.summary() # Build the Decoder Model latent_inputs = Input(shape=(latent_dim,), name='decoder_input') x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) x = Reshape((shape[1], shape[2], shape[3]))(x) # Stack of Transposed Conv2D blocks # Notes: # 1) Use Batch Normalization before ReLU on deep networks # 2) Use UpSampling2D as alternative to strides>1 # - faster but not as good as strides>1 for filters in layer_filters[::-1]: x = Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=2, activation='relu', padding='same')(x) x = Conv2DTranspose(filters=1, kernel_size=kernel_size,
# embedding size embedding_dim = 100 hidden_dim = 50 # Filter parameters filter_sizes = [5, 6, 7] num_filters = 30 vocabulary_size = len(dl.word_to_index_dict) print(vocabulary_size) #input inputs = Input(shape=(seq_len, ), dtype='int32') embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=seq_len)(inputs) reshape = Reshape((seq_len, embedding_dim, 1))(embedding) conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu', name='conv_0')(reshape) conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu', name='conv_1')(reshape) conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], embedding_dim),
def Deeplabv3pMobileNetV3Small(input_shape=(512, 512, 3),
                               alpha=1.0,
                               weights=None,
                               input_tensor=None,
                               classes=21,
                               OS=8,
                               **kwargs):
    """Instantiates the Deeplabv3+ MobileNetV3Small architecture.

    # Arguments
        input_shape: shape of input image, format HxWxC. Its first two entries
            are also the size the prediction is resized to.
        alpha: controls the width of the MobileNetV3Small backbone; it is
            forwarded to `MobileNetV3Small`.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        weights: one of 'pascal_voc' or None. The value is only validated
            here; this function does not load any PASCAL VOC weights, and the
            backbone is always requested with `weights='imagenet'`.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the final 1x1 conv).
        OS: forwarded to the backbone and to the ASPP block.

    # Returns
        A `(model, backbone_len)` tuple: the Keras model and the
        `backbone_len` value reported by `MobileNetV3Small`.

    # Raises
        ValueError: in case of invalid argument for `weights`.
    """
    if not (weights in {'pascal_voc', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract; `alpha` is passed through so the
    # caller's width multiplier is honoured (it used to be pinned to 1.0).
    x, skip_feature, backbone_len = MobileNetV3Small(include_top=False,
                                                     input_tensor=img_norm,
                                                     weights='imagenet',
                                                     OS=OS,
                                                     alpha=alpha)

    # ASPP block
    x = ASPP_block(x, OS)

    # Deeplabv3+ decoder for feature projection
    x = Decoder_block(x, skip_feature)

    # Final prediction conv block, resized back to the input resolution and
    # flattened to (pixels, classes) for the per-pixel softmax.
    x = DeeplabConv2D(classes, (1, 1), padding='same', name='logits_semantic')(x)
    x = Lambda(img_resize,
               arguments={
                   'size': (input_shape[0], input_shape[1]),
                   'mode': 'bilinear'
               },
               name='pred_resize')(x)
    x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    model = Model(img_input, x, name='deeplabv3p_mobilenetv3small')

    return model, backbone_len
def Deeplabv3pXception(input_shape=(512, 512, 3),
                       weights='pascal_voc',
                       input_tensor=None,
                       classes=21,
                       OS=16,
                       **kwargs):
    """Instantiates the Deeplabv3+ architecture on an Xception backbone.

    Optionally loads weights pre-trained on PASCAL VOC.

    # Arguments
        input_shape: shape of input image, format HxWxC. Its first two entries
            are also the size the prediction is resized to.
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the final 1x1 conv).
        OS: forwarded to the Xception body and to the ASPP block.

    # Returns
        A `(model, backbone_len)` tuple: the Keras model and the
        `backbone_len` value reported by `Xception_body`.

    # Raises
        ValueError: in case of invalid argument for `weights`.
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    # Use the caller's tensor when given, otherwise create a fresh input.
    img_input = (input_tensor if input_tensor is not None
                 else Input(shape=input_shape, name='image_input'))

    # Normalisation -> backbone -> ASPP -> decoder.
    normalized = Lambda(normalize, name='input_normalize')(img_input)
    features, skip_feature, backbone_len = Xception_body(normalized, OS)
    features = ASPP_block(features, OS)
    features = Decoder_block(features, skip_feature)

    # Prediction head: class logits, resized to the input resolution, then
    # flattened to (pixels, classes) for the per-pixel softmax.
    logits = DeeplabConv2D(classes, (1, 1), padding='same',
                           name='logits_semantic')(features)
    resize_args = {'size': (input_shape[0], input_shape[1]), 'mode': 'bilinear'}
    resized = Lambda(img_resize, arguments=resize_args, name='pred_resize')(logits)
    flat = Reshape((input_shape[0] * input_shape[1], classes))(resized)
    probabilities = Softmax(name='Predictions/Softmax')(flat)

    model = Model(img_input, probabilities, name='deeplabv3p_xception')

    # Pre-trained weights are matched to layers by name.
    if weights == 'pascal_voc':
        weights_path = get_file(
            'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH_X,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)

    return model, backbone_len
def gru_model():
    """Build the GRU classifier over categorical and numerical features.

    Uses the module-level `category` and `numerical` column lists. Each
    categorical column is sliced out of `category_inp`, embedded, and the
    embeddings are concatenated on the last axis; the numerical input goes
    through a dense layer and is appended as extra features of the same single
    time step. A GRU and two dense blocks produce one sigmoid output.

    Returns:
        An uncompiled Keras `Model` with inputs `[category_inp, numerical_inp]`.
    """
    emb_n = 64
    # column name -> (Embedding input_dim, Embedding output_dim)
    category_num = {
        'adidmd5': (780369, emb_n),
        'idfamd5': (360, emb_n),
        'imeimd5': (1021836, emb_n),
        'macmd5': (329184, emb_n),
        'openudidmd5': (85051, emb_n),
        'ip': (813719, emb_n),
        'reqrealip': (9748, emb_n),
        'adunitshowid': (800, emb_n),
        'apptype': (91, emb_n),
        'carrier': (4, emb_n),
        'city': (331, emb_n),
        'dvctype': (3, emb_n),
        'model': (5923, emb_n),  # 7957 7958 5922
        'make': (1704, emb_n),
        'mediashowid': (313, emb_n),
        'ntt': (7, emb_n),
        'orientation': (2, emb_n),
        'osv': (185, emb_n),
        'pkgname': (2368, emb_n),
        'ppi': (119, emb_n),
        'ver': (3268, emb_n),
        'screen_area': (1396, emb_n),
        'creative_dpi': (1763, emb_n),
        'hour': (24, emb_n),
        'lan': (33, emb_n),
        'h': (985, emb_n),
        'w': (449, emb_n),
    }

    # Categorical inputs: one column per entry of `category`.
    category_inp = Input(shape=(len(category),), name='category_inp')
    cat_embeds = []
    for idx, col in enumerate(category):
        # Bind the column index as a default argument. A plain closure over
        # `idx` is late-binding: if the Lambda body is ever re-executed after
        # the loop (re-tracing, reloading), every layer would read the last
        # column instead of its own.
        x = Lambda(lambda t, i=idx: t[:, i, None])(category_inp)
        x = Embedding(category_num[col][0], category_num[col][1], input_length=1)(x)
        cat_embeds.append(x)
    embeds = concatenate(cat_embeds, axis=2)
    embeds = GaussianDropout(0.5)(embeds)

    # Numerical inputs.
    numerical_inp = Input(shape=(len(numerical),), name='continous_inp')
    # NOTE(review): the printed size (// 8 * 8 + 8) differs from the Dense
    # width actually used below (// 8 + 8) -- confirm which one is intended.
    print('numerical', len(numerical) // 8 * 8 + 8)
    x2 = Dense(len(numerical) // 8 + 8, activation='relu', kernel_initializer='random_uniform',
               bias_initializer='zeros')(numerical_inp)
    x2 = Dropout(0.5)(x2)
    x2 = BatchNormalization()(x2)
    # Add a length-1 time axis so it can be joined with the embeddings.
    x2 = Reshape([1, int(x2.shape[1])])(x2)
    x = concatenate([embeds, x2], axis=2)

    # Backbone network.
    x = CuDNNGRU(128)(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(64, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    x = Dense(32, activation='relu', kernel_initializer='random_uniform')(x)
    x = PReLU()(x)
    x = BatchNormalization()(x)
    x = Dropout(0.50)(x)
    out_p = Dense(1, activation='sigmoid')(x)
    return Model(inputs=[category_inp, numerical_inp], outputs=out_p)
def G_block(self, res):
    """Build the generator block for resolution 2**res x 2**res.

    For `res == 2` the block is a dense projection reshaped to
    `self.projecting_target_shape` followed by one conv block. For larger
    `res` it is an up-scaling first conv block (fused into the convolution when
    `self.G_fused_scale` is set, otherwise preceded by `self.up_layers[res]`)
    followed by a second conv block.

    Args:
        res: log2 of the block resolution (2 ... resolution_log2).

    Returns:
        A `tf.keras.Sequential` named '<size>x<size>'.
    """

    def finish(layers, name):
        # Shared tail of every sub-block: optional bias, optional pixel norm,
        # then wrap the layer list in a named Sequential.
        if self.use_bias:
            layers = self.apply_bias(layers)
        if self.use_pixelnorm:
            layers = self.PN(layers)
        return tf.keras.Sequential(layers, name=name)

    side = 2**res
    block_name = '%dx%d' % (side, side)

    if res == 2:  # 4x4
        # Linear block.
        # Gain is overridden to match the original implementation:
        # sqrt(2) / 4 was used with He init.
        projection = self.dense(
            units=np.prod(self.projecting_target_shape),
            gain=self.gain / self.projecting_gain_correction)
        to_feature_map = Reshape(target_shape=self.projecting_target_shape,
                                 dtype=self.policy)
        linear_block = finish([projection, to_feature_map, self.act()],
                              'Projecting')

        # Conv block.
        conv_block = finish(
            [self.conv2d(fmaps=self.G_n_filters(res - 1)), self.act()],
            'Conv')

        return tf.keras.Sequential([linear_block, conv_block], name=block_name)

    # 8x8 and up: first conv block, with the up-scaling either fused into the
    # convolution or done by a separate layer in front of it.
    if self.G_fused_scale:
        conv0_block = finish(
            [self.conv2d(fmaps=self.G_n_filters(res - 1), fused_up=True),
             self.act()],
            'Conv0_up')
        stages = [conv0_block]
    else:
        conv0_block = finish(
            [self.conv2d(fmaps=self.G_n_filters(res - 1)), self.act()],
            'Conv0')
        stages = [self.up_layers[res], conv0_block]

    # Second conv block.
    conv1_block = finish(
        [self.conv2d(fmaps=self.G_n_filters(res - 1)), self.act()],
        'Conv1')
    stages += [conv1_block]

    return tf.keras.Sequential(stages, name=block_name)
def func(labels):
    """Tile a label tensor into a `(size, size, 1)` map.

    The input is repeated `size * size` times and reshaped to
    `(size, size, 1)`; `size` comes from the enclosing scope.
    NOTE(review): the reshape presumably requires one feature per sample --
    confirm against the caller.
    """
    tiled = RepeatVector(size * size)(labels)
    return Reshape((size, size, 1))(tiled)
# In[4]:

# Load the pre-trained ResNet50 model (ImageNet weights, no classifier head).
resnet50 = ResNet50(weights='imagenet', include_top=False, input_shape=(height, width, 3))

# In[5]:

# Define the image input.
image_input = Input((height, width, 3), name='image_input')
# Use ResNet50 for feature extraction.
x = resnet50(image_input)
# Build the RNN part: fold the feature map into a 10-step sequence of
# 2048-dim vectors.
# NOTE(review): (10, 2048) is hard-coded; presumably it matches the ResNet50
# output for the chosen height/width -- confirm before changing the input size.
x = Reshape((10, 2048))(x)
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Dense(num_classes, activation='softmax')(x)
# Define the model.
model = Model(image_input, x)

# In[6]:

# `shape` must be a tuple: `(max_len)` and `(1)` are plain ints, so the
# one-element tuples are spelled with a trailing comma.
# Label input.
labels = Input(shape=(max_len,), name='max_len')
# Input (prediction sequence) length.
input_len = Input(shape=(1,), name='input_len')
# Label length.
label_len = Input(shape=(1,), name='label_len')
# A Lambda layer wraps a custom function into the network so that custom
# computations can be carried out on the data.
def _build(self):
    """Assemble the encoder, the decoder and the full VAE.

    Sets `self.mu`, `self.log_var`, `self.z`, `self.encoder`, `self.decoder`
    and `self.model`. The decoder starts from the feature-map shape the
    encoder had just before flattening.
    """
    ### THE ENCODER
    encoder_input = Input(shape=self.input_dim, name='encoder_input')

    features = encoder_input
    for idx in range(self.n_layers_encoder):
        features = Conv2D(filters=self.encoder_conv_filters[idx],
                          kernel_size=self.encoder_conv_kernel_size[idx],
                          strides=self.encoder_conv_strides[idx],
                          padding='same',
                          name='encoder_conv_' + str(idx))(features)
        if self.use_batch_norm:
            features = BatchNormalization()(features)
        features = LeakyReLU()(features)
        if self.use_dropout:
            features = Dropout(rate=0.25)(features)

    # Remember the per-sample shape (batch axis dropped) for the decoder.
    shape_before_flattening = K.int_shape(features)[1:]
    flat = Flatten()(features)

    self.mu = Dense(self.z_dim, name='mu')(flat)
    self.log_var = Dense(self.z_dim, name='log_var')(flat)
    self.z = Sampling(name='encoder_output')([self.mu, self.log_var])

    self.encoder = Model(encoder_input, [self.mu, self.log_var, self.z],
                         name='encoder')

    ### THE DECODER
    decoder_input = Input(shape=(self.z_dim, ), name='decoder_input')

    # Expand the latent vector back to the encoder's pre-flatten feature map.
    decoded = Dense(np.prod(shape_before_flattening))(decoder_input)
    decoded = Reshape(shape_before_flattening)(decoded)

    last_idx = self.n_layers_decoder - 1
    for idx in range(self.n_layers_decoder):
        decoded = Conv2DTranspose(
            filters=self.decoder_conv_t_filters[idx],
            kernel_size=self.decoder_conv_t_kernel_size[idx],
            strides=self.decoder_conv_t_strides[idx],
            padding='same',
            name='decoder_conv_t_' + str(idx))(decoded)

        if idx >= last_idx:
            # The final layer ends in a sigmoid instead of the usual
            # BatchNorm / LeakyReLU / Dropout tail.
            decoded = Activation('sigmoid')(decoded)
            continue

        if self.use_batch_norm:
            decoded = BatchNormalization()(decoded)
        decoded = LeakyReLU()(decoded)
        if self.use_dropout:
            decoded = Dropout(rate=0.25)(decoded)

    decoder_output = decoded
    self.decoder = Model(decoder_input, decoder_output, name='decoder')

    ### THE FULL VAE
    self.model = VAEModel(self.encoder, self.decoder, self.r_loss_factor)
def CreateModel(self):
    '''
    Define the CNN / GRU / CTC acoustic model with the functional API.

    Layout as built below:
      - input of shape (self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)
      - four stages of two 3x3 convolutions with dropout and max pooling
      - reshape to a (200, 3200) sequence, then a 128-unit dense layer
      - two levels of forward/backward GRUs (summed, then concatenated)
      - dense layers ending in a softmax over self.MS_OUTPUT_SIZE symbols
      - CTC loss computed by self.ctc_lambda_func in a Lambda layer

    Returns (model, model_data): the compiled training model whose output is
    the CTC loss, and the model mapping the input to the softmax output.
    '''
    input_data = Input(name='the_input',
                       shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    def conv3x3(filters, tensor):
        # 3x3 same-padded ReLU convolution with He-normal initialisation.
        return Conv2D(filters, (3, 3),
                      use_bias=True,
                      activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(tensor)

    # One row per stage:
    # (filters, dropout after 1st conv, pool size, dropout after pooling).
    # The last stage has no dropout directly after pooling.
    conv_stages = (
        (32, 0.1, 2, 0.2),
        (64, 0.2, 2, 0.3),
        (128, 0.3, 2, 0.3),
        (128, 0.4, 1, None),
    )
    feature_map = input_data
    for filters, mid_rate, pool, post_rate in conv_stages:
        feature_map = conv3x3(filters, feature_map)
        feature_map = Dropout(mid_rate)(feature_map)
        feature_map = conv3x3(filters, feature_map)
        feature_map = MaxPooling2D(pool_size=pool, strides=None,
                                   padding="valid")(feature_map)
        if post_rate is not None:
            feature_map = Dropout(post_rate)(feature_map)

    # Fold the feature map into a sequence for the recurrent layers.
    sequence = Reshape((200, 3200))(feature_map)
    sequence = Dropout(0.4)(sequence)
    sequence = Dense(128, activation="relu", use_bias=True,
                     kernel_initializer='he_normal')(sequence)
    sequence = Dropout(0.4)(sequence)

    rnn_size = 128
    # First recurrent level: forward and backward passes are summed.
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(sequence)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(sequence)
    gru1_merged = add([gru_1, gru_1b])
    # Second recurrent level: forward and backward passes are concatenated.
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    gru2 = concatenate([gru_2, gru_2b])

    # Dense head producing per-step symbol probabilities.
    head = Dropout(0.4)(gru2)
    head = Dense(128, activation="relu", use_bias=True,
                 kernel_initializer='he_normal')(head)
    head = Dropout(0.5)(head)
    head = Dense(self.MS_OUTPUT_SIZE, use_bias=True,
                 kernel_initializer='he_normal')(head)
    y_pred = Activation('softmax', name='Activation0')(head)

    model_data = Model(inputs=input_data, outputs=y_pred)

    # Extra inputs consumed only by the CTC loss.
    labels = Input(name='the_labels',
                   shape=[self.label_max_string_length],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The loss needs more than (y_true, y_pred), so it is computed inside a
    # Lambda layer and exposed as the training model's output.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                      name='ctc')(ctc_inputs)

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    model.summary()

    ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)
    # The 'ctc' output already is the loss, so the Keras loss just passes it
    # through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=ada_d)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    print('[*提示] 创建模型成功,模型编译成功')
    return model, model_data
def build_model(image_size,
                n_classes,
                mode='training',
                l2_regularization=0.0,
                min_scale=0.1,
                max_scale=0.9,
                scales=None,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=None,
                two_boxes_for_ar1=True,
                steps=None,
                offsets=None,
                clip_boxes=False,
                variances=[1.0, 1.0, 1.0, 1.0],
                coords='centroids',
                normalize_coords=False,
                subtract_mean=None,
                divide_by_stddev=None,
                swap_channels=False,
                confidence_thresh=0.01,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False):
    '''
    Build a Keras model with SSD architecture, see references.

    The model consists of convolutional feature layers and a number of
    convolutional predictor layers that take their input from different feature
    layers. The model is fully convolutional.

    The implementation found here is a smaller version of the original
    architecture used in the paper (where the base network consists of a
    modified VGG-16 extended by a few convolutional feature layers), but of
    course it could easily be changed to an arbitrarily large SSD architecture
    by following the general design pattern used here. This implementation has
    7 convolutional layers and 4 convolutional predictor layers that take their
    input from layers 4, 5, 6, and 7, respectively.

    Most of the arguments that this function takes are only needed for the
    anchor box layers. In case you're training the network, the parameters
    passed here must be the same as the ones used to set up `SSDBoxEncoder`.
    In case you're loading trained weights, the parameters passed here must be
    the same as the ones used to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation
    of the `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Training currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format
            `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal
            VOC, 80 for MS COCO. The background class is added internally.
        mode (str, optional): One of 'training', 'inference' and
            'inference_fast'. In 'training' mode, the model outputs the raw
            prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and
            filtered via confidence thresholding, non-maximum suppression, and
            top-k filtering. The difference between latter two modes is that
            'inference' follows the exact procedure of the original Caffe
            implementation, while 'inference_fast' uses a faster prediction
            decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate.
            Applies to all convolutional layers.
        min_scale (float, optional): The smallest scaling factor for the size
            of the anchor boxes as a fraction of the shorter side of the input
            images.
        max_scale (float, optional): The largest scaling factor for the size of
            the anchor boxes as a fraction of the shorter side of the input
            images. All scaling factors between the smallest and the largest
            will be linearly interpolated. Note that the second to last of the
            linearly interpolated scaling factors will actually be the scaling
            factor for the last predictor layer, while the last scaling factor
            is used for the second box for aspect ratio 1 in the last predictor
            layer if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list (or array) of floats containing scaling
            factors per convolutional predictor layer. This list must be one
            element longer than the number of predictor layers. The first `k`
            elements are the scaling factors for the `k` predictor layers,
            while the last element is used for the second box for aspect ratio
            1 in the last predictor layer if `two_boxes_for_ar1` is `True`.
            This additional last scaling factor must be passed either way, even
            if it is not being used. If a non-empty list is passed, this
            argument overrides `min_scale` and `max_scale`. All scaling factors
            must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for
            which anchor boxes are to be generated. This list is valid for all
            predictor layers. The original implementation uses more aspect
            ratios for some predictor layers and fewer for others. If you want
            to do that, too, then use the next argument instead.
        aspect_ratios_per_layer (list, optional): A list containing one aspect
            ratio list for each predictor layer. This allows you to set the
            aspect ratios for each predictor layer individually. If a non-empty
            list is passed, it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio
            lists that contain 1. Will be ignored otherwise. If `True`, two
            anchor boxes will be generated for aspect ratio 1. The first will
            be generated using the scaling factor for the respective layer, the
            second one will be generated using geometric mean of said scaling
            factor and next bigger scaling factor.
        steps (list, optional): `None` or a list with as many elements as there
            are predictor layers. The elements can be either ints/floats or
            tuples of two ints/floats. These numbers represent for each
            predictor layer how many pixels apart the anchor box center points
            should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of
            two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the
            anchor box center points will form an equidistant grid within the
            image dimensions.
        offsets (list, optional): `None` or a list with as many elements as
            there are predictor layers. The elements can be either floats or
            tuples of two floats. These numbers represent for each predictor
            layer how many pixels from the top and left borders of the image
            the top-most and left-most anchor box center points should be as a
            fraction of `steps`. The last bit is important: The offsets are not
            absolute pixel values, but fractions of the step size specified in
            the `steps` argument. If the list contains floats, then that value
            will be used for both spatial dimensions. If the list contains
            tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided,
            then they will default to 0.5 of the step size, which is also the
            recommended setting.
        clip_boxes (bool, optional): If `True`, clips the anchor box
            coordinates to stay within image boundaries.
        variances (list, optional): A list of 4 floats >0. The anchor box
            offset for each coordinate will be divided by its respective
            variance value.
        coords (str, optional): The box coordinate format to be used internally
            by the model (i.e. this is not the input format of the ground truth
            labels). Can be either 'centroids' for the format `(cx, cy, w, h)`
            (box center coordinates, width, and height), 'minmax' for the
            format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format
            `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is
            supposed to use relative instead of absolute coordinates, i.e. if
            the model predicts box coordinates within [0,1] instead of absolute
            coordinates.
        subtract_mean (array-like, optional): `None` or an array-like object of
            integers or floating point values of any shape that is
            broadcast-compatible with the image shape. The elements of this
            array will be subtracted from the image pixel intensity values. For
            example, pass a list of three integers to perform per-channel mean
            normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object
            of non-zero integers or floating point values of any shape that is
            broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For
            example, pass a list of three integers to perform per-channel
            standard deviation normalization for color images.
        swap_channels (list, optional): Either `False` or a list of integers
            representing the desired order in which the input image channels
            should be swapped.
        confidence_thresh (float, optional): A float in [0,1), the minimum
            classification confidence in a specific positive class in order to
            be considered for the non-maximum suppression stage for the
            respective class. A lower value will result in a larger part of the
            selection process being done by the non-maximum suppression stage,
            while a larger value will result in a larger part of the selection
            process happening in the confidence thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have
            a Jaccard similarity of greater than `iou_threshold` with a locally
            maximal box will be removed from the set of predictions for a given
            class, where 'maximal' refers to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be
            kept for each batch item after the non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions
            that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not
            only returns the model, but also a list containing the spatial
            dimensions of the predictor layers. This isn't strictly necessary
            since you can always get their sizes easily via the Keras API, but
            it's convenient and less error-prone to get them this way. They are
            only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you
            don't need them.

    Returns:
        model: The Keras SSD model.
        predictor_sizes (optional): A Numpy array containing the
            `(height, width)` portion of the output tensor shape for each
            convolutional predictor layer. During training, the generator
            function needs this in order to transform the ground truth labels
            into tensors of identical structure as the output tensors of the
            model, which is in turn needed for the cost function.

    Raises:
        ValueError: If `mode` is unknown, if no aspect ratios or scales are
            specified, or if `aspect_ratios_per_layer`, `scales`, `variances`,
            `steps` or `offsets` have the wrong length or invalid values.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 4  # The number of predictor conv layers in the network
    # Indices of the base-network conv stages that feed the predictor layers.
    predictor_sources = (4, 5, 6, 7)
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = (image_size[0], image_size[1],
                                           image_size[2])
    image_shape = (img_height, img_width, img_channels)

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    # Reject an unknown mode before any layer is created instead of after the
    # whole graph has been built.
    if mode not in ('training', 'inference', 'inference_fast'):
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    # Explicit None/length checks (rather than bare truthiness) so that NumPy
    # arrays are accepted and an empty per-layer list counts as "not given".
    has_per_layer_ratios = (aspect_ratios_per_layer is not None
                            and len(aspect_ratios_per_layer) > 0)

    if aspect_ratios_global is None and not has_per_layer_ratios:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if has_per_layer_ratios and len(
            aspect_ratios_per_layer) != n_predictor_layers:
        raise ValueError(
            "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
            .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales is not None and len(scales) > 0:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        # If no explicit list of scaling factors was passed, compute the list
        # of scaling factors from `min_scale` and `max_scale`.
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    # We need one variance value for each of the four box coordinates.
    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if steps is not None and len(steps) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if offsets is not None and len(offsets) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for
    # the anchor box layers.
    if has_per_layer_ratios:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor
    # layer. We need this so that we know how many channels the predictor
    # layers need to have. Aspect ratio 1 gets a second box if requested.
    n_boxes = [
        len(ratios) + 1 if (1 in ratios) and two_boxes_for_ar1 else len(ratios)
        for ratios in aspect_ratios
    ]

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    def input_channel_swap(tensor):
        # Works for any number of channels, not only 3 or 4.
        return K.stack([tensor[..., channel] for channel in swap_channels],
                       axis=-1)

    def predictor_conv(depth, layer_name, feature_map):
        # 3x3 'same' convolution used for both class and box predictors.
        return Conv2D(depth, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name=layer_name)(feature_map)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=image_shape)

    # The following identity layer is only needed so that the subsequent lambda
    # layers can be optional.
    x1 = Lambda(identity_layer, output_shape=image_shape,
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(input_mean_normalization,
                    output_shape=image_shape,
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(input_stddev_normalization,
                    output_shape=image_shape,
                    name='input_stddev_normalization')(x1)
    if swap_channels:
        x1 = Lambda(input_channel_swap,
                    output_shape=image_shape,
                    name='input_channel_swap')(x1)

    # Base network: seven conv -> batch norm -> ELU stages, each but the last
    # followed by 2x2 max pooling. Layers are created in the same order and
    # with the same names as before so that saved weights keep loading.
    base_stages = ((32, (5, 5)), (48, (3, 3)), (64, (3, 3)), (64, (3, 3)),
                   (48, (3, 3)), (48, (3, 3)), (32, (3, 3)))
    feature_maps = {}
    net = x1
    for stage, (n_filters, kernel_size) in enumerate(base_stages, start=1):
        net = Conv2D(n_filters,
                     kernel_size,
                     strides=(1, 1),
                     padding="same",
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv{}'.format(stage))(net)
        # Tensorflow uses filter format [filter_height, filter_width,
        # in_channels, out_channels], hence axis = 3.
        net = BatchNormalization(axis=3,
                                 momentum=0.99,
                                 name='bn{}'.format(stage))(net)
        net = ELU(name='elu{}'.format(stage))(net)
        feature_maps[stage] = net  # post-activation map, used by predictors
        if stage < len(base_stages):
            net = MaxPooling2D(pool_size=(2, 2),
                               name='pool{}'.format(stage))(net)

    # The next part is to add the convolutional predictor layers on top of the
    # base network that we defined above. Note that I use the term "base
    # network" differently than the paper does. To me, the base network is
    # everything that is not convolutional predictor layers or anchor box
    # layers. In this case we'll have four predictor layers, but of course you
    # could easily rewrite this into an arbitrarily deep base network and add
    # an arbitrary number of predictor layers on top of the base network by
    # simply following the pattern shown here.

    # Build the convolutional predictor layers on top of conv layers 4, 5, 6,
    # and 7. We build two predictor layers on top of each of these layers: one
    # for class prediction (classification), one for box coordinate prediction
    # (localization).
    # We predict `n_classes` confidence values for each box, hence the
    # `classes` predictors have depth `n_boxes * n_classes`.
    # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)`
    class_maps = [
        predictor_conv(n_boxes[i] * n_classes, 'classes{}'.format(src),
                       feature_maps[src])
        for i, src in enumerate(predictor_sources)
    ]
    # We predict 4 box coordinates for each box, hence the `boxes` predictors
    # have depth `n_boxes * 4`.
    # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)`
    box_maps = [
        predictor_conv(n_boxes[i] * 4, 'boxes{}'.format(src),
                       feature_maps[src])
        for i, src in enumerate(predictor_sources)
    ]

    # Generate the anchor boxes
    # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)`
    anchor_maps = [
        AnchorBoxes(img_height,
                    img_width,
                    this_scale=scales[i],
                    next_scale=scales[i + 1],
                    aspect_ratios=aspect_ratios[i],
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    this_steps=steps[i],
                    this_offsets=offsets[i],
                    clip_boxes=clip_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords,
                    name='anchors{}'.format(src))(box_maps[i])
        for i, src in enumerate(predictor_sources)
    ]

    # Reshape the class predictions, yielding 3D tensors of shape
    # `(batch, height * width * n_boxes, n_classes)`.
    # We want the classes isolated in the last axis to perform softmax on them.
    classes_reshaped = [
        Reshape((-1, n_classes), name='classes{}_reshape'.format(src))(tensor)
        for src, tensor in zip(predictor_sources, class_maps)
    ]
    # Reshape the box coordinate predictions, yielding 3D tensors of shape
    # `(batch, height * width * n_boxes, 4)`.
    # We want the four box coordinates isolated in the last axis to compute the
    # smooth L1 loss.
    boxes_reshaped = [
        Reshape((-1, 4), name='boxes{}_reshape'.format(src))(tensor)
        for src, tensor in zip(predictor_sources, box_maps)
    ]
    # Reshape the anchor box tensors, yielding 3D tensors of shape
    # `(batch, height * width * n_boxes, 8)`.
    anchors_reshaped = [
        Reshape((-1, 8), name='anchors{}_reshape'.format(src))(tensor)
        for src, tensor in zip(predictor_sources, anchor_maps)
    ]

    # Concatenate the predictions from the different layers and the associated
    # anchor box tensors. Axis 0 (batch) and axis 2 (n_classes or 4,
    # respectively) are identical for all layer predictions, so we want to
    # concatenate along axis 1.
    # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
    classes_concat = Concatenate(axis=1,
                                 name='classes_concat')(classes_reshaped)
    # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
    boxes_concat = Concatenate(axis=1, name='boxes_concat')(boxes_reshaped)
    # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
    anchors_concat = Concatenate(axis=1,
                                 name='anchors_concat')(anchors_reshaped)

    # The box coordinate predictions will go into the loss function just the
    # way they are, but for the class predictions, we'll apply a softmax
    # activation layer first.
    classes_softmax = Activation('softmax',
                                 name='classes_softmax')(classes_concat)

    # Concatenate the class and box coordinate predictions and the anchors to
    # one large predictions tensor.
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [classes_softmax, boxes_concat, anchors_concat])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    else:
        # `mode` was validated above, so it is one of the two inference modes.
        if mode == 'inference':
            decoder_layer = DecodeDetections
        else:
            decoder_layer = DecodeDetectionsFast
        decoded_predictions = decoder_layer(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)

    if return_predictor_sizes:
        # The spatial dimensions are the same for the `classes` and `boxes`
        # predictor layers. `K.int_shape` is the public way to read the static
        # shape and does not rely on the private `_keras_shape` attribute.
        predictor_sizes = np.array(
            [K.int_shape(tensor)[1:3] for tensor in class_maps])
        return model, predictor_sizes
    else:
        return model
def build_cifar10_generator(ngf=64, z_dim=128):
    """
    Assemble the DCGAN generator used for CIFAR10.

    A dense projection of the latent vector is reshaped to a 2x2 feature map
    and then upsampled by four stride-2 transposed convolutions. The model
    summary is printed before the model is returned.

    PARAMS
    ------
    ngf: number of generator filters
    z_dim: number of dimensions in latent vector

    RETURN
    ------
    G: keras sequential
    """
    weight_init = initializers.RandomNormal(stddev=0.02)

    generator = Sequential()

    # Dense projection, reshaped to 2x2x(ngf*8) -> 2x2x512 for the default ngf
    generator.add(
        Dense(2 * 2 * ngf * 8,
              input_shape=(z_dim, ),
              use_bias=True,
              kernel_initializer=weight_init))
    generator.add(Reshape((2, 2, ngf * 8)))
    generator.add(BatchNormalization())
    generator.add(LeakyReLU(0.2))

    # Three upsampling stages; with the default ngf these give
    # 4x4x256, 8x8x128 and 16x16x64 feature maps.
    for n_filters in (ngf * 4, ngf * 2, ngf):
        generator.add(
            Conv2DTranspose(n_filters,
                            kernel_size=5,
                            strides=2,
                            padding='same',
                            use_bias=True,
                            kernel_initializer=weight_init))
        generator.add(BatchNormalization())
        generator.add(LeakyReLU(0.2))

    # Output stage: 32x32x3 image squashed by tanh
    generator.add(
        Conv2DTranspose(3,
                        kernel_size=5,
                        strides=2,
                        padding='same',
                        use_bias=True,
                        kernel_initializer=weight_init))
    generator.add(Activation('tanh'))

    print("\nGenerator")
    generator.summary()

    return generator
batch_tr = return_batch(0, a, b, c, d, e)

# Create the test/train generators up front so fit_model can reuse them.
gen_ts = gen_test(number)
gen_tr = gen_train(number)

# Model: a shared convolutional encoder followed by three identical decoder
# branches (b1, b2, b3) that each map the bottleneck back to the input size.
inp = Input(shape=(513, 30, 1), batch_size=32)

# Encoder: collapse the 513-row axis first, then convolve along the 30 columns.
layer_conv1 = Conv2D(filters=50, kernel_size=(513, 1), padding='valid')(inp)
layer_conv2 = Conv2D(filters=50, kernel_size=(1, 15),
                     padding='valid')(layer_conv1)
layer_flat = Flatten()(layer_conv2)
layer_dense = Dense(units=128, activation='relu')(layer_flat)

# Sizes needed to undo the flatten inside every decoder branch.
flat_units = int(layer_flat.shape[1])
conv2_shape = (int(layer_conv2.shape[1]), int(layer_conv2.shape[2]),
               int(layer_conv2.shape[3]))

decoder_branches = []
for branch_index in range(3):
    # Expand the bottleneck, restore the conv2 feature-map shape, then mirror
    # the two encoder convolutions with transposed convolutions.
    branch = Dense(units=flat_units, activation='relu')(layer_dense)
    branch = Reshape(target_shape=conv2_shape)(branch)
    branch = Conv2DTranspose(filters=50, kernel_size=(1, 15),
                             padding='valid')(branch)
    branch = Conv2DTranspose(filters=1, kernel_size=(513, 1),
                             padding='valid')(branch)
    decoder_branches.append(branch)

b1, b2, b3 = decoder_branches
def get_model(args):
    """Build and compile the model named by ``args.model_architecture``.

    Supported names are ``"fc4"`` (fully connected), ``'ds_cnn'``
    (depthwise-separable CNN on a 2D feature map) and ``'td_cnn'`` (a strided
    convolution front end followed by the same separable stack). The label
    count is fixed at 12 and the remaining dimensions come from
    ``prepare_model_settings``.

    Args:
        args: object providing ``model_architecture`` and ``learning_rate``;
            it is also forwarded to ``prepare_model_settings``.

    Returns:
        The model, compiled with Adam, sparse categorical cross-entropy and
        sparse categorical accuracy.

    Raises:
        ValueError: if the model name is not one of the supported names.
    """
    model_name = args.model_architecture
    label_count = 12
    model_settings = prepare_model_settings(label_count, args)

    def _conv_stem(tensor, n_filters, kernel, strides, padding, reg):
        # Plain conv -> batch norm -> ReLU -> dropout(0.2).
        tensor = Conv2D(n_filters,
                        kernel,
                        strides=strides,
                        padding=padding,
                        kernel_regularizer=reg)(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = Activation('relu')(tensor)
        return Dropout(rate=0.2)(tensor)

    def _separable_stack(tensor, n_filters, reg, n_blocks=4):
        # Each block is a 3x3 depthwise conv followed by a 1x1 pointwise conv,
        # both with batch norm and ReLU.
        for _ in range(n_blocks):
            tensor = DepthwiseConv2D(depth_multiplier=1,
                                     kernel_size=(3, 3),
                                     padding='same',
                                     kernel_regularizer=reg)(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = Activation('relu')(tensor)
            tensor = Conv2D(n_filters, (1, 1),
                            padding='same',
                            kernel_regularizer=reg)(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = Activation('relu')(tensor)
        return tensor

    def _build_fc4():
        # Flatten, three Dense(256)/Dropout/BatchNorm groups, softmax head.
        stack = [
            tf.keras.layers.Flatten(
                input_shape=(model_settings['spectrogram_length'],
                             model_settings['dct_coefficient_count']))
        ]
        for _ in range(3):
            stack.append(tf.keras.layers.Dense(256, activation='relu'))
            stack.append(tf.keras.layers.Dropout(0.2))
            stack.append(tf.keras.layers.BatchNormalization())
        stack.append(
            tf.keras.layers.Dense(model_settings['label_count'],
                                  activation="softmax"))
        return tf.keras.models.Sequential(stack)

    def _build_ds_cnn():
        print("DS CNN model invoked")
        input_shape = [
            model_settings['spectrogram_length'],
            model_settings['dct_coefficient_count'], 1
        ]
        filters = 64
        weight_decay = 1e-4
        regularizer = l2(weight_decay)
        # Pool window is half the input size in each dimension, matching the
        # stride-2 stem below.
        final_pool_size = (int(input_shape[0] / 2), int(input_shape[1] / 2))

        inputs = Input(shape=input_shape)
        x = _conv_stem(inputs, filters, (10, 4), (2, 2), 'same', regularizer)
        x = _separable_stack(x, filters, regularizer)

        # Reduce size and apply final softmax
        x = Dropout(rate=0.4)(x)
        x = AveragePooling2D(pool_size=final_pool_size)(x)
        x = Flatten()(x)
        outputs = Dense(model_settings['label_count'],
                        activation='softmax')(x)
        return Model(inputs=inputs, outputs=outputs)

    def _build_td_cnn():
        print("TD CNN model invoked")
        input_shape = [
            model_settings['spectrogram_length'],
            model_settings['dct_coefficient_count'], 1
        ]
        print(f"Input shape = {input_shape}")
        filters = 64
        weight_decay = 1e-4
        regularizer = l2(weight_decay)

        inputs = Input(shape=input_shape)
        # Strided front-end convolution along the first axis.
        x = _conv_stem(inputs, filters, (512, 1), (384, 1), 'valid',
                       regularizer)
        # NOTE(review): the target shape is hard-coded, so this only works
        # when the stem output holds exactly 41 * 64 values -- confirm the
        # expected input size with the caller.
        x = Reshape((41, 64, 1))(x)
        # From here on the network mirrors the ds_cnn body.
        x = _conv_stem(x, filters, (10, 4), (2, 2), 'same', regularizer)
        x = _separable_stack(x, filters, regularizer)

        # Reduce size and apply final softmax
        x = Dropout(rate=0.4)(x)
        x = GlobalAveragePooling2D()(x)
        x = Flatten()(x)
        outputs = Dense(model_settings['label_count'],
                        activation='softmax')(x)
        return Model(inputs=inputs, outputs=outputs)

    if model_name == "fc4":
        model = _build_fc4()
    elif model_name == 'ds_cnn':
        model = _build_ds_cnn()
    elif model_name == 'td_cnn':
        model = _build_td_cnn()
    else:
        raise ValueError("Model name {:} not supported".format(model_name))

    optimizer = keras.optimizers.Adam(learning_rate=args.learning_rate)
    loss_fn = keras.losses.SparseCategoricalCrossentropy()
    tracked_metrics = [keras.metrics.SparseCategoricalAccuracy()]
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=tracked_metrics)
    return model
def ResNet_autoencoder(height, width, depth, latentDim=16):
    """Build a ResNet-style convolutional autoencoder with skip connections.

    The encoder runs four stride-2 stages (two plain conv stages followed by
    two residual stages), flattens the result and projects it onto a dense
    code of width ``latentDim``. The decoder expands that code back to the
    bottleneck volume and upsamples through four stages, concatenating the
    matching encoder feature map in front of each stage.

    # Arguments
        height: input height in pixels.
        width: input width in pixels.
        depth: number of input channels (channel axis is 3, i.e. last).
        latentDim: number of units in the dense latent layer.

    # Returns
        An uncompiled Keras `Model` named 'ResNet_autoencoder'. Its output is
        produced by a 1-filter transposed convolution followed by a sigmoid.
    """

    def _seeded():
        # One fresh, identically seeded initializer per layer.
        return glorot_uniform(seed=0)

    def _conv_bn(tensor, n_filters, kernel, stride, tag):
        # 'same'-padded convolution + batch norm, named after the stage tag.
        tensor = Conv2D(n_filters, kernel, strides=stride, name='conv' + tag,
                        padding='same', kernel_initializer=_seeded())(tensor)
        return BatchNormalization(axis=3, name='bn_conv' + tag)(tensor)

    def _deconv_bn(tensor, n_filters, kernel, stride, tag, **conv_kwargs):
        # Transposed convolution + batch norm; padding is left to the caller
        # because the last decoder stage relies on the layer default.
        tensor = Conv2DTranspose(n_filters, kernel, strides=stride,
                                 name='conv' + tag,
                                 kernel_initializer=_seeded(),
                                 **conv_kwargs)(tensor)
        return BatchNormalization(axis=3, name='bn_conv' + tag)(tensor)

    def _crop_first_row(tensor):
        # Drops one row at the start of the first spatial axis; presumably
        # this lines the upsampled map up with the next skip connection for
        # the input heights the network was designed for.
        return Cropping2D(cropping=((1, 0), (0, 0)), data_format=None)(tensor)

    inputs = Input(shape=(height, width, depth))

    # ---- Encoder, stage 1 ----
    net = _conv_bn(inputs, 32, (3, 3), (2, 2), '1-1')
    net = Activation('relu')(net)
    skip_1 = _conv_bn(net, 32, (1, 1), (1, 1), '1-2')  # tapped before ReLU
    net = Activation('relu')(skip_1)

    # ---- Encoder, stage 2 ----
    net = _conv_bn(net, 64, (3, 3), (2, 2), '2-1')
    net = Activation('relu')(net)
    skip_2 = _conv_bn(net, 64, (1, 1), (1, 1), '2-2')  # tapped before ReLU
    net = Activation('relu')(skip_2)

    # ---- Encoder, stage 3 (residual) ----
    net, _ = convolutional_block(net, f=3, filters=[64, 64, 128],
                                 stage=3, block='a', s=2)
    net, skip_3 = identity_block(net, 3, [64, 64, 128], stage=3, block='b')

    # ---- Encoder, stage 4 (residual) ----
    net, _ = convolutional_block(net, f=3, filters=[128, 128, 256],
                                 stage=4, block='a', s=2)
    net, skip_4 = identity_block(net, 3, [128, 128, 256], stage=4, block='b')

    # ---- Latent code ----
    # Remember the bottleneck volume so the decoder can rebuild it.
    bottleneck_shape = K.int_shape(net)
    rows, cols, channels = bottleneck_shape[1:4]
    net = Flatten()(net)
    code = Dense(latentDim)(net)

    # Expand the code back to the bottleneck volume.
    net = Dense(np.prod(bottleneck_shape[1:]))(code)
    net = Reshape((rows, cols, channels))(net)

    # ---- Decoder, stage 1 (block stage id 5) ----
    net = Concatenate()([net, skip_4])
    net, _ = identity_block_transpose(net, 3, [512, 256, 512],
                                      stage=5, block='b')
    net = Conv2DTranspose(256, (1, 1), strides=(1, 1), padding='same',
                          kernel_initializer=_seeded())(net)
    net, _ = convolutional_block_transpose(net, f=3, filters=[256, 128, 128],
                                           stage=5, block='a', s=2)
    # No crop after this stage (the crop is disabled here in the original).

    # ---- Decoder, stage 2 (block stage id 6) ----
    net = Concatenate()([net, skip_3])
    net, _ = identity_block_transpose(net, 3, [256, 128, 256],
                                      stage=6, block='b')
    net = Conv2DTranspose(256, (1, 1), strides=(1, 1), padding='same',
                          kernel_initializer=_seeded())(net)
    net, _ = convolutional_block_transpose(net, f=3, filters=[128, 64, 64],
                                           stage=6, block='a', s=2)
    net = _crop_first_row(net)

    # ---- Decoder, stage 3 ----
    net = Concatenate()([net, skip_2])
    net = _deconv_bn(net, 128, (1, 1), (1, 1), '7-1', padding='same')
    net = Activation('relu')(net)
    net = _deconv_bn(net, 64, (3, 3), (2, 2), '7-2', padding='same')
    net = Activation('relu')(net)
    net = _crop_first_row(net)

    # ---- Decoder, stage 4 ----
    net = Concatenate()([net, skip_1])
    net = _deconv_bn(net, 64, (1, 1), (1, 1), '8-1')
    net = Activation('relu')(net)
    net = _deconv_bn(net, 32, (1, 1), (1, 1), '8-2')

    # Final stride-2 upsampling to a single channel, squashed by a sigmoid.
    net = Conv2DTranspose(1, (3, 3), strides=(2, 2), padding="same")(net)
    outputs = Activation("sigmoid")(net)

    return Model(inputs=inputs, outputs=outputs, name='ResNet_autoencoder')
def generator(self):
    """Build the generator: a 100-d input vector mapped to a 128x128x3 image.

    The vector is projected to a 4x4x1024 volume and then passed through
    seven Conv-BatchNorm-LeakyReLU stages, five of which double the spatial
    size, before a final 3-filter convolution with a tanh activation.

    # Returns
        An uncompiled Keras `Model` from the input vector to the tanh output.
    """
    latent = Input(shape=(100, ))

    # Project and reshape: 100 -> 4x4x1024
    h = Dense(4 * 4 * 1024, input_shape=(100, ))(latent)
    h = Reshape(target_shape=(4, 4, 1024))(h)
    h = BatchNormalization()(h)
    h = LeakyReLU(0.02)(h)

    # (conv filters, whether a 2x upsampling follows the activation)
    stages = (
        (512, True),    # 4x4x1024    -> 8x8x512
        (256, True),    # 8x8x512     -> 16x16x256
        (256, False),   # 16x16x256   -> 16x16x256
        (128, True),    # 16x16x256   -> 32x32x128
        (128, True),    # 32x32x128   -> 64x64x128
        (128, True),    # 64x64x128   -> 128x128x128
        (128, False),   # 128x128x128 -> 128x128x128
    )
    for n_filters, upsample in stages:
        h = Conv2D(filters=n_filters, kernel_size=5, padding='same',
                   use_bias=False)(h)
        h = BatchNormalization()(h)
        h = LeakyReLU(0.02)(h)
        if upsample:
            h = UpSampling2D()(h)

    # Output head: 128x128x128 -> 128x128x3, values squashed by tanh
    h = Conv2D(filters=3, kernel_size=5, padding='same', use_bias=False)(h)
    image = Activation('tanh')(h)

    return Model(inputs=latent, outputs=image)
def pointnet_cls(include_top=True, weights=None, input_tensor=None,
                 input_shape=(2048, 3), pooling=None, classes=40,
                 activation=None, use_tnet=True):
    """
    PointNet model for object classification

    :param include_top: whether to include the stack of fully connected layers
    :param weights: one of `None` (random initialization),
        'modelnet' (pre-training on ModelNet),
        or the path to the weights file to be loaded.
    :param input_tensor: optional tensor of size BxNxK
    :param input_shape: shape of the input point clouds (NxK); only used when
        `input_tensor` is None
    :param pooling: Optional pooling mode for feature extraction
        when `include_top` is `False`.
        - `None` means that the output of the model will be the output of
            the base network, without any pooling over the points.
        - `avg` means that average pooling over all points is applied to
            that output, giving one feature vector per cloud.
        - `max` means that max pooling over all points is applied instead.
        Any other value leaves the base output untouched.
    :param classes: number of classes in the classification problem; if dict
        (head name -> number of classes), construct multiple disjoint top
        layers and return a model with one output per key
    :param activation: activation of the last layer (default None).
    :param use_tnet: whether to use the transformation subnets or not.
    :return: Keras model of the classification network
    """
    assert K.image_data_format() == 'channels_last', \
        'pointnet_cls requires the channels_last image data format'

    # Generate input tensor and get base network
    if input_tensor is None:
        input_tensor = Input(input_shape, name='Input_cloud')
    # Number of points N, taken from the second-to-last input axis
    num_point = input_tensor.shape[-2]
    net = pointnet_base(input_tensor, use_tnet)

    # Top layers
    if include_top:
        # Symmetric function: max pooling over all points.
        # Done in 2D (on a tensor with an extra trailing axis) since 1D is
        # painfully slow.
        net = MaxPooling2D(pool_size=(num_point, 1), padding='valid',
                           name='maxpool')(Lambda(K.expand_dims)(net))
        net = Reshape((1024, ))(net)
        if isinstance(classes, dict):
            # Disjoint stacks of fc layers, one per key in the dict; layer
            # names are prefixed with the key so the heads stay separable.
            net = [
                dense_bn(net, units=512, scope=r + '_fc1', activation='relu')
                for r in classes
            ]
            net = [
                Dropout(0.3, name=r + '_dp1')(n)
                for r, n in zip(classes, net)
            ]
            net = [
                dense_bn(n, units=256, scope=r + '_fc2', activation='relu')
                for r, n in zip(classes, net)
            ]
            net = [
                Dropout(0.3, name=r + '_dp2')(n)
                for r, n in zip(classes, net)
            ]
            net = [
                Dense(units=classes[r], activation=activation, name=r)(n)
                for r, n in zip(classes, net)
            ]
        else:
            # Fully connected layers for a single classification task
            net = dense_bn(net, units=512, scope='fc1', activation='relu')
            net = Dropout(0.3, name='dp1')(net)
            net = dense_bn(net, units=256, scope='fc2', activation='relu')
            net = Dropout(0.3, name='dp2')(net)
            net = Dense(units=classes, name='fc3', activation=activation)(net)
    else:
        # Each pooling mode must build the layer it is named after: 'avg'
        # averages over the points, 'max' takes the per-feature maximum.
        if pooling == 'avg':
            net = AveragePooling2D(pool_size=(num_point, 1), padding='valid',
                                   name='avgpool')(Lambda(K.expand_dims)(net))
            # After pooling the point axis has size 1; keep the feature axis.
            net = Reshape((net.shape[-2], ))(net)
        elif pooling == 'max':
            net = MaxPooling2D(pool_size=(num_point, 1), padding='valid',
                               name='maxpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)

    model = Model(input_tensor, net, name='pointnet_cls')

    # Load weights.
    if weights == 'modelnet':
        weights_path = keras_utils.get_file(
            'pointnet_modelnet_weights_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH,
            cache_subdir='models')
        # by_name so that the weights still load when the top is changed
        model.load_weights(weights_path, by_name=True)
        if K.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        # Any other non-None value is treated as a path to a weights file
        model.load_weights(weights, by_name=True)

    return model
sSAE_encoder.layers[5].set_weights( autoencoder_3.layers[2].get_weights()) # third Dense sSAE_encoder.layers[6].set_weights( autoencoder_3.layers[3].get_weights()) # third BN encoded_train = sSAE_encoder.predict(train) encoded_test = sSAE_encoder.predict(test) np.save('data/encoded_train.npy', encoded_train) np.save('data/train_label.npy', train_label) np.save('data/encoded_test.npy', encoded_test) np.save('data/test_label.npy', test_label) # 级联两层Dense 最后加一个softmax mlp0 = Dense(units=32, activation='relu')(sSAE_encoder.output) lstm_reshape = Reshape((1, 32))(mlp0) lstm = LSTM(units=16, activation='tanh', return_sequences=False)(lstm_reshape) lstm_drop = Dropout(0.3)(lstm) mlp = Dense(units=10, activation='relu')(lstm_drop) mlp2 = Dense(units=1, activation='sigmoid')(mlp) classifier = Model(sSAE_encoder.input, mlp2) optimize = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8) classifier.compile(optimizer=optimize, loss='binary_crossentropy', metrics=['accuracy']) save_dir = os.path.join(os.getcwd(), 'saved_models_temp') filepath = "best_model.hdf5"
def init_model(self):
    """
    Assemble the 3D U-Net graph for inputs of shape ``self.img_shape``.

    The network has ``self.depth`` encoder levels (two convolutions, batch
    norm, 2x2x2 max-pool), a bottom level without pooling, and
    ``self.depth`` decoder levels that upsample, merge with the cropped
    encoder output of the same level and convolve twice. The filter count
    starts at 64 (scaled by ``self.cf``), doubles per encoder level and
    halves per decoder level.

    Returns:
        A pair ``([inputs], [out])``: single-element lists holding the input
        tensor and the output tensor. No Model object is created here.
    """
    inputs = Input(shape=self.img_shape)

    # Attach an L2 kernel penalty only when l2_reg is neither None nor 0
    if self.l2_reg:
        kr = regularizers.l2(self.l2_reg)
    else:
        kr = None

    def conv(tensor, n_filters, kernel):
        # One Conv3D layer with the model-wide activation/padding settings;
        # the nominal filter count is scaled by the complexity factor cf.
        return Conv3D(int(n_filters * self.cf), kernel,
                      activation=self.activation, padding=self.padding,
                      kernel_regularizer=kr)(tensor)

    def double_conv_bn(tensor, n_filters):
        # Two stacked convolutions followed by batch normalization.
        tensor = conv(tensor, n_filters, self.kernel_size)
        tensor = conv(tensor, n_filters, self.kernel_size)
        return BatchNormalization()(tensor)

    # ----- Encoding path -----
    filters = 64
    skips = []
    current = inputs
    for _ in range(self.depth):
        bn = double_conv_bn(current, filters)
        current = MaxPooling3D(pool_size=(2, 2, 2))(bn)

        # Keep the pre-pooling output for the skip connection and double
        # the filter count for the next level.
        skips.append(bn)
        filters *= 2

    # ----- Bottom (no max-pool) -----
    bn = double_conv_bn(current, filters)

    # ----- Up-sampling path, deepest skip connection first -----
    for skip in reversed(skips):
        # Reduce filter count
        filters /= 2

        # Up-sampling block.
        # Note: a kernel size of 2 is kept for backward compatibility; a
        # size of 3 is probably the better choice here.
        up = UpSampling3D(size=(2, 2, 2))(bn)
        up_conv = conv(up, filters, 2)
        bn = BatchNormalization()(up_conv)

        # Crop the encoder output to the decoder's size and concatenate
        # along the channel axis.
        cropped = self.crop_nodes_to_match(skip, bn)
        merged = Concatenate(axis=-1)([cropped, bn])

        bn = double_conv_bn(merged, filters)

    # ----- Output modeling layer -----
    out = Conv3D(self.n_classes, 1, activation=self.out_activation)(bn)
    if self.flatten_output:
        # Collapse the three spatial axes into a single voxel axis
        out = Reshape([np.prod(self.img_shape[:3]), self.n_classes],
                      name='flatten_output')(out)

    return [inputs], [out]