def inception_block(x, filters=256):
    """Build parallel convolution branches over ``x`` and concatenate them.

    Four branches are always created:

    * a single 1x1x1 convolution,
    * 1x1x1 (reduced width) followed by one 3x3x3 convolution,
    * 1x1x1 (reduced width) followed by two 3x3x3 convolutions,
    * 3x3x3 stride-1 average pooling followed by a 1x1x1 convolution.

    When ``INCEPTION_ENABLE_SPATIAL_SEPARABLE_CONV`` is truthy a fifth branch
    is added: 1x1x1 (reduced width) followed by 5x1x1, 1x5x1 and 1x1x5
    convolutions. The shape of every branch and of the result is printed.

    :param x: input tensor (assumed 5-D with features on axis 4 -- TODO confirm)
    :param filters: filter count of each branch's final convolution
    :return: the branches concatenated along axis 4
    """
    # Width of the 1x1x1 "reduction" convolutions that open the deeper branches.
    reduced = int(filters * INCEPTION_ENABLE_DEPTHWISE_SEPARABLE_CONV_SHRINKAGE)
    unit, cube = (1, 1, 1), (3, 3, 3)

    pointwise = conv_bn_relu(x, filters=filters, kernel_size=unit)

    single_3x3 = conv_bn_relu(x, filters=reduced, kernel_size=unit)
    single_3x3 = conv_bn_relu(single_3x3, filters=filters, kernel_size=cube)

    double_3x3 = conv_bn_relu(x, filters=reduced, kernel_size=unit)
    for _ in range(2):
        double_3x3 = conv_bn_relu(double_3x3, filters=filters, kernel_size=cube)

    pooled = AveragePooling3D(pool_size=cube, strides=unit, padding='same')(x)
    pooled = conv_bn_relu(pooled, filters=filters, kernel_size=unit)

    branches = [pointwise, single_3x3, double_3x3, pooled]
    print('inception_block')
    for branch in branches:
        print(branch.get_shape())

    if INCEPTION_ENABLE_SPATIAL_SEPARABLE_CONV:
        # One 5-wide kernel per spatial axis, applied in sequence.
        factorized = conv_bn_relu(x, filters=reduced, kernel_size=unit)
        for kernel in ((5, 1, 1), (1, 5, 1), (1, 1, 5)):
            factorized = conv_bn_relu(factorized, filters=filters, kernel_size=kernel)
        branches.append(factorized)
        print(factorized.get_shape())

    merged = Concatenate(axis=4)(branches)
    print(merged.get_shape())
    return merged
def reduction_block(x, filters=256):
    """Build parallel stride-2 branches over ``x`` and concatenate them.

    Three branches are always created:

    * a 3x3x3 convolution with stride 2,
    * 1x1x1 -> 3x3x3 -> 3x3x3 (stride 2) convolutions,
    * 3x3x3 max pooling with stride 2 followed by a 1x1x1 convolution.

    When ``INCEPTION_ENABLE_SPATIAL_SEPARABLE_CONV`` is truthy a fourth branch
    is added: 1x1x1, then 5x1x1, 1x5x1 and 1x1x5 convolutions, then a 3x3x3
    convolution with stride 2. The shape of every branch and of the result is
    printed.

    :param x: input tensor (assumed 5-D with features on axis 4 -- TODO confirm)
    :param filters: filter count used by every convolution in the block
    :return: the branches concatenated along axis 4
    """
    unit, cube, half = (1, 1, 1), (3, 3, 3), (2, 2, 2)

    strided = conv_bn_relu(x, filters=filters, kernel_size=cube,
                           strides=half, padding='same')

    deep = conv_bn_relu(x, filters=filters, kernel_size=unit)
    deep = conv_bn_relu(deep, filters=filters, kernel_size=cube)
    deep = conv_bn_relu(deep, filters=filters, kernel_size=cube,
                        strides=half, padding='same')

    pooled = MaxPooling3D(pool_size=cube, strides=half, padding='same')(x)
    pooled = conv_bn_relu(pooled, filters=filters, kernel_size=unit)

    branches = [strided, deep, pooled]
    print('reduction_block')
    for branch in branches:
        print(branch.get_shape())

    if INCEPTION_ENABLE_SPATIAL_SEPARABLE_CONV:
        factorized = conv_bn_relu(x, filters=filters, kernel_size=unit)
        for kernel in ((5, 1, 1), (1, 5, 1), (1, 1, 5)):
            factorized = conv_bn_relu(factorized, filters=filters, kernel_size=kernel)
        # The strided convolution brings this branch to the same size as the others.
        factorized = conv_bn_relu(factorized, filters=filters, kernel_size=cube,
                                  strides=half, padding='same')
        branches.append(factorized)
        print(factorized.get_shape())

    merged = Concatenate(axis=4)(branches)
    print(merged.get_shape())
    return merged
def build_model_combine_features(self, load_weight=False):
    """Build and compile a two-input classifier (location vector + image patch).

    The image branch is a small CNN over ``(11, 11, 3)`` inputs ending in a
    ``Flatten``; the location branch is a single ``Dense(2)`` over ``(2,)``
    inputs. The two branch outputs are concatenated and passed through
    ``Dense(500, relu)`` and ``Dense(2, softmax)``.

    The previous implementation instantiated ``Concatenate`` with the list of
    branches and then called ``.add`` on the layer, which fails at runtime;
    the merge is now expressed with the functional API. It also called the
    non-existent ``model.load_weight``.

    :param load_weight: when truthy, load weights from ``WEIGHT_DIR + ""``
    :return: the compiled model; its inputs are ordered
        ``[location, image]``
    """
    cnn_branch = Sequential()
    cnn_branch.add(
        Conv2D(filters=16, kernel_size=5, strides=1, padding='valid',
               activation='relu', input_shape=(11, 11, 3), name='Conv1'))
    cnn_branch.add(Conv2D(filters=24, kernel_size=3, strides=1, padding='same',
                          activation='relu', name='Conv2'))
    cnn_branch.add(Conv2D(filters=32, kernel_size=3, strides=1, padding='same',
                          activation='relu', name='Conv3'))
    cnn_branch.add(MaxPooling2D(pool_size=(3, 3), strides=2))
    cnn_branch.add(Conv2D(filters=64, kernel_size=3, strides=1, padding='same',
                          activation='relu', name='Conv4'))
    cnn_branch.add(MaxPooling2D(pool_size=(3, 3), strides=2))
    cnn_branch.add(Conv2D(filters=96, kernel_size=3, strides=1, padding='same',
                          activation='relu', name='Conv5'))
    cnn_branch.add(Flatten())

    location_branch = Sequential()
    location_branch.add(Dense(2, input_shape=(2,), activation='relu'))

    # Merge the branch outputs as tensors. A Concatenate layer cannot itself
    # act as a container that further layers are added to.
    # NOTE: relies on `Model` (the Keras functional model class) being
    # imported at module level.
    merged = Concatenate()([location_branch.output, cnn_branch.output])
    hidden = Dense(500, activation='relu')(merged)
    probabilities = Dense(2, activation='softmax')(hidden)
    model = Model(inputs=[location_branch.input, cnn_branch.input],
                  outputs=probabilities)

    model.compile(optimizer=Adam(lr=self.lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    if load_weight:
        print("Loading weight...")
        model.load_weights(WEIGHT_DIR + "")
        print("Weight loaded.")
    return model
def inception_base(x):
    """Apply the stem of the network and halve the resolution two ways.

    ``x`` goes through three ``conv_bn_relu`` calls (32, 32 and 64 filters).
    The result is then reduced by a 2x2x2 max pooling and, in parallel, by a
    ``conv_bn_relu`` with 64 filters and stride 2; both reductions are
    concatenated along axis 4. Intermediate and final shapes are printed.

    :param x: input tensor (assumed 5-D with features on axis 4 -- TODO confirm)
    :return: the concatenated tensor
    """
    stem = x
    for width in (32, 32, 64):
        stem = conv_bn_relu(stem, filters=width)

    pooled = MaxPooling3D(pool_size=(2, 2, 2))(stem)
    strided = conv_bn_relu(stem, 64, strides=(2, 2, 2))
    merged = Concatenate(axis=4)([pooled, strided])

    print('inception_base')
    for tensor in (pooled, strided, merged):
        print(tensor.get_shape())
    return merged
def dense_block(x):
    """Grow ``x`` by ``DENSE_NET_BLOCK_LAYERS`` densely connected layers.

    Each iteration computes a new feature tensor from the current one with a
    3x3x3 ``bn_relu_conv`` of ``DENSE_NET_GROWTH_RATE`` filters (preceded by a
    1x1x1 ``bn_relu_conv`` when ``DENSE_NET_ENABLE_BOTTLENETCK`` is truthy)
    and concatenates it onto the current tensor along axis 4. The shape is
    printed before the loop and after every concatenation.

    :param x: input tensor (assumed 5-D with features on axis 4 -- TODO confirm)
    :return: the tensor after all concatenations
    """
    print('dense block')
    print(x.get_shape())

    features = x
    for _layer in range(DENSE_NET_BLOCK_LAYERS):
        if DENSE_NET_ENABLE_BOTTLENETCK:
            branch = bn_relu_conv(features, filters=DENSE_NET_GROWTH_RATE,
                                  kernel_size=(1, 1, 1))
        else:
            branch = features
        branch = bn_relu_conv(branch, filters=DENSE_NET_GROWTH_RATE,
                              kernel_size=(3, 3, 3))
        features = Concatenate(axis=4)([features, branch])
        print(features.get_shape())
    return features
def use_pretrained_model(self):
    """Rebuild the attention seq2seq graph and load pre-trained weights.

    This duplicates the graph construction of the training routine without
    fitting. It reads the module-level ``params`` dict and
    ``PRETRAINED_MODEL_PATH`` and sets three attributes:

    * ``self.model`` -- the full teacher-forcing model, compiled and with
      weights loaded from ``PRETRAINED_MODEL_PATH``;
    * ``self.encoder_model`` -- maps the encoder input to the encoder outputs;
    * ``self.decoder_model`` -- a single-step decoder taking
      ``[last word, encoder outputs, s, c]`` and returning
      ``[word distribution, s, c]``.

    The sub-models share layers with ``self.model`` and therefore use the
    loaded weights. Returns ``None``.
    """
    ###########
    ### ENCODER
    embedding_layer = Embedding(
        params['LEN_WORD2IDX_INPUTS'] + 1,
        params['EMBEDDING_DIM'],
        # weights=[embedding_matrix],
        input_length=params['MAX_LEN_INPUT'],
        # trainable=True
    )
    encoder_inputs_placeholder = Input(shape=(params['MAX_LEN_INPUT'], ))
    x = embedding_layer(encoder_inputs_placeholder)
    encoder = Bidirectional(
        LSTM(
            params['LATENT_DIM'],
            return_sequences=True,
            # dropout=0.5 # dropout not available on gpu
        ))
    encoder_outputs = encoder(x)

    ###########
    ### DECODER
    # this word embedding will not use pre-trained vectors, although you could
    decoder_embedding = Embedding(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                                  params['EMBEDDING_DIM'])
    decoder_inputs_placeholder = Input(
        shape=(params['MAX_LEN_TARGET'], ))  # teacher forcing input
    decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)

    #############
    ### ATTENTION
    # The attention layers are created once, outside the decoding loop,
    # because the same layers are reused for each of the Ty decoder steps.
    attn_repeat_layer = RepeatVector(params['MAX_LEN_INPUT'])
    attn_concat_layer = Concatenate(axis=-1)
    attn_dense1 = Dense(10, activation='tanh')
    attn_dense2 = Dense(1, activation=self._softmax_over_time)
    attn_dot = Dot(axes=1)  # to perform the weighted sum of alpha[t] * h[t]

    def _one_step_attention(h, st_1):
        # h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2)
        # st_1 = s(t-1), shape = (LATENT_DIM_DECODER,)

        # copy s(t-1) Tx times
        # now shape = (Tx, LATENT_DIM_DECODER)
        st_1 = attn_repeat_layer(st_1)

        # Concatenate all h(t)'s with s(t-1)
        # Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2)
        x = attn_concat_layer([h, st_1])

        # Neural net first layer
        x = attn_dense1(x)

        # Neural net second layer with special softmax over time
        alphas = attn_dense2(x)

        # "Dot" the alphas and the h's
        # Remember a.dot(b) = sum over a[t] * b[t]
        context = attn_dot([alphas, h])
        return context

    # define the rest of the decoder (after attention)
    decoder_lstm = LSTM(params['LATENT_DIM_DECODER'], return_state=True)
    decoder_dense = Dense(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                          activation='softmax')

    initial_s = Input(shape=(params['LATENT_DIM_DECODER'], ), name='s0')
    initial_c = Input(shape=(params['LATENT_DIM_DECODER'], ), name='c0')
    context_last_word_concat_layer = Concatenate(
        axis=2)  # for teacher forcing

    # Unlike previous seq2seq, we cannot get the output all in one step.
    # Instead we need to do Ty steps, and in each of those steps we need to
    # consider all Tx h's.

    # s, c will be re-assigned in each iteration of the loop
    s = initial_s
    c = initial_c

    # collect outputs in a list at first
    outputs = []
    # An LSTM normally runs a loop like this one internally; here the loop is
    # written out by hand so that the context can be computed at every step.
    for t in range(params['MAX_LEN_TARGET']):  # Ty times
        # get the context using attention
        context = _one_step_attention(encoder_outputs, s)

        # We need a different layer for each time step. The Lambda extracts
        # only the vector of the current step; a Lambda is used because every
        # operation has to be expressed as a layer.
        # `t` is bound as a default argument so that each layer keeps its own
        # time index. A plain closure would see the final value of `t` if the
        # layer were ever invoked again after the loop has finished.
        selector = Lambda(lambda x, t=t: x[:, t:t + 1])
        xt = selector(decoder_inputs_x)

        # combine
        decoder_lstm_input = context_last_word_concat_layer([context, xt])

        # pass the combined [context, last word] into the LSTM
        # along with [s, c]
        # get the new [s, c] and output
        o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[s, c])

        # final dense layer to get next word prediction
        decoder_outputs = decoder_dense(o)
        outputs.append(decoder_outputs)

    # make it a layer
    stacker = Lambda(self._stack_and_transpose)
    outputs = stacker(outputs)

    ################
    ### CREATE MODEL
    self.model = Model(inputs=[
        encoder_inputs_placeholder,
        decoder_inputs_placeholder,
        initial_s,
        initial_c,
    ],
                       outputs=outputs)

    # compile the model
    self.model.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

    ## load pretrained model
    self.model.load_weights(PRETRAINED_MODEL_PATH)

    ##### Make predictions #####
    # As with the poetry example, we need to create another model
    # that can take in the RNN state and previous word as input
    # and accept a T=1 sequence.

    # The encoder will be stand-alone
    # From this we will get our initial decoder hidden state
    # i.e. h(1), ..., h(Tx)
    self.encoder_model = Model(encoder_inputs_placeholder, encoder_outputs)

    # next we define a T=1 decoder model
    encoder_outputs_as_input = Input(shape=(
        params['MAX_LEN_INPUT'],
        params['LATENT_DIM'] * 2,
    ))
    decoder_inputs_single = Input(shape=(1, ))
    decoder_inputs_single_x = decoder_embedding(decoder_inputs_single)

    # no need to loop over attention steps this time because there is only
    # one step
    context = _one_step_attention(encoder_outputs_as_input, initial_s)

    # combine context with last word
    decoder_lstm_input = context_last_word_concat_layer(
        [context, decoder_inputs_single_x])

    # lstm and final dense
    o, s, c = decoder_lstm(decoder_lstm_input,
                           initial_state=[initial_s, initial_c])
    decoder_outputs = decoder_dense(o)

    # note: we don't really need the final stack and transpose
    # because there's only 1 output
    # it is already of size N x D
    # no need to make it 1 x N x D --> N x 1 x D
    # The time dimension is 1, so it disappears automatically and the
    # stack_and_transpose function is not needed here.

    # create the model object
    self.decoder_model = Model(inputs=[
        decoder_inputs_single, encoder_outputs_as_input, initial_s, initial_c
    ],
                               outputs=[decoder_outputs, s, c])
def build_model_alt(self, num_layers, n_base_filters, deconvolution, use_bn=False):
    """
    Create a 3D Unet model with a variable number of layers and initial number of filters

    The model is built on ``self._input``, stored in ``self._model`` and
    described by ``self._title``. The output layer is a 1x1x1 softmax
    convolution with ``self._num_classes`` filters.

    Filter counts are tracked as plain integers while the graph is built
    rather than read back from the tensors' private ``_keras_shape``
    attribute, which is not available on every Keras implementation. The
    values are the same: a convolution's output feature count equals the
    ``filters`` it was created with.

    :param num_layers: number of layers (i.e. number of skip connections + 1)
    :param n_base_filters: number of filters to use in the first conv layer
    :param deconvolution: True for Deconvolution3D, False for UpSampling3D
    :param use_bn: True to use BatchNormalisation, False otherwise
    :return: Keras model
    """
    POOL_SIZE = (2, 2, 2)
    POOL_STRIDE = (2, 2, 2)
    CONV_KERNEL = (3, 3, 3)
    CONV_STRIDE = (1, 1, 1)
    DECONV_KERNEL = (2, 2, 2)
    DECONV_STRIDE = (2, 2, 2)
    UPSAMPLE_SIZE = (2, 2, 2)
    FEATURE_AXIS = -1

    self._title = "UNet3D_{}layer_{}flt_deconv{}".format(
        num_layers, n_base_filters, int(deconvolution))
    self._title += "_BN" if use_bn else ""

    def conv_block(tensor, n_filters):
        # One 3x3x3 ReLU convolution, followed by batch norm when enabled.
        tensor = Conv3D(filters=n_filters,
                        kernel_size=CONV_KERNEL,
                        strides=CONV_STRIDE,
                        padding="same",
                        activation="relu",
                        kernel_initializer="he_normal")(tensor)
        if use_bn:
            tensor = BatchNormalization(axis=FEATURE_AXIS)(tensor)
        return tensor

    inputs = self._input
    current_layer = inputs
    current_filters = None  # feature count of current_layer once a conv ran
    skips = []  # (tensor, feature count) of each contracting level

    # Contracting path
    for layer_ix in range(num_layers):
        # Two conv layers, note the difference in the number of filters
        level_filters = n_base_filters * (2**layer_ix)
        contr_conv1 = conv_block(current_layer, level_filters)
        contr_conv2 = conv_block(contr_conv1, level_filters * 2)
        current_filters = level_filters * 2
        skips.append((contr_conv2, current_filters))

        # Do not include maxpooling in the final bottom layer
        if layer_ix < num_layers - 1:
            current_layer = MaxPooling3D(pool_size=POOL_SIZE,
                                         strides=POOL_STRIDE,
                                         padding="same")(contr_conv2)
        else:
            current_layer = contr_conv2

    # Expanding path: walk back up from the level above the bottom one.
    for layer_ix in range(num_layers - 2, -1, -1):
        skip_tensor, skip_filters = skips[layer_ix]
        if deconvolution:
            # The transposed convolution keeps the incoming feature count.
            exp_deconv = Deconvolution3D(
                filters=current_filters,
                kernel_size=DECONV_KERNEL,
                strides=DECONV_STRIDE)(current_layer)
        else:
            exp_deconv = UpSampling3D(size=UPSAMPLE_SIZE)(current_layer)
        concat_layer = Concatenate(axis=FEATURE_AXIS)(
            [exp_deconv, skip_tensor])
        # Both convolutions use the feature count of the skip connection.
        current_layer = conv_block(concat_layer, skip_filters)
        current_layer = conv_block(current_layer, skip_filters)
        current_filters = skip_filters

    act = Conv3D(self._num_classes, (1, 1, 1),
                 activation="softmax",
                 padding="same",
                 kernel_initializer="he_normal")(current_layer)

    self._model = Model(inputs=[inputs], outputs=[act])
    return self._model
def get_generator(self):
    """Build the generator: a gated-convolution encoder/decoder with skips.

    The input has shape ``(INP_SHAPE[0], INP_SHAPE[1], 9)``; per the original
    author's note its channels hold the patched image, mask, sketch info,
    colour info and random noise. Seven stride-2 gated convolutions and four
    dilated ones form the encoder. Seven decoder stages then each apply
    ``GatedDeConv2D`` to an explicit output shape, concatenate the matching
    encoder tensor, and apply a 3x3 ``GatedConv2D``. The last stage produces
    3 channels, followed by ``tanh``.

    Skip connections are concatenated along the channel axis (3). The
    previous ``axis=0`` joined them along the batch axis, which multiplies
    the batch size at every stage and contradicts the explicit
    ``TRAIN_BATCH_SIZE`` passed to the following ``GatedDeConv2D``.

    :return: a ``Model`` mapping the input to ``[image, mask]``, where
        ``mask`` is the second value returned by the final ``GatedConv2D``
    """
    height = self.vars.INP_SHAPE[0]
    width = self.vars.INP_SHAPE[1]
    batch = self.vars.TRAIN_BATCH_SIZE
    cnum = 64

    inp = Input(shape=(height, width, 9))

    # Encoder: each stage halves the spatial resolution.
    x1, _ = self.GatedConv2D(inp, cnum, (7, 7), (2, 2), use_lrn=False)
    x2, _ = self.GatedConv2D(x1, 2 * cnum, (5, 5), (2, 2))
    x3, _ = self.GatedConv2D(x2, 4 * cnum, (5, 5), (2, 2))
    x4, _ = self.GatedConv2D(x3, 8 * cnum, (3, 3), (2, 2))
    x5, _ = self.GatedConv2D(x4, 8 * cnum, (3, 3), (2, 2))
    x6, _ = self.GatedConv2D(x5, 8 * cnum, (3, 3), (2, 2))
    x7, _ = self.GatedConv2D(x6, 8 * cnum, (3, 3), (2, 2))

    # Stride-1 convolutions with growing dilation at the bottleneck.
    for dilation in (2, 4, 8, 16):
        x7, _ = self.GatedConv2D(x7, 8 * cnum, (3, 3), (1, 1),
                                 dilation=dilation)

    # Decoder stages: (skip tensor, input-size divisor of the deconv output,
    # deconv output channels, channels of the conv after the concatenation).
    decoder_stages = (
        (x6, 64, 8 * cnum, 8 * cnum),
        (x5, 32, 8 * cnum, 8 * cnum),
        (x4, 16, 8 * cnum, 8 * cnum),
        (x3, 8, 4 * cnum, 4 * cnum),
        (x2, 4, 2 * cnum, 2 * cnum),
        (x1, 2, cnum, cnum),
        (inp, 1, 9, 3),
    )
    x, mask = x7, None
    for skip, divisor, deconv_channels, conv_channels in decoder_stages:
        x, _ = self.GatedDeConv2D(x, [
            batch,
            int(height / divisor),
            int(width / divisor), deconv_channels
        ])
        x = Concatenate(axis=3)([skip, x])
        x, mask = self.GatedConv2D(x, conv_channels, (3, 3), (1, 1))

    x = Activation('tanh')(x)

    model = Model(inputs=inp, outputs=[x, mask])
    return model
def Deeplabv3(weights='pascal_voc',
              input_tensor=None,
              infer=False,
              input_shape=(512, 512, 3),
              classes=21,
              backbone='mobilenetv2',
              OS=16,
              alpha=1.):
    """ Instantiates the Deeplabv3+ architecture

    Optionally loads weights pre-trained on PASCAL VOC. The model requires
    the TensorFlow backend. Inputs are rescaled inside the model with
    ``x / 127.5 - 1``.

    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        infer: if truthy, softmax is applied directly to the resized logits;
            otherwise the logits are first reshaped to
            `(input_shape[0] * input_shape[1], classes)`.
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images
        classes: number of desired classes. If classes != 21, the last layer
            gets a different name, so the by-name weight loading leaves it
            randomly initialized.
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        OS: determines input_shape/feature_extractor_output ratio.
            8 selects one xception configuration, any other value the
            other. Used only for the xception backbone; it is overwritten
            with 8 for mobilenetv2.
        alpha: width multiplier passed to the MobileNetV2 blocks.
            Used only for mobilenetv2 backbone

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: If the Keras backend is not TensorFlow.
        ValueError: in case of invalid argument for `weights` or `backbone`
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if K.backend() != 'tensorflow':
        raise RuntimeError('The Deeplabv3+ model is only available with '
                           'the TensorFlow backend.')

    if backbone not in {'xception', 'mobilenetv2'}:
        raise ValueError('The `backbone` argument should be either '
                         '`xception` or `mobilenetv2` ')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif K.is_keras_tensor(input_tensor):
        img_input = input_tensor
    else:
        img_input = Input(tensor=input_tensor, shape=input_shape)

    batches_input = Lambda(lambda x: x / 127.5 - 1)(img_input)
    is_xception = backbone == 'xception'

    if is_xception:
        if OS == 8:
            entry_block3_stride, middle_block_rate = 1, 2
            # ! Not mentioned in paper, but required
            exit_block_rates, atrous_rates = (2, 4), (12, 24, 36)
        else:
            entry_block3_stride, middle_block_rate = 2, 1
            exit_block_rates, atrous_rates = (1, 2), (6, 12, 18)

        # Entry flow
        x = Conv2D(32, (3, 3), strides=(2, 2), name='entry_flow_conv1_1',
                   use_bias=False, padding='same')(batches_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                            skip_connection_type='conv', stride=2,
                            depth_activation=False)
        # skip1 feeds the decoder further down.
        x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                                   skip_connection_type='conv', stride=2,
                                   depth_activation=False, return_skip=True)
        x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)

        # Middle flow: 16 identical units
        for unit in range(1, 17):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(unit),
                                skip_connection_type='sum', stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        # Exit flow
        x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                            skip_connection_type='conv', stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                            skip_connection_type='none', stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)
    else:
        OS = 8
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters, kernel_size=3, strides=(2, 2),
                   padding='same', use_bias=False,
                   name='Conv')(batches_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                               name='Conv_BN')(x)
        x = Lambda(lambda x: relu(x, max_value=6.))(x)

        # One row per inverted residual block, in block_id order:
        # (filters, stride, rate, expansion, skip_connection).
        # A rate of None means the argument is not passed at all, so the
        # callee's own default applies.
        # The stride of block 6 was changed from 2 to 1, which is why the
        # blocks after it use rate = 2 (and rate = 4 after block 13).
        block_specs = (
            (16, 1, None, 1, False),
            (24, 2, None, 6, False),
            (24, 1, None, 6, True),
            (32, 2, None, 6, False),
            (32, 1, None, 6, True),
            (32, 1, None, 6, True),
            (64, 1, None, 6, False),
            (64, 1, 2, 6, True),
            (64, 1, 2, 6, True),
            (64, 1, 2, 6, True),
            (96, 1, 2, 6, False),
            (96, 1, 2, 6, True),
            (96, 1, 2, 6, True),
            (160, 1, 2, 6, False),
            (160, 1, 4, 6, True),
            (160, 1, 4, 6, True),
            (320, 1, 4, 6, False),
        )
        for block_id, spec in enumerate(block_specs):
            filters, stride, rate, expansion, skip_connection = spec
            block_kwargs = dict(filters=filters, alpha=alpha, stride=stride,
                                expansion=expansion, block_id=block_id,
                                skip_connection=skip_connection)
            if rate is not None:
                block_kwargs['rate'] = rate
            x = _inverted_res_block(x, **block_kwargs)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch: pool over a window of the expected feature-map
    # size, then resize back to that size.
    feature_size = (int(np.ceil(input_shape[0] / OS)),
                    int(np.ceil(input_shape[1] / OS)))
    b4 = AveragePooling2D(pool_size=feature_size)(x)
    b4 = Conv2D(256, (1, 1), padding='same', use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = Lambda(lambda t: K.tf.image.resize_bilinear(
        t, size=feature_size))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False,
                name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    aspp_branches = [b4, b0]
    if is_xception:
        # aspp1..aspp3 use the three atrous rates: 6/12/18, or 12/24/36
        # when OS == 8.
        for index, rate in enumerate(atrous_rates, start=1):
            aspp_branches.append(
                SepConv_BN(x, 256, 'aspp{}'.format(index), rate=rate,
                           depth_activation=True, epsilon=1e-5))

    # concatenate ASPP branches & project
    x = Concatenate()(aspp_branches)
    x = Conv2D(256, (1, 1), padding='same', use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # DeepLab v.3+ decoder
    if is_xception:
        # Feature projection
        # x4 (x2) block
        quarter_size = (int(np.ceil(input_shape[0] / 4)),
                        int(np.ceil(input_shape[1] / 4)))
        x = Lambda(lambda t: K.tf.image.resize_bilinear(
            t, size=quarter_size))(x)

        dec_skip1 = Conv2D(48, (1, 1), padding='same', use_bias=False,
                           name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(name='feature_projection0_BN',
                                       epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation('relu')(dec_skip1)

        x = Concatenate()([x, dec_skip1])
        for decoder_name in ('decoder_conv0', 'decoder_conv1'):
            x = SepConv_BN(x, 256, decoder_name,
                           depth_activation=True, epsilon=1e-5)

    # you can use it with arbitrary number of classes
    last_layer_name = ('logits_semantic' if classes == 21
                       else 'custom_logits_semantic')
    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)

    full_size = (input_shape[0], input_shape[1])
    x = Lambda(lambda t: K.tf.image.resize_bilinear(t, size=full_size))(x)

    if not infer:
        x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Activation('softmax')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = get_source_inputs(input_tensor)

    model = Model(inputs, x, name='deeplabv3p')

    # load weights
    if weights == 'pascal_voc':
        if is_xception:
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_X,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_MOBILE,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    return model
activation=relu)(branch2) branch2 = AveragePooling2D()(branch2) branch2 = Conv2D(filters = 128, kernel_size = (4, 2), strides = (1, 1), padding='same', activation=relu)(branch2) branch2 = AveragePooling2D(pool_size = (5,1))(branch2) branch2 = Flatten()(branch2) #what is axis? layer = Concatenate(axis=1)([branch1, branch2]) layer = Dropout(0.25)(layer) layer = Dense(units=200, activation=relu)(layer) layer = Dense(num_classes, activation='softmax')(layer) model = Model(input_layer, layer) #need to add softmax model.summary()
# var_out = dense_layer(var_in) # ``` # * [Activation()](https://keras.io/layers/core/#activation) # ```Python # activation = activation_layer(var_in) # ``` # * [Dot()](https://keras.io/layers/merge/#dot) # ```Python # dot_product = dot_layer([var1,var2]) # ``` # In[6]: # Defined shared layers as global variables repeator = RepeatVector(Tx) concatenator = Concatenate(axis=-1) densor1 = Dense(10, activation="tanh") densor2 = Dense(1, activation="relu") activator = Activation( softmax, name='attention_weights' ) # We are using a custom softmax(axis = 1) loaded in this notebook dotor = Dot(axes=1) # In[7]: # GRADED FUNCTION: one_step_attention def one_step_attention(a, s_prev): """ Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
def get_model(n_classes, input_height=224, input_width=224, weights=None):
    """Build, summarise and compile a U-Net-style segmentation model.

    The encoder has four stages of ``conv2d_block`` -> 2x2 max pooling ->
    dropout, with 32, 64, 128 and 256 filters, followed by a 512-filter
    ``conv2d_block`` at the bottom. The decoder mirrors it: each stage
    applies a stride-2 ``Conv2DTranspose``, concatenates the encoder output
    of the same stage, applies dropout and a ``conv2d_block``. A 1x1
    ``Conv2DTranspose`` with softmax produces ``n_classes`` channels.

    ``model.summary()`` is printed, ``weights`` are loaded when given, and
    the model is compiled with Adam (lr 1e-5) and categorical cross-entropy.

    :param n_classes: number of output channels of the final layer
    :param input_height: height of the 3-channel input image
    :param input_width: width of the 3-channel input image
    :param weights: optional path passed to ``model.load_weights``
    :return: the compiled Keras model
    """
    base_filters = 32
    kernel = 3
    drop_rate = 0.25

    img_input = Input(shape=(input_height, input_width, 3))

    # Downsampling path; the un-pooled block outputs are kept for the skips.
    skips = []
    tensor = img_input
    for multiplier in (1, 2, 4, 8):
        conv = conv2d_block(n_filters=base_filters * multiplier,
                            kernel_size=kernel,
                            input_tensor=tensor)
        skips.append(conv)
        pooled = MaxPooling2D((2, 2))(conv)
        tensor = Dropout(drop_rate)(pooled)

    # Bottom of the "U": no pooling after this block.
    tensor = conv2d_block(n_filters=base_filters * 16,
                          kernel_size=kernel,
                          input_tensor=tensor)

    # Upsampling path, pairing each stage with the matching encoder output.
    for multiplier, skip in zip((8, 4, 2, 1), reversed(skips)):
        upsampled = Conv2DTranspose(base_filters * multiplier,
                                    kernel_size=(kernel, kernel),
                                    strides=(2, 2),
                                    padding='same')(tensor)
        merged = Concatenate()([upsampled, skip])
        dropped = Dropout(drop_rate)(merged)
        tensor = conv2d_block(base_filters * multiplier,
                              kernel_size=3,
                              input_tensor=dropped)

    # Apply 1x1 convolution
    outputs = Conv2DTranspose(n_classes, (1, 1), activation='softmax')(tensor)

    model = Model(inputs=[img_input], outputs=[outputs])
    model.summary()

    if weights is not None:
        model.load_weights(weights)

    opt = Adam(lr=1E-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model
embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=sequence_length)(inputs) conv_list = list() for ftr in filter_sizes: conv = Conv1D(num_filters, kernel_size=ftr, padding='same', kernel_initializer='normal', activation='relu')(embedding) maxpool = MaxPooling1D(pool_size=(sequence_length - ftr + 1), padding='valid')(conv) conv_list.append(maxpool) concatenated_tensor = Concatenate(axis=1)(conv_list) flatten = Flatten()(concatenated_tensor) dropout = Dropout(drop)(flatten) output = Dense(units=2, activation='softmax')(dropout) model = Model(inputs=inputs, outputs=output) checkpoint = ModelCheckpoint('weights.{epoch:03d}-{val_acc:.4f}.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='auto') adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) model.compile(optimizer=adam, loss='categorical_crossentropy',
activation='relu')(xc14b) xct1b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct1) xct2 = Conv2DTranspose(512, (3, 3), strides=(1, 1), padding='same', data_format=None, activation='relu')(xct1b) xct2b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct2) inception1_12 = Conv2D(512, (1, 1), padding='same', activation='relu', strides=(1, 1))(xmp12) inception1_12b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(inception1_12) concat1 = Concatenate(axis=-1)([xct2b, inception1_12b]) xup3 = UpSampling2D(size=(2, 2), data_format=None)(concat1) xct4 = Conv2DTranspose(512, (3, 3), strides=(1, 1), padding='same', data_format=None, activation='relu')(xup3) xct4b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct4) xct5 = Conv2DTranspose(512, (3, 3), strides=(1, 1), padding='same', data_format=None, activation='relu')(xct4b) xct5b = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(xct5) inception1_9 = Conv2D(256, (1, 1),
def DeeplabEncoder(input_shape=[(256, 256, 4), (128, 128, 4)], classes=2,
                   backbone='xception', OS=16, alpha=1.):
    """Build a softmax classifier on top of the DeepLabv3+ Xception encoder.

    The network is the Xception entry/middle/exit flow, followed by the
    Atrous Spatial Pyramid Pooling (ASPP) branches, a 1x1 projection, global
    average pooling and a final softmax ``Dense`` layer.

    # Arguments
        input_shape: currently not read by this function; the two inputs are
            fixed to ``(256, 256, 4)`` (main) and ``(128, 128, 4)`` (side).
            The default list is never mutated.
        classes: number of units of the final softmax layer.
        backbone: backbone to use. Only ``'xception'`` is implemented here.
        OS: output stride. ``8`` selects the more dilated configuration;
            every other value falls through to the ``OS == 16`` settings.
        alpha: not read by this function (kept so the signature stays
            compatible with existing callers).

    # Returns
        A Keras model named ``'deeplabEncoder'`` whose inputs are
        ``[imgMainInput, imgSideInput]``.

    # Raises
        ValueError: if `backbone` is not ``'xception'``.
    """
    # Fail early with the documented exception type instead of hitting a
    # NameError on `x` further down when no feature extractor was built.
    if backbone != 'xception':
        raise ValueError(
            "Unsupported backbone `{}`: only 'xception' is implemented."
            .format(backbone))

    imgMainInput = Input(shape=(256, 256, 4), name='Input10')
    # NOTE(review): the side input is registered on the model below, but no
    # layer built in this function consumes it.
    imgSideInput = Input(shape=(128, 128, 4), name='Input20')

    if OS == 8:
        entry_block3_stride = 1
        middle_block_rate = 2  # ! Not mentioned in paper, but required
        exit_block_rates = (2, 4)
        atrous_rates = (12, 24, 36)
    else:
        entry_block3_stride = 2
        middle_block_rate = 1
        exit_block_rates = (1, 2)
        atrous_rates = (6, 12, 18)

    # ---- Xception entry flow ------------------------------------------
    x = Conv2D(32, (3, 3), strides=(2, 2), name='entry_flow_conv1_1',
               use_bias=False, padding='same')(imgMainInput)
    x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
    x = Activation('relu')(x)

    x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
    x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
    x = Activation('relu')(x)

    x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                        skip_connection_type='conv', stride=2,
                        depth_activation=False)
    # skip1 is returned by the block but is not used by this encoder.
    x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                               skip_connection_type='conv', stride=2,
                               depth_activation=False, return_skip=True)
    x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                        skip_connection_type='conv',
                        stride=entry_block3_stride,
                        depth_activation=False)

    # ---- Xception middle flow -----------------------------------------
    for i in range(8):
        x = _xception_block(x, [728, 728, 728],
                            'middle_flow_unit_{}'.format(i + 1),
                            skip_connection_type='sum', stride=1,
                            rate=middle_block_rate,
                            depth_activation=False)

    # ---- Xception exit flow -------------------------------------------
    x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                        skip_connection_type='conv', stride=1,
                        rate=exit_block_rates[0],
                        depth_activation=False)
    x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                        skip_connection_type='none', stride=1,
                        rate=exit_block_rates[1],
                        depth_activation=True)
    # end of feature extractor

    # ---- Atrous Spatial Pyramid Pooling -------------------------------
    # Image feature branch: pool over a window of ceil(256 / OS) per side,
    # then upsample by the same factor.
    pooled_extent = int(np.ceil(256 / OS))
    b4 = AveragePooling2D(pool_size=(pooled_extent, pooled_extent))(x)
    b4 = Conv2D(256, (1, 1), padding='same',
                use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = BilinearUpsampling((pooled_extent, pooled_extent))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # Atrous separable branches; rates are (6, 12, 18), or (12, 24, 36)
    # when OS == 8.
    b1 = SepConv_BN(x, 256, 'aspp1',
                    rate=atrous_rates[0], depth_activation=True, epsilon=1e-5)
    b2 = SepConv_BN(x, 256, 'aspp2',
                    rate=atrous_rates[1], depth_activation=True, epsilon=1e-5)
    b3 = SepConv_BN(x, 256, 'aspp3',
                    rate=atrous_rates[2], depth_activation=True, epsilon=1e-5)

    # concatenate ASPP branches & project
    x = Concatenate()([b4, b0, b1, b2, b3])
    x = Conv2D(256, (1, 1), padding='same',
               use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)

    # ---- Classification head ------------------------------------------
    x = GlobalAveragePooling2D()(x)
    x = Dense(classes, activation='softmax')(x)

    model = Model([imgMainInput, imgSideInput], x, name='deeplabEncoder')
    return model
def _build_network(self, network_input, network_output, additional_network_outputs):
    """Assemble the clustering network on top of the per-input embeddings.

    Two kinds of outputs are created and appended to `network_output`:

    * for every input object, one softmax distribution per candidate
      cluster count ``k`` (appended input by input, and for each input in
      the order of ``cluster_counts``);
    * last, a single softmax over the candidate cluster counts.

    Args:
        network_input: the network inputs; passed to ``self._get_embedding``
            and its length determines how many slices are taken.
        network_output: list that receives the created output tensors
            (mutated in place).
        additional_network_outputs: dict that receives the entries
            ``'clusters'`` (nested dict ``input{i}`` -> ``cluster{k}``) and
            ``'cluster_count_output'`` (mutated in place).

    Returns:
        True.
    """
    cluster_counts = list(self.data_provider.get_cluster_counts())

    # NOTE(review): an earlier comment described the output list as ordered
    # by cluster count first and by object second. The loops below append
    # object by object (all k for input 0, then all k for input 1, ...).
    # Confirm which order the loss expects.

    # First we get an embedding for the network inputs
    embeddings = self._get_embedding(network_input)

    # Reshape all embeddings to 1d vectors. The size goes through str() so
    # that shape entries which are not plain ints can still be converted.
    embedding_shape = embeddings[0].shape
    embedding_size = int(str(np.prod(embedding_shape[1:])))
    embedding_reshaper = self._s_layer(
        'embedding_reshape',
        lambda name: Reshape((1, embedding_size), name=name))
    embeddings_reshaped = [
        embedding_reshaper(embedding) for embedding in embeddings
    ]

    # Merge all embeddings to one tensor
    embeddings_merged = self._s_layer(
        'embeddings_merge',
        lambda name: Concatenate(axis=1, name=name))(embeddings_reshaped)
    self._add_additional_prediction_output(embeddings_merged, 'Embeddings')

    # Use now some LSTM-layer to process all embeddings
    processed = embeddings_merged
    for i in range(self.__lstm_layers):
        processed = self._s_layer(
            'LSTM_proc_{}'.format(i),
            lambda name: Bidirectional(
                LSTM(self.__lstm_units, return_sequences=True),
                name=name))(processed)
        processed = self._s_layer(
            'LSTM_proc_{}_batch'.format(i),
            lambda name: BatchNormalization(name=name))(processed)

    # Split the tensor to separate layers
    embeddings_processed = [
        self._s_layer('slice_{}'.format(i),
                      lambda name: slice_layer(processed, i, name))
        for i in range(len(network_input))
    ]

    # Create now two outputs: the cluster count and, for each cluster
    # count / object combination, a softmax distribution. These outputs are
    # independent of each other, so it does not matter which is built first.
    # Start with the cluster count / object combinations.

    # First prepare some generally required layers
    layers = []
    for i in range(self.__output_dense_layers):
        layers += [
            self._s_layer(
                'output_dense{}'.format(i),
                lambda name: Dense(self.__output_dense_units, name=name)),
            # The index is part of the name so that every stacked block
            # gets its own batch-norm layer, matching the indexed dense
            # layer above (the previous template had no placeholder).
            self._s_layer(
                'output_batch{}'.format(i),
                lambda name: BatchNormalization(name=name)),
            LeakyReLU()
        ]
    cluster_softmax = {
        k: self._s_layer(
            'softmax_cluster_{}'.format(k),
            lambda name: Dense(k, activation='softmax', name=name))
        for k in cluster_counts
    }

    # Create now the outputs
    clusters_output = additional_network_outputs['clusters'] = {}
    for i, embedding_proc in enumerate(embeddings_processed):

        # Add the required layers
        for layer in layers:
            embedding_proc = layer(embedding_proc)

        input_clusters_output = clusters_output['input{}'.format(i)] = {}
        for k in cluster_counts:

            # Create now the required softmax distributions
            output_classifier = cluster_softmax[k](embedding_proc)
            input_clusters_output['cluster{}'.format(k)] = output_classifier
            network_output.append(output_classifier)

    # Calculate the real cluster count.
    # NOTE(review): unlike the other layers, the builder passed here already
    # applies the layer to `embeddings_merged`; presumably `_s_layer` hands
    # back whatever the builder produced - confirm.
    cluster_count = self._s_layer(
        'cluster_count_LSTM_merge',
        lambda name: Bidirectional(
            LSTM(self.__lstm_units), name=name)(embeddings_merged))
    cluster_count = self._s_layer(
        'cluster_count_LSTM_merge_batch',
        lambda name: BatchNormalization(name=name))(cluster_count)
    for i in range(self.__cluster_count_dense_layers):
        cluster_count = self._s_layer(
            'cluster_count_dense{}'.format(i),
            lambda name: Dense(
                self.__cluster_count_dense_units, name=name))(cluster_count)
        cluster_count = self._s_layer(
            'cluster_count_batch{}'.format(i),
            lambda name: BatchNormalization(name=name))(cluster_count)
        cluster_count = LeakyReLU()(cluster_count)

    # The next layer is an output-layer, therefore the name must not be
    # formatted
    cluster_count = self._s_layer(
        'cluster_count_output',
        lambda name: Dense(
            len(cluster_counts), activation='softmax', name=name),
        format_name=False)(cluster_count)
    additional_network_outputs['cluster_count_output'] = cluster_count

    network_output.append(cluster_count)

    return True
def build_model(input_layer, start_neurons):
    """Build the encoder / ASPP / decoder graph and return its output tensor.

    Args:
        input_layer: tensor the first convolution is applied to.
        start_neurons: base filter count; later stages use 2x, 4x and 8x.

    Returns:
        The tensor produced by the final 1x1 sigmoid convolution.

    Note: `input_shape` and `out_stride` are read as free names when the
    ASPP module is built; they must be defined in the enclosing module.
    """

    def relu_conv(filters, kernel):
        # Same-padded convolution with a fused ReLU.
        return Conv2D(filters, kernel, activation="relu", padding="same")

    def bn_relu(tensor):
        # Batch normalisation followed by a stand-alone ReLU.
        tensor = BatchNormalization()(tensor)
        return Activation("relu")(tensor)

    def pointwise_256(tensor):
        # Bias-free 1x1 convolution to 256 channels.
        return Conv2D(256, (1, 1), padding="same", use_bias=False)(tensor)

    def depthwise_3x3(tensor):
        # Bias-free 3x3 depthwise convolution, stride 1.
        return DepthwiseConv2D((3, 3), padding="same", use_bias=False)(tensor)

    # 128 -> 64
    # Stem: eleven stacked 3x3 convolutions (one on the input, ten more).
    stem = relu_conv(start_neurons, (3, 3))(input_layer)
    for _ in range(10):
        stem = relu_conv(start_neurons, (3, 3))(stem)
    stem = bn_relu(stem)
    down1 = MaxPooling2D((2, 2))(stem)

    # 64 -> 32
    down1 = Conv2D(start_neurons * 2, (1, 1), padding="same",
                   use_bias=False)(down1)
    shortcut2 = Conv2D(start_neurons * 2, (1, 1), strides=(2, 2),
                       padding="same", use_bias=False)(down1)
    shortcut2 = BatchNormalization()(shortcut2)
    down1 = DepthwiseConv2D((3, 3), strides=(2, 2), padding="same",
                            use_bias=False)(down1)
    stage2 = down1
    for _ in range(3):
        stage2 = relu_conv(start_neurons * 2, (3, 3))(stage2)
    merged2 = add([stage2, shortcut2])

    # 32 -> 16
    skip = BatchNormalization()(merged2)
    shortcut3 = Conv2D(start_neurons * 4, (1, 1), strides=(2, 2),
                       padding="same", use_bias=False)(merged2)
    shortcut3 = BatchNormalization()(shortcut3)
    merged2 = DepthwiseConv2D((3, 3), strides=(2, 2), padding="same",
                              use_bias=False)(merged2)
    stage3 = relu_conv(start_neurons * 4, (3, 3))(merged2)
    stage3 = relu_conv(start_neurons * 4, (1, 1))(stage3)
    stage3 = relu_conv(start_neurons * 4, (1, 1))(stage3)
    stage3 = Activation('relu')(stage3)
    merged3 = add([stage3, shortcut3])

    # 16 -> 8
    stage4 = relu_conv(start_neurons * 8, (3, 3))(merged3)
    stage4 = relu_conv(start_neurons * 8, (3, 3))(stage4)
    stage4 = relu_conv(start_neurons * 8, (1, 1))(stage4)
    stage4 = bn_relu(stage4)
    x = MaxPooling2D((2, 2))(stage4)

    # Six residual Xception-style blocks at the lowest resolution.
    for _ in range(6):
        x = res_xception_block(x, 256)

    # aspp
    x = aspp(x, input_shape, out_stride)
    x = bn_relu(pointwise_256(x))

    # decoder
    x = BilinearUpsampling((4, 4))(x)
    dec_skip = bn_relu(pointwise_256(skip))
    x = Concatenate()([x, dec_skip])

    # Two rounds of depthwise 3x3 + pointwise 1x1, each with BN and ReLU.
    for _ in range(2):
        x = bn_relu(depthwise_3x3(x))
        x = bn_relu(pointwise_256(x))

    x = Conv2D(1, (1, 1), padding="same")(x)
    x = BilinearUpsampling((4, 4))(x)
    x = Conv2D(1, (1, 1), padding="same", activation="sigmoid")(x)
    return x
def build_model(lr, l2, activation='sigmoid'):
    """Build the siamese network together with its branch and head models.

    Args:
        lr: learning rate handed to the Adam optimizer.
        l2: L2 regularisation factor applied to the convolution kernels
            that receive the shared keyword arguments.
        activation: activation of the final single-unit Dense layer.

    Returns:
        Tuple ``(model, branch_model, head_model)``; only ``model`` is
        compiled (binary cross-entropy loss).
    """
    regul = regularizers.l2(l2)
    optim = Adam(lr=lr)
    conv_opts = {'padding': 'same', 'kernel_regularizer': regul}

    # ------------------------------------------------------------------
    # Branch model: maps one image to a feature vector.
    # The shape comments are the original author's and assume a
    # 384x384x1 input.
    # ------------------------------------------------------------------
    inp = Input(shape=img_shape)  # 384x384x1
    x = Conv2D(64, (9, 9), strides=2, activation='relu', **conv_opts)(inp)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 96x96x64
    for _ in range(2):
        x = BatchNormalization()(x)
        x = Conv2D(64, (3, 3), activation='relu', **conv_opts)(x)

    # Each stage: downsample, widen with a 1x1 convolution, then apply four
    # sub-blocks. Entries are (stage width, sub-block filter argument).
    #   48x48x128, 24x24x256, 12x12x384, 6x6x512
    stage_specs = ((128, 64), (256, 64), (384, 96), (512, 128))
    for stage_width, sub_filters in stage_specs:
        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = BatchNormalization()(x)
        x = Conv2D(stage_width, (1, 1), activation='relu', **conv_opts)(x)
        for _ in range(4):
            x = subblock(x, sub_filters, **conv_opts)

    x = GlobalMaxPooling2D()(x)  # 512
    branch_model = Model(inp, x)

    # ------------------------------------------------------------------
    # Head model: compares two feature vectors.
    # ------------------------------------------------------------------
    mid = 32
    feature_shape = branch_model.output_shape[1:]
    feature_count = branch_model.output_shape[1]
    xa_inp = Input(shape=feature_shape)
    xb_inp = Input(shape=feature_shape)

    # Four element-wise comparisons of the two vectors.
    product = Lambda(lambda pair: pair[0] * pair[1])([xa_inp, xb_inp])
    total = Lambda(lambda pair: pair[0] + pair[1])([xa_inp, xb_inp])
    abs_diff = Lambda(lambda pair: K.abs(pair[0] - pair[1]))([xa_inp, xb_inp])
    sq_diff = Lambda(lambda t: K.square(t))(abs_diff)
    x = Concatenate()([product, total, abs_diff, sq_diff])
    x = Reshape((4, feature_count, 1), name='reshape1')(x)

    # Per feature NN with shared weight is implemented using CONV2D with
    # appropriate stride.
    x = Conv2D(mid, (4, 1), activation='relu', padding='valid')(x)
    x = Reshape((feature_count, mid, 1))(x)
    x = Conv2D(1, (1, mid), activation='linear', padding='valid')(x)
    x = Flatten(name='flatten')(x)

    # Weighted sum implemented as a Dense layer.
    x = Dense(1, use_bias=True, activation=activation,
              name='weighted-average')(x)
    head_model = Model([xa_inp, xb_inp], x, name='head')

    # ------------------------------------------------------------------
    # Siamese network: the branch model is called on each input image and
    # the head model on the two resulting vectors.
    # ------------------------------------------------------------------
    img_a = Input(shape=img_shape)
    img_b = Input(shape=img_shape)
    features_a = branch_model(img_a)
    features_b = branch_model(img_b)
    x = head_model([features_a, features_b])
    model = Model([img_a, img_b], x)
    model.compile(optim, loss='binary_crossentropy',
                  metrics=['binary_crossentropy', 'acc'])
    return model, branch_model, head_model
# ============================ Initializing model ============================
# Image branch: projects a 2048-d feature vector to the embedding size and
# repeats it once per time step.
image_model = Sequential([
    Dense(embedding_size, input_shape=(2048, ), activation='relu'),
    RepeatVector(max_len),
])

# Language branch: embeds the token sequence, runs it through an LSTM and
# projects every time step to the embedding size.
language_model = Sequential([
    Embedding(input_dim=vocab_size,
              output_dim=embedding_size,
              input_length=max_len),
    LSTM(256, return_sequences=True),
    TimeDistributed(Dense(embedding_size)),
])

# Concatenate both branches and decode with two stacked LSTMs followed by a
# softmax over the vocabulary.
conca = Concatenate()([image_model.output, language_model.output])
x = LSTM(128, return_sequences=True)(conca)
x = LSTM(512, return_sequences=False)(x)
x = Dense(vocab_size)(x)
out = Activation('softmax')(x)

model = Model(inputs=[image_model.input, language_model.input], outputs=out)
model.compile(loss='categorical_crossentropy',
              optimizer='RMSprop',
              metrics=['accuracy'])

# Load the weights of the pre-trained model.
model.load_weights('model.h5')

print("=" * 150)
print("MODEL LOADED")
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Train the autoencoder on the graph and return the learned embedding.

    Args:
        graph: graph to embed; when falsy, the graph is loaded from `edge_f`.
        edge_f: edge-list file, used only when `graph` is falsy.
        is_weighted: not read by this method.
        no_python: not read by this method.

    Returns:
        Tuple ``(self._Y, elapsed)`` where ``elapsed`` is the time between
        the adjacency matrix being ready and the embedding being predicted.

    Raises:
        Exception: if neither `graph` nor `edge_f` is given.
    """
    if not graph:
        if not edge_f:
            raise Exception('graph/edge_f needed')
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)

    adjacency = nx.to_scipy_sparse_matrix(graph)
    print("S graph adj mat ")
    print(adjacency.shape)
    print("S graph adj mat maximum")
    print(np.amax(adjacency))
    self._node_num = graph.number_of_nodes()
    print("S graph nodes")
    print(self._node_num)

    t1 = time()
    # Symmetrise the adjacency matrix.
    adjacency = (adjacency + adjacency.T) / 2

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    # If cannot use previous step information, initialize new models
    self._encoder = get_encoder(self._node_num, self._d, self._K,
                                self._n_units, self._nu1, self._nu2,
                                self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._K,
                                self._n_units, self._nu1, self._nu2,
                                self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # The input holds two node rows side by side; each half is sliced out
    # and pushed through the shared autoencoder.
    x_in = Input(shape=(2 * self._node_num,), name='x_in')
    first_half = Lambda(
        lambda x: x[:, 0:self._node_num],
        output_shape=(self._node_num,)
    )(x_in)
    second_half = Lambda(
        lambda x: x[:, self._node_num:2 * self._node_num],
        output_shape=(self._node_num,)
    )(x_in)

    # Process inputs
    [x_hat1, y1] = self._autoencoder(first_half)
    [x_hat2, y2] = self._autoencoder(second_half)

    # Outputs: reconstruction errors and the embedding difference.
    x_diff1 = Subtract()([x_hat1, first_half])
    x_diff2 = Subtract()([x_hat2, second_half])
    y_diff = Subtract()([y2, y1])

    # One extra column is appended to each reconstruction error (the
    # row-sum of that error).
    # NOTE(review): presumably this only pads the output to the width of
    # the target consumed by weighted_mse_x - confirm against the
    # batch generator.
    dummy_1 = KBack.sum(x_diff1, axis=1, keepdims=True)
    dummy_2 = KBack.sum(x_diff2, axis=1, keepdims=True)
    z_diff1 = Concatenate(axis=1)([x_diff1, dummy_1])
    z_diff2 = Concatenate(axis=1)([x_diff2, dummy_2])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        ''' Hack: This fn doesn't accept additional
            arguments. We use y_true to pass them.
            y_pred: Contains x_hat - x
            y_true: Contains [b, deg]
        '''
        weighted = y_pred * y_true[:, 0:self._node_num]
        squared_sum = KBack.sum(KBack.square(weighted), axis=-1)
        return squared_sum / y_true[:, self._node_num]

    def weighted_mse_y(y_true, y_pred):
        ''' Hack: This fn doesn't accept additional
            arguments. We use y_true to pass them.
            y_pred: Contains y2 - y1
            y_true: Contains s12
        '''
        min_batch_size = KBack.shape(y_true)[0]
        squared_sum = KBack.sum(KBack.square(y_pred), axis=-1)
        return KBack.reshape(squared_sum, [min_batch_size, 1]) * y_true

    # Model
    self._model = Model(input=x_in, output=[z_diff1, z_diff2, y_diff])
    sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    self._model.compile(
        optimizer=sgd,
        loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
        loss_weights=[1, 1, self._alpha]
    )

    self._model.fit_generator(
        generator=batch_generator_sdne(adjacency, self._beta,
                                       self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=adjacency.nonzero()[0].shape[0] // self._n_batch,
        verbose=1
    )

    # Get embedding for all points
    self._Y = model_batch_predictor(self._autoencoder, adjacency,
                                    self._n_batch)
    t2 = time()

    # Save the autoencoder and its weights (index 0: encoder, 1: decoder).
    networks = ((0, self._encoder), (1, self._decoder))
    if self._weightfile is not None:
        for slot, network in networks:
            saveweights(network, self._weightfile[slot])
    if self._modelfile is not None:
        for slot, network in networks:
            savemodel(network, self._modelfile[slot])

    suffix = self._savefilesuffix
    if suffix is not None:
        saveweights(self._encoder, 'encoder_weights_' + suffix + '.hdf5')
        saveweights(self._decoder, 'decoder_weights_' + suffix + '.hdf5')
        savemodel(self._encoder, 'encoder_model_' + suffix + '.json')
        savemodel(self._decoder, 'decoder_model_' + suffix + '.json')
        # Save the embedding
        np.savetxt('embedding_' + suffix + '.txt', self._Y)

    return self._Y, (t2 - t1)
def _main(args):
    """Convert a Darknet config/weights pair into a Keras model file.

    The Darknet ``.cfg`` file is walked section by section; for every
    supported section the matching Keras layer is created and, for
    convolutional sections, its weights are read sequentially from the
    binary ``.weights`` file.

    Args:
        args: object providing ``config_path``, ``weights_path``,
            ``output_path``, ``weights_only`` and ``plot_model``.

    Raises:
        AssertionError: if one of the paths has an unexpected extension, or
            a ``shortcut``/``upsample`` section uses an unsupported setting.
        ValueError: on an unknown activation or unsupported section type.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    # The context manager guarantees the weights file is closed even when
    # parsing fails half-way through.
    with open(weights_path, 'rb') as weights_file:
        major, minor, revision = np.ndarray(
            shape=(3, ), dtype='int32', buffer=weights_file.read(12))
        # The width of the `seen` counter depends on the header version.
        if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
            seen = np.ndarray(
                shape=(1, ), dtype='int64', buffer=weights_file.read(8))
        else:
            seen = np.ndarray(
                shape=(1, ), dtype='int32', buffer=weights_file.read(4))
        print('Weights Header: ', major, minor, revision, seen)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        input_layer = Input(shape=(None, None, 3))
        prev_layer = input_layer
        all_layers = []

        weight_decay = float(cfg_parser['net_0']['decay']
                             ) if 'net_0' in cfg_parser.sections() else 5e-4
        count = 0  # number of 4-byte weights consumed so far
        out_index = []
        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                batch_normalize = 'batch_normalize' in cfg_parser[section]

                padding = 'same' if pad == 1 and stride == 1 else 'valid'

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                # np.prod replaces np.product, which NumPy 2.0 removed.
                weights_size = np.prod(weights_shape)

                print('conv2d', 'bn'
                      if batch_normalize else '  ', activation, weights_shape)

                conv_bias = np.ndarray(
                    shape=(filters, ),
                    dtype='float32',
                    buffer=weights_file.read(filters * 4))
                count += filters

                if batch_normalize:
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    count += 3 * filters

                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_file.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Handle activation.
                act_fn = None
                if activation == 'leaky':
                    pass  # Add advanced activation later.
                elif activation != 'linear':
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.format(
                            activation, section))

                # Create Conv2D layer
                if stride > 1:
                    # Darknet uses left and top padding instead of 'same' mode
                    prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
                conv_layer = (Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=act_fn,
                    padding=padding))(prev_layer)

                if batch_normalize:
                    conv_layer = (BatchNormalization(
                        weights=bn_weight_list))(conv_layer)
                prev_layer = conv_layer

                if activation == 'linear':
                    all_layers.append(prev_layer)
                elif activation == 'leaky':
                    act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                    prev_layer = act_layer
                    all_layers.append(act_layer)

            elif section.startswith('route'):
                ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
                layers = [all_layers[i] for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = Concatenate()(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(
                        pool_size=(size, size),
                        strides=(stride, stride),
                        padding='same')(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('shortcut'):
                index = int(cfg_parser[section]['from'])
                activation = cfg_parser[section]['activation']
                assert activation == 'linear', 'Only linear activation supported.'
                all_layers.append(Add()([all_layers[index], prev_layer]))
                prev_layer = all_layers[-1]

            elif section.startswith('upsample'):
                stride = int(cfg_parser[section]['stride'])
                assert stride == 2, 'Only stride=2 supported.'
                all_layers.append(UpSampling2D(stride)(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('yolo'):
                # The layer preceding a yolo section is a model output; a
                # None placeholder keeps layer indices aligned with the cfg.
                out_index.append(len(all_layers) - 1)
                all_layers.append(None)
                prev_layer = all_layers[-1]

            elif section.startswith('net'):
                pass

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Whatever is left in the file was not consumed by any layer.
        remaining_weights = len(weights_file.read()) / 4

    # Create and save model.
    if not out_index:
        out_index.append(len(all_layers) - 1)
    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    # summary() prints by itself; wrapping it in print() only added "None".
    model.summary()
    if args.weights_only:
        model.save_weights('{}'.format(output_path))
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
def get_unet(self):
    """Build and compile a U-Net style encoder/decoder for binary segmentation.

    The network takes a single-channel image of size
    ``(self.img_rows, self.img_cols)`` and produces a one-channel sigmoid map.
    All convolutions use 'same' padding, so the encoder feature maps can be
    concatenated with the upsampled decoder maps without cropping.

    NOTE(review): four 2x2 poolings are followed by four 2x upsamplings and
    concatenations, so the input height/width presumably need to be
    divisible by 16 -- confirm against the data pipeline.

    Returns:
        The Keras ``Model``, compiled with Adam (lr=1e-4), binary
        cross-entropy loss and the accuracy metric.
    """
    inputs = Input((self.img_rows, self.img_cols, 1))

    # Network structure definition.
    # Settings shared by every ReLU convolution below.
    conv_args = dict(activation='relu', padding='same',
                     kernel_initializer='he_normal')

    # ---- Contracting path ----
    conv1 = Conv2D(64, 3, **conv_args)(inputs)
    print("conv1 shape:", conv1.shape)
    conv1 = Conv2D(64, 3, **conv_args)(conv1)
    print("conv1 shape:", conv1.shape)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    print("pool1 shape:", pool1.shape)

    conv2 = Conv2D(128, 3, **conv_args)(pool1)
    print("conv2 shape:", conv2.shape)
    conv2 = Conv2D(128, 3, **conv_args)(conv2)
    print("conv2 shape:", conv2.shape)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    print("pool2 shape:", pool2.shape)

    conv3 = Conv2D(256, 3, **conv_args)(pool2)
    print("conv3 shape:", conv3.shape)
    conv3 = Conv2D(256, 3, **conv_args)(conv3)
    print("conv3 shape:", conv3.shape)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    print("pool3 shape:", pool3.shape)

    conv4 = Conv2D(512, 3, **conv_args)(pool3)
    conv4 = Conv2D(512, 3, **conv_args)(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # ---- Bottleneck ----
    conv5 = Conv2D(1024, 3, **conv_args)(pool4)
    conv5 = Conv2D(1024, 3, **conv_args)(conv5)
    drop5 = Dropout(0.5)(conv5)

    # ---- Expanding path: upsample, 2x2 conv, concatenate the skip tensor ----
    up6 = Conv2D(512, 2, **conv_args)(UpSampling2D(size=(2, 2))(drop5))
    merge6 = Concatenate(axis=3)([drop4, up6])
    conv6 = Conv2D(512, 3, **conv_args)(merge6)
    conv6 = Conv2D(512, 3, **conv_args)(conv6)

    up7 = Conv2D(256, 2, **conv_args)(UpSampling2D(size=(2, 2))(conv6))
    merge7 = Concatenate(axis=3)([conv3, up7])
    conv7 = Conv2D(256, 3, **conv_args)(merge7)
    conv7 = Conv2D(256, 3, **conv_args)(conv7)

    up8 = Conv2D(128, 2, **conv_args)(UpSampling2D(size=(2, 2))(conv7))
    merge8 = Concatenate(axis=3)([conv2, up8])
    conv8 = Conv2D(128, 3, **conv_args)(merge8)
    conv8 = Conv2D(128, 3, **conv_args)(conv8)

    up9 = Conv2D(64, 2, **conv_args)(UpSampling2D(size=(2, 2))(conv8))
    merge9 = Concatenate(axis=3)([conv1, up9])
    conv9 = Conv2D(64, 3, **conv_args)(merge9)
    conv9 = Conv2D(64, 3, **conv_args)(conv9)
    conv9 = Conv2D(2, 3, **conv_args)(conv9)

    # 1x1 convolution producing the per-pixel sigmoid output.
    conv10 = Conv2D(1, 1, activation='sigmoid')(conv9)

    # Keras 2 keyword names; the legacy `input=` / `output=` spelling is not
    # accepted by every Keras 2 implementation.
    model = Model(inputs=inputs, outputs=conv10)
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
WORD_EMBEDDING_LENGTH, border_mode='valid', activation='relu', name='hate_anger_conv_4')(reshape) maxpool_4 = MaxPooling2D(pool_size=(INPUT_WORDS - 4 + 1, 1), name='hate_anger_pooling_4')(ngram_4) ngram_5 = Convolution2D(50, 5, WORD_EMBEDDING_LENGTH, border_mode='valid', activation='relu', name='hate_anger_conv_5')(reshape) maxpool_5 = MaxPooling2D(pool_size=(INPUT_WORDS - 5 + 1, 1), name='hate_anger_pooling_5')(ngram_5) #1 branch again merged = Concatenate(axis=2)([maxpool_2, maxpool_3, maxpool_4, maxpool_5]) flatten = Flatten(name='hate_anger_flatten')(merged) # batch_norm = BatchNormalization()(flatten) dense_1 = Dense(256, activation='relu', name='hate_anger_dense_1')(flatten) drop_1 = Dropout(0.8, name='hate_anger_drop_1')(dense_1) dense_2 = Dense(256, activation='relu', name='hate_anger_dense_2')(drop_1) drop_2 = Dropout(0.8, name='hate_anger_drop_2')(dense_2) output_irony = Dense(3, activation='softmax', name='main_output')(drop_2) model = Model(input=[input_words], output=[output_irony]) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', 'mse', 'mae']) print 'Model built' print(model.summary()) sys.stdout.flush()
actor.add(Flatten(input_shape=(1, ) + observation_shape)) actor.add(Dense(400)) actor.add(Activation('relu')) actor.add(Dense(300)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('tanh')) print(actor.summary()) action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + observation_shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = Dense(400)(flattened_observation) x = Activation('relu')(x) x = Concatenate()([x, action_input]) x = Dense(300)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1) agent = DDPGAgent(nb_actions=nb_actions,
def build_full_contour_integration_model(weights_file=None,
                                         rf_size=35,
                                         inner_leaky_relu_alpha=0.9,
                                         outer_leaky_relu_alpha=1.,
                                         l1_reg_loss_weight=0.0005):
    """
    Build the full AlexNet-style model with a contour integration layer
    inserted after the first convolution.

    Notes:
        [1] The returned model is not compiled; compile it before use.
        [2] Layers after the contour integration layer carry names that
            differ from the plain AlexNet names (e.g. 'conv_22' instead of
            'conv_2') so that AlexNet weights can be loaded safely.

    :param weights_file: optional path passed to ``model.load_weights``;
        nothing is loaded when it is falsy.
    :param rf_size: forwarded to ContourIntegrationLayer3D.
    :param inner_leaky_relu_alpha: forwarded to ContourIntegrationLayer3D.
    :param outer_leaky_relu_alpha: forwarded to ContourIntegrationLayer3D.
    :param l1_reg_loss_weight: forwarded to ContourIntegrationLayer3D.
    :return: the (uncompiled) Keras Model.
    """

    def split_conv(tensor, n_filters, kernel, branch_names, merged_name):
        # Run one convolution per split of `tensor` (as produced by
        # alex_net.splittensor with ratio_split=2) and join the results on
        # axis 1.
        branches = []
        for split_id, branch_name in enumerate(branch_names):
            conv = Conv2D(n_filters, kernel, activation='relu',
                          name=branch_name)
            part = alex_net.splittensor(ratio_split=2,
                                        id_split=split_id)(tensor)
            branches.append(conv(part))
        return Concatenate(axis=1, name=merged_name)(branches)

    image_in = Input(shape=(3, 227, 227))

    # Stage 1: first convolution followed by contour integration.
    net = Conv2D(96, (11, 11), strides=(4, 4), activation='relu',
                 name='conv_1')(image_in)
    net = ContourIntegrationLayer3D(
        tgt_filt_idx=0,  # not important for full model
        rf_size=rf_size,
        inner_leaky_relu_alpha=inner_leaky_relu_alpha,
        outer_leaky_relu_alpha=outer_leaky_relu_alpha,
        l1_reg_loss_weight=l1_reg_loss_weight,
        name='contour_integration_layer')(net)

    # Stage 2: pool, normalise, pad, split convolution.
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)
    net = alex_net.crosschannelnormalization(
        name='Contrast_Normalization')(net)
    net = ZeroPadding2D((2, 2))(net)
    net = split_conv(net, 128, (5, 5),
                     ('conv_22_1', 'conv_22_2'), 'conv_22')

    # Stage 3: pool, normalise, pad, single convolution.
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)
    net = alex_net.crosschannelnormalization()(net)
    net = ZeroPadding2D((1, 1))(net)
    net = Conv2D(384, (3, 3), activation='relu', name='conv_33')(net)

    # Stage 4: pad, split convolution.
    net = ZeroPadding2D((1, 1))(net)
    net = split_conv(net, 192, (3, 3),
                     ('conv_44_1', 'conv_44_2'), 'conv_44')

    # Stage 5: pad, split convolution.
    net = ZeroPadding2D((1, 1))(net)
    net = split_conv(net, 128, (3, 3),
                     ('conv_55_1', 'conv_55_2'), 'conv_55')

    # Classifier head.
    net = MaxPooling2D((3, 3), strides=(2, 2), name='convpool_5')(net)
    net = Flatten(name='flatten')(net)
    net = Dense(4096, activation='relu', name='dense_11')(net)
    net = Dropout(0.5)(net)
    net = Dense(4096, activation='relu', name='dense_22')(net)
    net = Dropout(0.5)(net)
    net = Dense(1000, name='dense_33')(net)
    prediction = Activation('softmax', name='softmax')(net)

    model = Model(inputs=image_in, outputs=prediction)
    if weights_file:
        model.load_weights(weights_file)
    return model
output_shape=output_shape_words)(I) tags = Lambda(function=lambda x: x[:, 20:20 + 20], output_shape=output_shape_tags)(I) labels = Lambda(function=lambda x: x[:, 40:40 + 41], output_shape=output_shape_labels)(I) embedding_words = Reshape(target_shape=(20 * 64, ))(Embedding( input_dim=len(index_of_words), output_dim=64, input_length=20, )(words)) embedding_tags = Reshape(target_shape=(32 * 20, ))(Embedding( input_dim=len(index_of_pos), output_dim=32, input_length=20)(tags)) embedding_labels = Reshape(target_shape=(32 * 12, ))(Embedding( input_dim=len(index_of_labels), output_dim=32, input_length=12)(labels)) embeddings = Concatenate(axis=1)( [embedding_words, embedding_tags, embedding_labels]) h1 = Dense(units=500, activation='relu')(embeddings) h1_do = Dropout(0.3)(h1) h2 = Dense(units=500, activation='relu')(h1_do) h2_do = Dropout(0.3)(h2) q = Dense(units=93, activation='softmax')(h2_do) model = Model(inputs=[I], outputs=[q]) model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) print(model.summary()) model.fit(train_data, train_labels, epochs=15, batch_size=1000) model.save(filepath='./models/model3')
def make_megnet_model(nfeat_edge=None,
                      nfeat_global=None,
                      nfeat_node=None,
                      nblocks=3,
                      n1=64,
                      n2=32,
                      n3=16,
                      nvocal=95,
                      embedding_dim=16,
                      nbvocal=None,
                      bond_embedding_dim=None,
                      ngvocal=None,
                      global_embedding_dim=None,
                      npass=3,
                      ntarget=1,
                      act=softplus2,
                      is_classification=False,
                      l2_coef=None,
                      dropout=None,
                      dropout_on_predict=False):
    """Make a MEGNet Model

    For each of the node / edge / global inputs, passing ``None`` as the
    feature count makes that input an integer tensor fed through an
    ``Embedding``; otherwise the input is a float feature tensor used as is.

    Args:
        nfeat_edge: (int) number of bond features
        nfeat_global: (int) number of state features
        nfeat_node: (int) number of atom features
        nblocks: (int) number of MEGNetLayer blocks
        n1: (int) number of hidden units in layer 1 in MEGNetLayer
        n2: (int) number of hidden units in layer 2 in MEGNetLayer
        n3: (int) number of hidden units in layer 3 in MEGNetLayer
        nvocal: (int) number of total element
        embedding_dim: (int) number of embedding dimension
        nbvocal: (int) number of bond types if bond attributes are types
        bond_embedding_dim: (int) number of bond embedding dimension
        ngvocal: (int) number of global types if global attributes are types
        global_embedding_dim: (int) number of global embedding dimension
        npass: (int) number of recurrent steps in Set2Set layer
        ntarget: (int) number of output targets
        act: (object) activation function
        l2_coef: (float or None) l2 regularization parameter
        is_classification: (bool) whether it is a classification task
        dropout: (float) dropout rate
        dropout_on_predict (bool): Whether to use dropout during prediction
            and training

    Returns:
        (Model) Keras model with seven inputs ``[x1, ..., x7]`` and a single
        dense output of width ``ntarget``. The model is not compiled here.
    """
    # `training=True` forces dropout at prediction time; `None` leaves the
    # decision to Keras' learning phase.
    dropout_training = True if dropout_on_predict else None

    # Create the input blocks
    int32 = 'int32'
    if nfeat_node is None:
        x1 = Input(shape=(None, ), dtype=int32)  # only z as feature
        x1_ = Embedding(nvocal, embedding_dim)(x1)
    else:
        x1 = Input(shape=(None, nfeat_node))
        x1_ = x1

    if nfeat_edge is None:
        x2 = Input(shape=(None, ), dtype=int32)
        x2_ = Embedding(nbvocal, bond_embedding_dim)(x2)
    else:
        x2 = Input(shape=(None, nfeat_edge))
        x2_ = x2

    if nfeat_global is None:
        x3 = Input(shape=(None, ), dtype=int32)
        x3_ = Embedding(ngvocal, global_embedding_dim)(x3)
    else:
        x3 = Input(shape=(None, nfeat_global))
        x3_ = x3

    # Integer index inputs consumed by MEGNetLayer; x6 / x7 are additionally
    # passed to the node / edge Set2Set layers below.
    x4 = Input(shape=(None, ), dtype=int32)
    x5 = Input(shape=(None, ), dtype=int32)
    x6 = Input(shape=(None, ), dtype=int32)
    x7 = Input(shape=(None, ), dtype=int32)

    reg = l2(l2_coef) if l2_coef is not None else None

    # two feedforward layers
    # (the default is a tuple: a mutable list default would be shared
    # between calls)
    def ff(x, n_hiddens=(n1, n2)):
        out = x
        for i in n_hiddens:
            out = Dense(i, activation=act, kernel_regularizer=reg)(out)
        return out

    # a block corresponds to two feedforward layers + one MEGNetLayer layer
    # Note the first block does not contain the feedforward layer since
    # it will be explicitly added before the block
    def one_block(a, b, c, has_ff=True):
        if has_ff:
            x1_ = ff(a)
            x2_ = ff(b)
            x3_ = ff(c)
        else:
            x1_ = a
            x2_ = b
            x3_ = c
        out = MEGNetLayer(
            [n1, n1, n2], [n1, n1, n2], [n1, n1, n2],
            pool_method='mean',
            activation=act,
            kernel_regularizer=reg)([x1_, x2_, x3_, x4, x5, x6, x7])

        x1_temp = out[0]
        x2_temp = out[1]
        x3_temp = out[2]
        if dropout:
            x1_temp = Dropout(dropout)(x1_temp, training=dropout_training)
            x2_temp = Dropout(dropout)(x2_temp, training=dropout_training)
            x3_temp = Dropout(dropout)(x3_temp, training=dropout_training)
        return x1_temp, x2_temp, x3_temp

    x1_ = ff(x1_)
    x2_ = ff(x2_)
    x3_ = ff(x3_)
    for i in range(nblocks):
        # The feedforward layers for the first block were applied just above.
        has_ff = i != 0
        x1_1, x2_1, x3_1 = one_block(x1_, x2_, x3_, has_ff)
        # skip connection
        x1_ = Add()([x1_, x1_1])
        x2_ = Add()([x2_, x2_1])
        x3_ = Add()([x3_, x3_1])

    # set2set for both the atom and bond
    node_vec = Set2Set(T=npass, n_hidden=n3,
                       kernel_regularizer=reg)([x1_, x6])
    edge_vec = Set2Set(T=npass, n_hidden=n3,
                       kernel_regularizer=reg)([x2_, x7])

    # concatenate atom, bond, and global
    final_vec = Concatenate(axis=-1)([node_vec, edge_vec, x3_])
    if dropout:
        final_vec = Dropout(dropout)(final_vec, training=dropout_training)

    # final dense layers
    final_vec = Dense(n2, activation=act, kernel_regularizer=reg)(final_vec)
    final_vec = Dense(n3, activation=act, kernel_regularizer=reg)(final_vec)

    # Sigmoid output for classification, linear output otherwise.
    final_act = 'sigmoid' if is_classification else None
    out = Dense(ntarget, activation=final_act)(final_vec)
    model = Model(inputs=[x1, x2, x3, x4, x5, x6, x7], outputs=out)
    return model
def create_model(
    inshape: Tuple[int, int, int],
    n_classes: int,
    output_activation: str,
    block_structure: Tuple[int, ...] = config_sections.DEFAULT_BLOCK_STRUCTURE,
    filters: int = config_sections.DEFAULT_FILTERS,
    internal_activation: str = config_sections.DEFAULT_INTERNAL_ACTIVATION,
    kernel_size: Tuple[int, int] = config_sections.DEFAULT_KERNEL_SIZE,
    padding: str = config_sections.DEFAULT_PADDING,
    pool_size: Tuple[int, int] = config_sections.DEFAULT_POOL_SIZE,
    use_batch_norm: bool = config_sections.DEFAULT_USE_BATCH_NORM,
    use_growth: bool = config_sections.DEFAULT_USE_GROWTH,
    use_initial_colorspace_transformation_layer: bool = config_sections.DEFAULT_USE_INITIAL_COLORSPACE_TRANSFORMATION_LAYER,
) -> keras.models.Model:
    """Assemble an encoder/decoder network from dense 2D blocks.

    The encoder applies, per entry of ``block_structure``, a dense block, a
    1x1 transition convolution and a max-pool. The decoder mirrors it:
    bilinear upsampling, a convolution, concatenation with the matching
    encoder dense-block output, then a dense block built with the 1x1
    transition options. When ``use_growth`` is set the filter count doubles
    after every encoder stage and is halved again before every decoder stage.

    Returns:
        A Keras model mapping an ``inshape`` input to an ``n_classes``-channel
        output produced by a 1x1 convolution with ``output_activation``.
    """
    block_options = dict(
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=internal_activation,
        use_batch_norm=use_batch_norm,
    )
    # Same settings as the blocks, but with a 1x1 kernel.
    pointwise_options = dict(block_options, kernel_size=(1, 1))
    both_option_sets = (block_options, pointwise_options)

    inlayer = keras.layers.Input(shape=inshape)
    tensor = inlayer

    # Optional colorspace transformation (not in block format)
    if use_initial_colorspace_transformation_layer:
        tensor = network_sections.colorspace_transformation(
            inshape, tensor, use_batch_norm)

    # ---- Encoder ----
    skip_tensors = []
    for depth in block_structure:
        tensor = network_sections.dense_2d_block(tensor, block_options, depth)
        skip_tensors.append(tensor)
        tensor = network_sections.Conv2D_Options(tensor, pointwise_options)
        tensor = MaxPooling2D(pool_size=pool_size)(tensor)
        if use_growth:
            for options in both_option_sets:
                options["filters"] *= 2

    # ---- Bottleneck between encoder and decoder ----
    tensor = network_sections.dense_2d_block(tensor, block_options,
                                             block_structure[-1])

    # ---- Decoder (walks the encoder stages in reverse) ----
    stages = zip(reversed(block_structure), reversed(skip_tensors))
    for depth, skip in stages:
        if use_growth:
            for options in both_option_sets:
                options["filters"] = int(options["filters"] / 2)
        tensor = UpSampling2D(size=pool_size,
                              interpolation="bilinear")(tensor)
        tensor = network_sections.Conv2D_Options(tensor, block_options)
        tensor = Concatenate()([skip, tensor])
        tensor = network_sections.dense_2d_block(tensor, pointwise_options,
                                                 depth)

    # ---- Output head ----
    tensor = network_sections.Conv2D_Options(tensor, block_options)
    output_layer = Conv2D(filters=n_classes,
                          kernel_size=(1, 1),
                          padding="same",
                          activation=output_activation)(tensor)
    return keras.models.Model(inputs=[inlayer], outputs=[output_layer])
# Experiment: how does a masked timestep show up in the output and in the loss
# of a forward LSTM concatenated with a backward LSTM?
n_samples = 2
dx = 2  # timesteps per sample
dy = 3  # features per timestep
dout = 7  # units of each LSTM
mask_value = -1

# Random integer data; the first timestep of the second sample is fully masked.
X = np.random.randint(5, size=(n_samples, dx, dy))
X[1, 0, :] = mask_value

inp = Input(shape=(dx, dy))
# Use the same constant that was written into X instead of a second literal.
x = Masking(mask_value=float(mask_value))(inp)
lstm_fw = LSTM(dout, return_sequences=True, go_backwards=False)(x)
lstm_bw = LSTM(dout, return_sequences=True, go_backwards=True)(x)
concat = Concatenate(axis=-1)([lstm_fw, lstm_bw])
model_3 = Model(inputs=inp, outputs=concat)
model_3.summary()

# Deterministic weights: the i-th weight array is filled with the constant i
# (starting at 2), so runs differ only through the random X.
model_3.set_weights(
    [np.ones(l.shape) * i for i, l in enumerate(model_3.get_weights(), 2)])
model_3.compile(optimizer="rmsprop", loss="mae")

y_true = np.ones((n_samples, dx, model_3.layers[-1].output_shape[-1]))
y_pred_3 = model_3.predict(X)
print(y_pred_3)

# MAE over every output value vs. MAE over the non-zero outputs only
# (presumably masked positions come out as exact zeros -- check the printed
# prediction above).
unmasked_loss = np.abs(1 - y_pred_3).mean()
masked_loss = np.abs(1 - y_pred_3[y_pred_3 != 0.0]).mean()
keras_loss = model_3.evaluate(X, y_true, verbose=0)
print(f"unmasked loss: {unmasked_loss}")
print(f"masked loss: {masked_loss}")
print(f"evaluate with Keras: {keras_loss}")
def _tag_aspect_tokens(words, asp_words, tags):
    """Tag the first in-order occurrence of *asp_words* in *words* (in place).

    Scans *words* once from left to right. Each aspect token is matched
    against the next still-untagged ('O') equal word; the first aspect token
    gets 'B', later ones get 'I'.
    """
    k = 0
    for j, word in enumerate(words):
        if k >= len(asp_words):
            break
        if asp_words[k] == word and tags[j] == 'O':
            tags[j] = 'B' if k == 0 else 'I'
            k += 1


def _decode_aspects(token_ids, tag_ids, idx2word, tag_list):
    """Turn one sentence's predicted tag ids into a list of aspect strings.

    Consecutive 'B'/'I' words are joined (each followed by a space) into one
    aspect, an 'O' tag closes the current aspect, and decoding stops at the
    "endpad" padding word. An aspect still open when the sentence ends is
    emitted as well.
    """
    found = []
    current = ""
    for token_id, tag_id in zip(token_ids, tag_ids):
        word = idx2word[token_id]
        if word == "endpad":
            break
        tag = tag_list[tag_id]
        if tag in ('B', 'I'):
            current += word + " "
        elif tag == 'O' and current != "":
            found.append(current)
            current = ""
    # Flush an aspect that runs up to the end of the sentence / padding.
    if current != "":
        found.append(current)
    return found


def neural_extractor1(data, categories, aspects, text_to_predict):
    """Train a BiGRU B/I/O tagger on *data* and return predicted aspects.

    The sentences in *data* are tokenised with nltk, tagged B/I/O from the
    gold *aspects*, and used to fit a frozen-embedding + bidirectional GRU
    sequence tagger. Predictions are returned for the last 20% of *data*.

    Relies on module-level state defined elsewhere in the file:
    ``parts_of_speech`` (dict, updated here), ``sentences`` (list, appended
    to here) and ``glove_emb`` (word -> 300-d vector lookup).

    NOTE(review): ``sentences`` accumulates across calls, so a second call
    would see more tagged sentences than rows in X -- confirm the function is
    only called once per process.

    Args:
        data: list of raw sentence strings; it is not modified.
        categories: unused.
        aspects: ``aspects[i]`` is an iterable of aspect strings for
            ``data[i]``.
        text_to_predict: unused.

    Returns:
        A list with one entry per held-out sentence; each entry is a list of
        predicted aspect strings (words joined by, and ending with, a space).
    """
    max_len = 50

    # ---- Build (word, POS, tag) triples for every sentence ----
    for i, text in enumerate(data):
        words = nltk.word_tokenize(text)
        pos_tags = []
        for _, pos_tag in nltk.pos_tag(words):
            parts_of_speech[pos_tag] = 1
            pos_tags.append(pos_tag)
        tags = ['O'] * len(words)
        for aspect in aspects[i]:
            # NOTE(review): the aspect is lower-cased but `words` is not, so
            # capitalised tokens never match -- confirm `data` is lower-case.
            _tag_aspect_tokens(words, nltk.word_tokenize(aspect.lower()),
                               tags)
        sentences.append(list(zip(words, pos_tags, tags)))
    print(len(sentences))

    # ---- Word indices ----
    # Work on a copy so the caller's list is not rewritten or extended with
    # the padding sentinel on every call.
    texts = [' '.join(nltk.word_tokenize(text)) for text in data]
    texts.append("ENDPAD")
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    word_index = tokenizer.word_index
    X = pad_sequences(sequences[:-1], maxlen=max_len, padding="post",
                      value=word_index["endpad"])
    validation_size = int(0.2 * X.shape[0])

    # Tokenizer indices start at 1, so the embedding needs one extra row for
    # the largest index to be addressable (row 0 is the mask index).
    vocab_size = len(word_index) + 1
    embedding_matrix = np.zeros((vocab_size, 300))
    for word, idx in word_index.items():
        if word in glove_emb:
            embedding_matrix[idx] = glove_emb[word]

    tag_list = ['B', 'I', 'O', 'P']  # 'P' is the padding tag
    n_tags = len(tag_list)
    tag2idx = {t: i for i, t in enumerate(tag_list)}
    idx2word = {i: w for w, i in word_index.items()}
    pos2idx = {t: i for i, t in enumerate(parts_of_speech.keys())}

    # ---- Targets and POS features ----
    y = [[tag2idx[w[2]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len, sequences=y, padding="post",
                      value=tag2idx["P"])
    y = [to_categorical(i, num_classes=n_tags) for i in y]

    pos = [[pos2idx[w[1]] for w in s] for s in sentences]
    pos1 = pad_sequences(maxlen=max_len, sequences=pos, padding="post",
                         value=len(parts_of_speech) + 1)
    pos = np.asarray([np.reshape(i, (max_len, 1)) for i in pos1])

    # The last `validation_size` rows are used both for validation and as the
    # sentences to predict.
    X_te = X[-validation_size:]
    te_pos = pos[-validation_size:]
    y_te = y[-validation_size:]
    X_to_predict = X[-validation_size:]
    pos_to_predict = pos[-validation_size:]

    # ---- Model ----
    e = Input(shape=(max_len, ))
    emb = Embedding(vocab_size, 300, weights=[embedding_matrix],
                    input_length=max_len, mask_zero=True,
                    trainable=False)(e)
    # NOTE(review): the POS input is accepted by the model but is not wired
    # into the GRU, exactly as before (the GRU has always consumed `emb`
    # only). Decide whether POS features should be concatenated in.
    ad_pos = Input(shape=(max_len, 1))
    bi_gru = Bidirectional(GRU(50, return_sequences=True))(emb)
    out = Dense(25, activation='relu')(bi_gru)
    out = TimeDistributed(Dense(n_tags, activation='softmax'))(out)
    model = Model(inputs=[e, ad_pos], outputs=[out])
    model.compile(loss='categorical_crossentropy', optimizer="rmsprop",
                  metrics=['accuracy'])
    # NOTE(review): training uses all of X, which includes the validation
    # rows, so the validation metrics are optimistic.
    model.fit([X, pos], np.array(y), batch_size=25, epochs=20,
              validation_data=([X_te, te_pos], np.array(y_te)), verbose=1)

    # ---- Predict and decode ----
    p1 = model.predict([X_to_predict, pos_to_predict])
    pred_aspects = [
        _decode_aspects(token_ids, np.argmax(scores, axis=-1), idx2word,
                        tag_list)
        for token_ids, scores in zip(X_to_predict, p1)
    ]
    return pred_aspects
# Next-word prediction from an image feature vector plus a caption prefix:
# build the model, train it for one epoch and round-trip its weights to disk.
dataset_preprocessor = datasets.flickr.PreProcessing(cfg)

MAX_LEN = 40  # tokens per caption prefix
EMBEDDING_DIM = 300
IMAGE_ENC_DIM = 300

# Vocabulary size is the number of lines in the workspace's word dictionary.
vocab_size = get_line_count(
    os.path.join(cfg["workspace"]["directory"], "word_dictionary.txt"))

# Image input: a 2048-d feature vector, projected and repeated once per token
# position so it can be concatenated with the token embeddings.
img_input = Input(shape=(2048,))
img_enc = Dense(IMAGE_ENC_DIM, activation="relu")(img_input)
images = RepeatVector(MAX_LEN)(img_enc)

# Text input
text_input = Input(shape=(MAX_LEN,))
embedding = Embedding(vocab_size, EMBEDDING_DIM,
                      input_length=MAX_LEN)(text_input)

x = Concatenate()([images, embedding])
y = Bidirectional(LSTM(256, return_sequences=False))(x)
pred = Dense(vocab_size, activation='softmax')(y)

model = Model(inputs=[img_input, text_input], outputs=pred)
model.compile(loss='categorical_crossentropy',
              optimizer="RMSProp",
              metrics=['accuracy'])
model.summary()

training_generator, validation_generator, test_generator = \
    dataset_preprocessor.get_keras_generators("inception")
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    epochs=1)

# Save the weights, then load them back from the same file.
weights_path = os.path.join(cfg["workspace"]["directory"],
                            cfg["model"]["arch"] + "_model.h5")
model.save_weights(weights_path)
print("Saved model to disk")
model.load_weights(weights_path)
def __init__(self, axis, **kwargs):
    """Initialise the layer as a Keras ``Concatenate`` along *axis*.

    A ``config`` keyword, if supplied, is discarded before the remaining
    keyword arguments are forwarded to ``Concatenate.__init__``.
    """
    # Tolerate callers that do not pass `config` instead of raising KeyError.
    kwargs.pop(str('config'), None)
    Concatenate.__init__(self, axis, **kwargs)
def train(self):
    """Build, fit and save the attention seq2seq model, then build the
    inference models.

    Steps:
      1. Build a bidirectional-LSTM encoder and an attention decoder that is
         unrolled manually for ``params['MAX_LEN_TARGET']`` steps with
         teacher forcing; store the result in ``self.model``.
      2. Fit it on ``self.encoder_inputs`` / ``self.decoder_inputs`` against
         one-hot encoded ``self.decoder_targets`` and save the weights under
         ``CUR_PATH + '/resources/'``.
      3. Build ``self.encoder_model`` and the single-step
         ``self.decoder_model`` that reuse the trained layers.

    Reads the module-level ``params``, ``CUR_PATH`` and ``YEARMONTHDAY``.
    Returns nothing.
    """
    #######################################################
    ### DESIGN MODEL FOR TRAINING
    #######################################################
    print('> Desgin Model for Training')

    ###########
    ### Encoder
    embedding_layer = Embedding(
        params['LEN_WORD2IDX_INPUTS'] + 1,
        params['EMBEDDING_DIM'],
        input_length=params['MAX_LEN_INPUT'],
    )
    encoder_inputs_placeholder = Input(shape=(params['MAX_LEN_INPUT'], ))
    x = embedding_layer(encoder_inputs_placeholder)
    encoder = Bidirectional(
        LSTM(
            params['LATENT_DIM'],
            return_sequences=True,
            # dropout=0.5 # dropout not available on gpu
        ))
    encoder_outputs = encoder(x)

    ###########
    ### Decoder
    # this word embedding will not use pre-trained vectors, although you could
    decoder_embedding = Embedding(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                                  params['EMBEDDING_DIM'])
    decoder_inputs_placeholder = Input(
        shape=(params['MAX_LEN_TARGET'], ))  # teacher forcing input
    decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)

    #############
    ### Attention
    # The attention layers are created once, outside the step function,
    # because the same layers are reused for all Ty decoder steps.
    attn_repeat_layer = RepeatVector(params['MAX_LEN_INPUT'])
    attn_concat_layer = Concatenate(axis=-1)
    attn_dense1 = Dense(10, activation='tanh')
    attn_dense2 = Dense(1, activation=self._softmax_over_time)
    attn_dot = Dot(
        axes=1)  # to perform the weighted sum of alpha[t] * h[t]

    def _one_step_attention(h, st_1):
        # h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2)
        # st_1 = s(t-1), shape = (LATENT_DIM_DECODER,)

        # copy s(t-1) Tx times
        # now shape = (Tx, LATENT_DIM_DECODER)
        st_1 = attn_repeat_layer(st_1)

        # Concatenate all h(t)'s with s(t-1)
        # Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2)
        x = attn_concat_layer([h, st_1])

        # Neural net first layer
        x = attn_dense1(x)

        # Neural net second layer with special softmax over time
        alphas = attn_dense2(x)

        # "Dot" the alphas and the h's
        # Remember a.dot(b) = sum over a[t] * b[t]
        context = attn_dot([alphas, h])
        return context

    # define the rest of the decoder (after attention)
    decoder_lstm = LSTM(params['LATENT_DIM_DECODER'], return_state=True)
    decoder_dense = Dense(params['LEN_WORD2IDX_OUTPUTS'] + 1,
                          activation='softmax')

    initial_s = Input(shape=(params['LATENT_DIM_DECODER'], ), name='s0')
    initial_c = Input(shape=(params['LATENT_DIM_DECODER'], ), name='c0')
    context_last_word_concat_layer = Concatenate(
        axis=2)  # for teacher forcing

    # Unlike previous seq2seq, we cannot get the output all in one step
    # Instead we need to do Ty steps And in each of those steps, we need to
    # consider all Tx h's

    # s, c will be re-assigned in each iteration of the loop
    s = initial_s
    c = initial_c

    # collect outputs in a list at first
    outputs = []
    # An LSTM normally runs this loop internally; it is written out by hand
    # here so that the attention context can be computed at every step.
    for t in range(params['MAX_LEN_TARGET']):  # Ty times
        # get the context using attention
        context = _one_step_attention(encoder_outputs, s)

        # we need a different layer for each time step
        # Extract only the embedding of time step t; a Lambda is used because
        # everything has to be expressed as a layer. `t` is bound as a
        # default argument so each layer keeps its own index even if Keras
        # calls the function again after the loop has finished.
        selector = Lambda(lambda x, t=t: x[:, t:t + 1])
        xt = selector(decoder_inputs_x)

        # combine
        decoder_lstm_input = context_last_word_concat_layer([context, xt])

        # pass the combined [context, last word] into the LSTM
        # along with [s, c]
        # get the new [s, c] and output
        o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[s, c])

        # final dense layer to get next word prediction
        decoder_outputs = decoder_dense(o)
        outputs.append(decoder_outputs)

    # make it a layer
    stacker = Lambda(self._stack_and_transpose)
    outputs = stacker(outputs)

    #########
    ### Model
    self.model = Model(inputs=[
        encoder_inputs_placeholder,
        decoder_inputs_placeholder,
        initial_s,
        initial_c,
    ],
                       outputs=outputs)

    # compile the model
    self.model.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

    #######################################################
    ### TRAINING
    #######################################################
    print('> Train Model Start...')

    # create targets, since we cannot use sparse
    # categorical cross entropy when we have sequences
    decoder_targets_one_hot = np.zeros(
        (params['LEN_INPUT_TEXTS'], params['MAX_LEN_TARGET'],
         params['LEN_WORD2IDX_OUTPUTS'] + 1),
        dtype='float32')

    # assign the values
    for i, d in enumerate(self.decoder_targets):
        for t, word in enumerate(d):
            decoder_targets_one_hot[i, t, word] = 1

    # train the model
    z = np.zeros((params['LEN_INPUT_TEXTS'],
                  params['LATENT_DIM_DECODER']))  # initial [s, c]
    r = self.model.fit(
        [self.encoder_inputs, self.decoder_inputs, z, z],
        decoder_targets_one_hot,
        batch_size=params['BATCH_SIZE'],
        epochs=params['EPOCHS'],
        validation_split=0.15,
        callbacks=[EarlyStopping(monitor='val_loss',
                                 patience=10)]  # early stopping
    )

    # save trained model's weights
    self.model.save_weights(CUR_PATH + '/resources/' + YEARMONTHDAY +
                            "_model.h5")
    log(">> Saved model's weight")

    # save plot-data
    # NOTE(review): nothing is plotted from `r.history` before these calls,
    # so both files contain whatever the current matplotlib figure holds --
    # confirm whether the loss / accuracy curves should be drawn first.
    plt.savefig(CUR_PATH + '/resources/' + 'loss_plot.png')
    plt.savefig(CUR_PATH + '/resources/' + 'acc_plot.png')

    #######################################################
    ### DESIGN MODEL FOR PREDICTION
    #######################################################
    log('> Desgin Model for Prediction')

    ##### Make predictions #####
    # As with the poetry example, we need to create another model
    # that can take in the RNN state and previous word as input
    # and accept a T=1 sequence.

    # The encoder will be stand-alone
    # From this we will get our initial decoder hidden state
    # i.e. h(1), ..., h(Tx)
    self.encoder_model = Model(encoder_inputs_placeholder, encoder_outputs)

    # next we define a T=1 decoder model
    encoder_outputs_as_input = Input(shape=(
        params['MAX_LEN_INPUT'],
        params['LATENT_DIM'] * 2,
    ))
    decoder_inputs_single = Input(shape=(1, ))
    decoder_inputs_single_x = decoder_embedding(decoder_inputs_single)

    # no need to loop over attention steps this time because there is only
    # one step
    context = _one_step_attention(encoder_outputs_as_input, initial_s)

    # combine context with last word
    decoder_lstm_input = context_last_word_concat_layer(
        [context, decoder_inputs_single_x])

    # lstm and final dense
    o, s, c = decoder_lstm(decoder_lstm_input,
                           initial_state=[initial_s, initial_c])
    decoder_outputs = decoder_dense(o)

    # note: we don't really need the final stack and tranpose
    # because there's only 1 output
    # it is already of size N x D
    # no need to make it 1 x N x D --> N x 1 x D
    # Because the time dimension is 1 it disappears automatically, so
    # _stack_and_transpose is not needed here.

    # create the model object
    self.decoder_model = Model(inputs=[
        decoder_inputs_single, encoder_outputs_as_input, initial_s,
        initial_c
    ],
                               outputs=[decoder_outputs, s, c])
# Score one (user, room) pair with a small two-branch embedding model.
#
# Usage: python <this script> USER_ID ROOM_ID
# The weights are read from 'user<USER_ID>_weights.h5' in the working directory.
from sys import argv

import numpy as np
from keras.models import Model, Sequential
from keras.layers import Dense, Embedding, Reshape, Concatenate

if len(argv) < 3:
    raise SystemExit('usage: {} USER_ID ROOM_ID'.format(argv[0]))

num_users = 1000
num_rooms = 2000
user_attr = 5
room_attr = 3

# Branch 1: user id -> `user_attr`-dimensional embedding, flattened to a vector.
input1 = Sequential()
input1.add(Embedding(num_users + 1, user_attr, input_length=1))
input1.add(Reshape((user_attr, )))

# Branch 2: room id -> `room_attr`-dimensional embedding, flattened to a vector.
input2 = Sequential()
input2.add(Embedding(num_rooms, room_attr, input_length=1))
input2.add(Reshape((room_attr, )))

# `Concatenate` is a layer that must be *called* on a list of tensors; it
# cannot be handed two models as a constructor argument inside a Sequential
# model. Join the two branches with the functional API instead.
merged = Concatenate()([input1.output, input2.output])
hidden = Dense(user_attr + room_attr, activation='relu')(merged)
hidden = Dense(user_attr + room_attr, activation='relu')(hidden)
score = Dense(1, activation='sigmoid')(hidden)
model = Model(inputs=[input1.input, input2.input], outputs=score)

# NOTE(review): `load_weights` matches layers by topology by default; this
# assumes the weights file was saved from a model assembled with the same
# layer order -- confirm against the training script.
model.load_weights('user' + str(argv[1]) + '_weights.h5')

# Command-line arguments are strings: convert them to integer ids and give
# each input the (batch=1, input_length=1) shape the embeddings expect.
user_id = np.array([[int(argv[1])]])
room_id = np.array([[int(argv[2])]])
res = model.predict([user_id, room_id])
def ssd_300(image_size,
            n_classes,
            mode='training',
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            limit_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=False,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=True,
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):
    '''
    Build a Keras model with SSD300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    Note: The list-valued defaults in the signature are shared between calls.
    This function never mutates them; callers should not either.

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
            Set to zero to deactivate L2-regularization.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used.
            Defaults to `None`. If a list is passed, this argument overrides `min_scale` and
            `max_scale`. All scaling factors must be greater than zero. A Numpy array is accepted as well.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all prediction layers. Defaults to None.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
            This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
            original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`.
            Defaults to the aspect ratios used in the original SSD300 architecture, i.e.:
                [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]]
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using
            geometric mean of said scaling factor and next bigger scaling factor. Defaults to `True`, following the original
            implementation.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
        limit_boxes (bool, optional): If `True`, limits box coordinates to stay within image boundaries.
            This would normally be set to `True`, but here it defaults to `False`, following the original
            implementation.
        variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's not factors but divisors
            to be precise) for the encoded predicted box coordinates. A variance value of 1.0 would apply
            no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater
            than 1.0 downscale the encoded predictions. Defaults to `[0.1, 0.1, 0.2, 0.2]`, following the original implementation.
            The coordinate format must be 'centroids'.
        coords (str, optional): The box coordinate format to be used. Can be either 'centroids' for the format
            `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`. Defaults to 'centroids', following the original implementation.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates. Defaults to `False`.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (bool, optional): If `True`, the color channel order of the input images will be reversed,
            i.e. if the input color channel order is RGB, the color channels will be swapped to BGR.
            Only applied when the input has exactly 3 channels.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD300 model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    Raises:
        ValueError: If the aspect ratio, scale, variance, step or offset arguments are
            missing or have the wrong length, if any variance is not positive, or if
            `mode` is not one of the supported values. All of these are checked before
            any layer is created.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 6  # The number of predictor conv layers in the network is 6 for the original SSD300.
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    # Use explicit `None`/length checks rather than truthiness so that Numpy
    # arrays are accepted too (their truth value is ambiguous).
    per_layer_given = (aspect_ratios_per_layer is not None
                       and len(aspect_ratios_per_layer) > 0)
    scales_given = scales is not None and len(scales) > 0

    if aspect_ratios_global is None and not per_layer_given:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if per_layer_given and len(aspect_ratios_per_layer) != n_predictor_layers:
        raise ValueError(
            "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
            .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales_given:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        # If no explicit list of scaling factors was passed, compute the list
        # of scaling factors from `min_scale` and `max_scale`.
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if steps is not None and len(steps) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if offsets is not None and len(offsets) != n_predictor_layers:
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    # Reject an unknown mode now instead of after the whole graph was built.
    if mode not in ('training', 'inference', 'inference_fast'):
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if per_layer_given:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    n_boxes = []
    for ar in aspect_ratios:
        if (1 in ar) and two_boxes_for_ar1:
            n_boxes.append(len(ar) + 1)  # +1 for the second box for aspect ratio 1
        else:
            n_boxes.append(len(ar))

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Local layer factories.
    ############################################################################

    def conv(filters, kernel_size, name, strides=(1, 1), padding='same',
             dilation_rate=(1, 1), activation='relu'):
        # Every convolution in the network shares the initializer and the L2 regularizer.
        return Conv2D(filters,
                      kernel_size,
                      strides=strides,
                      padding=padding,
                      dilation_rate=dilation_rate,
                      activation=activation,
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name=name)

    def zero_pad(name):
        # One pixel of zero padding on every side, applied ahead of the
        # stride-2 'valid' convolutions.
        return ZeroPadding2D(padding=((1, 1), (1, 1)), name=name)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    input_shape = (img_height, img_width, img_channels)
    x1 = Lambda(lambda z: z,
                output_shape=input_shape,
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(lambda z: z - np.array(subtract_mean),
                    output_shape=input_shape,
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(lambda z: z / np.array(divide_by_stddev),
                    output_shape=input_shape,
                    name='input_stddev_normalization')(x1)
    if swap_channels and (img_channels == 3):
        x1 = Lambda(lambda z: z[..., ::-1],
                    output_shape=input_shape,
                    name='input_channel_swap')(x1)

    # VGG-16 style base network:
    # (block index, filters, number of 3x3 convs, pool size, pool strides).
    vgg_blocks = [
        (1, 64, 2, (2, 2), (2, 2)),
        (2, 128, 2, (2, 2), (2, 2)),
        (3, 256, 3, (2, 2), (2, 2)),
        (4, 512, 3, (2, 2), (2, 2)),
        (5, 512, 3, (3, 3), (1, 1)),  # pool5 keeps the spatial resolution
    ]
    net = x1
    conv4_3 = None
    for block, filters, n_convs, pool_size, pool_strides in vgg_blocks:
        for idx in range(1, n_convs + 1):
            net = conv(filters, (3, 3), 'conv{}_{}'.format(block, idx))(net)
        if block == 4:
            conv4_3 = net  # first predictor source, tapped before pool4
        net = MaxPooling2D(pool_size=pool_size,
                           strides=pool_strides,
                           padding='same',
                           name='pool{}'.format(block))(net)

    # Convolutional replacements for the VGG fully connected layers.
    fc6 = conv(1024, (3, 3), 'fc6', dilation_rate=(6, 6))(net)
    fc7 = conv(1024, (1, 1), 'fc7')(fc6)

    # Extra feature layers on top of the base network.
    conv6_1 = conv(256, (1, 1), 'conv6_1')(fc7)
    conv6_1 = zero_pad('conv6_padding')(conv6_1)
    conv6_2 = conv(512, (3, 3), 'conv6_2', strides=(2, 2), padding='valid')(conv6_1)

    conv7_1 = conv(128, (1, 1), 'conv7_1')(conv6_2)
    conv7_1 = zero_pad('conv7_padding')(conv7_1)
    conv7_2 = conv(256, (3, 3), 'conv7_2', strides=(2, 2), padding='valid')(conv7_1)

    conv8_1 = conv(128, (1, 1), 'conv8_1')(conv7_2)
    conv8_2 = conv(256, (3, 3), 'conv8_2', padding='valid')(conv8_1)

    conv9_1 = conv(128, (1, 1), 'conv9_1')(conv8_2)
    conv9_2 = conv(256, (3, 3), 'conv9_2', padding='valid')(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)

    # The six feature maps the predictors are attached to, with the prefix
    # that all of their layer names are derived from.
    sources = [
        ('conv4_3_norm', conv4_3_norm),
        ('fc7', fc7),
        ('conv6_2', conv6_2),
        ('conv7_2', conv7_2),
        ('conv8_2', conv8_2),
        ('conv9_2', conv9_2),
    ]

    ### Build the convolutional predictor layers on top of the base network

    # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    conf_maps = [
        conv(n_boxes[i] * n_classes, (3, 3),
             '{}_mbox_conf'.format(prefix), activation=None)(feature_map)
        for i, (prefix, feature_map) in enumerate(sources)
    ]
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    loc_maps = [
        conv(n_boxes[i] * 4, (3, 3),
             '{}_mbox_loc'.format(prefix), activation=None)(feature_map)
        for i, (prefix, feature_map) in enumerate(sources)
    ]

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    priorboxes = [
        AnchorBoxes(img_height,
                    img_width,
                    this_scale=scales[i],
                    next_scale=scales[i + 1],
                    aspect_ratios=aspect_ratios[i],
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    this_steps=steps[i],
                    this_offsets=offsets[i],
                    limit_boxes=limit_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords,
                    name='{}_mbox_priorbox'.format(prefix))(loc_maps[i])
        for i, (prefix, _) in enumerate(sources)
    ]

    ### Reshape

    def flatten_boxes(tensors, last_dim, kind):
        # Collapse the spatial/box axes so that `last_dim` is isolated in the last axis.
        return [
            Reshape((-1, last_dim),
                    name='{}_{}_reshape'.format(prefix, kind))(tensor)
            for (prefix, _), tensor in zip(sources, tensors)
        ]

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    conf_flat = flatten_boxes(conf_maps, n_classes, 'mbox_conf')
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    loc_flat = flatten_boxes(loc_maps, 4, 'mbox_loc')
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    priorbox_flat = flatten_boxes(priorboxes, 8, 'mbox_priorbox')

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')(conf_flat)
    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')(loc_flat)
    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')(priorbox_flat)

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    else:
        # `mode` was validated above, so it is 'inference' or 'inference_fast'.
        # Both decoders take exactly the same arguments.
        decoder_kwargs = dict(confidence_thresh=confidence_thresh,
                              iou_threshold=iou_threshold,
                              top_k=top_k,
                              nms_max_output_size=nms_max_output_size,
                              coords=coords,
                              normalize_coords=normalize_coords,
                              img_height=img_height,
                              img_width=img_width,
                              name='decoded_predictions')
        if mode == 'inference':
            decoder = DecodeDetections(**decoder_kwargs)
        else:
            decoder = DecodeDetections2(**decoder_kwargs)
        decoded_predictions = decoder(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)

    if return_predictor_sizes:
        # Get the spatial dimensions (height, width) of the predictor conv layers, we need them to
        # be able to generate the default boxes for the matching process outside of the model during training.
        # Note that the original implementation performs anchor box matching inside the loss function. We don't do that.
        # Instead, we'll do it in the batch generator function.
        # The spatial dimensions are the same for the confidence and localization predictors, so we just take those of the conf layers.
        predictor_sizes = np.array(
            [conf_map._keras_shape[1:3] for conf_map in conf_maps])
        return model, predictor_sizes
    else:
        return model