Example #1
 def __init__(self, num_splits=2, axis=-1):
     Lambda.__init__(self, lambda x: tf.split(x, num_splits, axis=axis))
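A brief usage sketch for the layer above (Split is an assumed class name; the excerpt only shows its __init__):

import tensorflow as tf
from tensorflow.keras.layers import Input

x_in = Input(shape=(16,))
left, right = Split(num_splits=2, axis=-1)(x_in)  # two tensors of shape (None, 8)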
Example #2
 def __init__(self):
     super(CrossLayer4D, self).__init__()
     self.cross = Lambda(all_vs_all_pairs_4d, name="cross_layer")
Example #3
    def build_model(self):
        # Build the VGG network for 10 classes, with massive dropout and weight decay as described in the paper.
        weight_decay = self.weight_decay
        basic_dropout_rate = 0.3
        input = Input(shape=self.x_shape)
        curr = Conv2D(64, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(input)
        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate)(curr)

        curr = Conv2D(64, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = MaxPooling2D(pool_size=(2, 2))(curr)

        curr = Conv2D(128, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(128, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = MaxPooling2D(pool_size=(2, 2))(curr)

        curr = Conv2D(256, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(256, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(256, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = MaxPooling2D(pool_size=(2, 2))(curr)

        curr = Conv2D(512, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(512, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(512, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = MaxPooling2D(pool_size=(2, 2))(curr)

        curr = Conv2D(512, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(512, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.1)(curr)

        curr = Conv2D(512, (3, 3),
                      padding='same',
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = MaxPooling2D(pool_size=(2, 2))(curr)
        curr = Dropout(basic_dropout_rate + 0.2)(curr)

        curr = Flatten()(curr)
        curr = Dense(512,
                     kernel_regularizer=regularizers.l2(weight_decay))(curr)

        curr = Activation('relu')(curr)
        curr = BatchNormalization()(curr)
        curr = Dropout(basic_dropout_rate + 0.2)(curr)
        curr = Lambda(lambda x: K.dropout(x, level=self.mc_dropout_rate))(curr)
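        # Note: unlike the Dropout layer, K.dropout inside a Lambda is also applied at
        # inference time, which is what makes MC-dropout style sampling possible here.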

        # classification head (f)
        curr1 = Dense(self.num_classes, activation='softmax')(curr)

        # selection head (g)
        curr2 = Dense(512,
                      kernel_regularizer=regularizers.l2(weight_decay))(curr)
        curr2 = Activation('relu')(curr2)
        curr2 = BatchNormalization()(curr2)
        # this normalization is identical to initializing the batchnorm gamma to 1/10
        curr2 = Lambda(lambda x: x / 10)(curr2)
        curr2 = Dense(1, activation='sigmoid')(curr2)
        # auxiliary head (h)
        selective_output = Concatenate(axis=1,
                                       name="selective_head")([curr1, curr2])

        auxiliary_output = Dense(self.num_classes,
                                 activation='softmax',
                                 name="classification_head")(curr)

        model = Model(inputs=input,
                      outputs=[selective_output, auxiliary_output])

        self.input = input
        #         self.model_embeding = Model(inputs=input, outputs=curr)
        return model
Example #4
def get_yolo3_train_model(model_type,
                          anchors,
                          num_classes,
                          weights_path=None,
                          freeze_level=1,
                          optimizer=Adam(lr=1e-3, decay=0),
                          label_smoothing=0,
                          model_pruning=False,
                          pruning_end_step=10000):
    '''create the training model, for YOLOv3'''
    #K.clear_session() # get a new session
    num_anchors = len(anchors)
    #YOLOv3 has 9 anchors and 3 feature layers, while
    #Tiny YOLOv3 has 6 anchors and 2 feature layers,
    #so the number of feature layers can be derived from the anchor count
    num_feature_layers = num_anchors // 3

    #feature map target values, so their shapes should be like:
    # [
    #  (image_height/32, image_width/32, 3, num_classes+5),
    #  (image_height/16, image_width/16, 3, num_classes+5),
    #  (image_height/8, image_width/8, 3, num_classes+5)
    # ]
    y_true = [
        Input(shape=(None, None, 3, num_classes + 5),
              name='y_true_{}'.format(l)) for l in range(num_feature_layers)
    ]

    model_body, backbone_len = get_yolo3_model(
        model_type,
        num_feature_layers,
        num_anchors,
        num_classes,
        model_pruning=model_pruning,
        pruning_end_step=pruning_end_step)
    print('Create {} YOLOv3 {} model with {} anchors and {} classes.'.format(
        'Tiny' if num_feature_layers == 2 else '', model_type, num_anchors,
        num_classes))
    print('model layer number:', len(model_body.layers))

    if weights_path:
        model_body.load_weights(weights_path,
                                by_name=True)  #, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))

    if freeze_level in [1, 2]:
        # Freeze the backbone part or freeze all but final feature map & input layers.
        num = (backbone_len, len(model_body.layers) - 3)[freeze_level - 1]
        for i in range(num):
            model_body.layers[i].trainable = False
        print('Freeze the first {} layers of total {} layers.'.format(
            num, len(model_body.layers)))
    elif freeze_level == 0:
        # Unfreeze all layers.
        for i in range(len(model_body.layers)):
            model_body.layers[i].trainable = True
        print('Unfreeze all of the layers.')

    model_loss, location_loss, confidence_loss, class_loss = Lambda(
        yolo3_loss,
        name='yolo_loss',
        arguments={
            'anchors': anchors,
            'num_classes': num_classes,
            'ignore_thresh': 0.5,
            'label_smoothing': label_smoothing
        })([*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    model.compile(
        optimizer=optimizer,
        loss={
            # use custom yolo_loss Lambda layer.
            'yolo_loss': lambda y_true, y_pred: y_pred
        })

    loss_dict = {
        'location_loss': location_loss,
        'confidence_loss': confidence_loss,
        'class_loss': class_loss
    }
    add_metrics(model, loss_dict)

    return model
Example #5
def fcn_8s(num_classes, input_shape, lr_init, lr_decay):
    img_input = Input(input_shape)

    # Block 1
    x = Conv2D(64, (3, 3), padding='same', name='block1_conv1')(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(64, (3, 3), padding='same', name='block1_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Block 2
    x = Conv2D(128, (3, 3), padding='same', name='block2_conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, (3, 3), padding='same', name='block2_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Block 3
    x = Conv2D(256, (3, 3), padding='same', name='block3_conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), padding='same', name='block3_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (3, 3), padding='same', name='block3_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    block_3_out = MaxPooling2D()(x)

    # Block 4
    x = Conv2D(512, (3, 3), padding='same', name='block4_conv1')(block_3_out)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block4_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block4_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    block_4_out = MaxPooling2D()(x)

    # Block 5
    x = Conv2D(512, (3, 3), padding='same', name='block5_conv1')(block_4_out)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block5_conv2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(512, (3, 3), padding='same', name='block5_conv3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = MaxPooling2D()(x)

    # Convolutionalized fully connected layers.
    x = Conv2D(1024, (7, 7), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(1024, (1, 1), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Classifying layers.
    x = Conv2D(num_classes, (1, 1), strides=(1, 1), activation='linear')(x)
    x = BatchNormalization()(x)

    block_3_out = Conv2D(num_classes, (1, 1),
                         strides=(1, 1),
                         activation='linear')(block_3_out)
    block_3_out = BatchNormalization()(block_3_out)

    block_4_out = Conv2D(num_classes, (1, 1),
                         strides=(1, 1),
                         activation='linear')(block_4_out)
    block_4_out = BatchNormalization()(block_4_out)

    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 2, x.shape[2]
                                                    * 2)))(x)
    x = Add()([x, block_4_out])
    x = Activation('relu')(x)

    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 2, x.shape[2]
                                                    * 2)))(x)
    x = Add()([x, block_3_out])
    x = Activation('relu')(x)

    x = Lambda(lambda x: tf.image.resize_images(x, (x.shape[1] * 8, x.shape[2]
                                                    * 8)))(x)

    x = Activation('softmax')(x)

    model = Model(img_input, x)
    model.compile(optimizer=Adam(lr=lr_init, decay=lr_decay),
                  loss='categorical_crossentropy',
                  metrics=[dice_coef])

    return model
Example #6
    def __init__(self, config, output_size, latent_vector_input,
                 data_vector_input, name_scope):
        """
        :param config: dictionary
        :param output_size: int
        :param latent_vector_input: generalized tensorflow input
        :param name_scope: string
        """
        logger = logging.getLogger(self.__class__.__name__ + ":" +
                                   str(name_scope))
        # get config
        # ---------
        depth = get_or_default(config, 'depth', 4, logger)
        width_network_config = get_or_default(config, 'width_network', {},
                                              logger)
        translation_network_config = get_or_default(config,
                                                    'translation_network', {},
                                                    logger)
        width_network_args = {
            'output_size': output_size,
            'name_scope': name_scope,
            'name': 'width_network',
            'config': width_network_config
        }
        translation_networks_args = {
            'output_size': output_size,
            'name_scope': name_scope,
            'name': 'translation_network',
            'config': translation_network_config
        }
        # ---------

        # generate masks, width and translation networks
        # ---------
        masks = []
        w_model = []
        t_model = []
        for d in range(depth):
            # alternating random masks for the coupling layers
            if d % 2 == 0:
                mask = EvenMask(output_size)
            else:
                mask = OddMask(output_size)
            with tf.name_scope(name_scope):
                m = tf.Variable(mask(),
                                dtype=tf.float32,
                                trainable=False,
                                name='mask')
            masks.append(m)
            # create w and t
            w_model.append(
                create_model(MultiLayerPerceptron,
                             input_size=output_size,
                             model_args=width_network_args))
            t_model.append(
                create_model(MultiLayerPerceptron,
                             input_size=output_size,
                             model_args=translation_networks_args))
        # ---------

        # build the full sample model by stacking coupling layers
        # ---------
        # feed-forward
        input_cl_ff = list()
        input_cl_ff.append(
            CouplingLayer(mode='feed_forward',
                          input_tensor=latent_vector_input,
                          mask=masks[0],
                          width_model=w_model[0],
                          translation_model=t_model[0]))
        for d in range(1, depth):
            input_cl_ff.append(
                CouplingLayer(mode='feed_forward',
                              input_tensor=input_cl_ff[d - 1].output,
                              mask=masks[d],
                              width_model=w_model[d],
                              translation_model=t_model[d]))
        self._data_vector = input_cl_ff[d].output

        # feed backward
        input_cl_fb = list()
        input_cl_fb.append(
            CouplingLayer(mode='feed_backward',
                          input_tensor=data_vector_input,
                          mask=masks[depth - 1],
                          width_model=w_model[depth - 1],
                          translation_model=t_model[depth - 1]))
        for d in range(1, depth):
            input_cl_fb.append(
                CouplingLayer(mode='feed_backward',
                              input_tensor=input_cl_fb[d - 1].output,
                              mask=masks[depth - 1 - d],
                              width_model=w_model[depth - 1 - d],
                              translation_model=t_model[depth - 1 - d]))
        self._latent_vector = input_cl_fb[d].output
        # ---------

        # build the likelihood model
        # ---------
        concat_log_det_jac = Concatenate(axis=1)(
            [input_cl_fb[c].log_det_jac for c in range(depth)])

        self._log_det_jac = Lambda(lambda x: tf.reduce_sum(x, axis=1))(
            concat_log_det_jac)
        self._det_jac_test = tf.matrix_determinant(
            tf.stack([
                tf.gradients(self._latent_vector[:, idx], data_vector_input)[0]
                for idx in range(output_size)
            ],
                     axis=1))
        self._llh = Lambda(lambda x: -0.5*output_size*np.log(2*np.pi) - 0.5 * tf.reduce_sum(tf.square(x[0]), axis=1)+ \
                                     x[1])([self._latent_vector, self._log_det_jac])
Example #7
                padding='same', name=name_fn('conv', 2))(x)
    if batch_norm:
        x = BatchNormalization(name=name_fn('bn', 2))(x)
    x = LeakyReLU(alpha=0.3, name=name_fn('act', 2))(x)

    return x


# As the dimensions of our images may not be even, it is possible that after
# downsampling and upsampling we do not recover the original size (with a difference
# of +/- 1 px).
# To avoid the problems this may cause, we define a layer to slightly resize the generated
# image to the dimensions of the target one:
ResizeToSame = lambda name: Lambda(
    lambda images: tf.image.resize(images[0], tf.shape(images[1])[1:3]),
    # `images` is a tuple of 2 tensors.
    # We resize the first image tensor to the shape of the 2nd
    name=name)
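
A short usage sketch of ResizeToSame (hypothetical tensor names; assumes TF 2, where tf.image.resize as called above is available):

from tensorflow.keras.layers import Input

generated = Input(shape=(127, 127, 3))  # e.g. a decoder output that may be off by 1 px
target = Input(shape=(128, 128, 3))     # the reference image
aligned = ResizeToSame(name='match_target')([generated, target])  # resized to (None, 128, 128, 3)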

def unet(x, out_channels=3, layer_depth=4, filters_orig=32, kernel_size=4, activation_intermediate=None,
         batch_norm=True, final_activation='sigmoid'):
    """
    Pass the tensor through a trainable UNet.
    :param x:                       Input tensor.
    :param out_channels:            Number of output channels.
    :param layer_depth:             Number of convolutional blocks vertically stacked.
    :param filters_orig:            Number of filters for the 1st block (then multiplied by 2 
                                    every block).
    :param kernel_size:             Kernel size for layers.
    :param batch_norm:              Flag to apply batch normalization.
    :param final_activation:        Name/function for the last activation.
Example #8
	def build_model(self):

		###################
		### Encoder
		###################
		# definition
		encoder_inputs = Input(shape=(self.args['MAX_LEN_INPUT'],))
		encoder_embed = Embedding(self.args['LEN_WORD2IDX_INPUTS'] + 1,
									self.args['EMBEDDING_DIM'],
									#weights=[embedding_matrix],
									input_length=self.args['MAX_LEN_INPUT'],
									#trainable=True
						)
		encoder_bilstm = Bidirectional(
							LSTM(self.args['LATENT_DIM'],
								return_sequences=True,
								# dropout=0.5 # dropout not available on gpu
						))

		# pipeline
		encoder_x = encoder_embed(encoder_inputs)
		encoder_outputs = encoder_bilstm(encoder_x)

		###################
		### Decoder
		###################

		# definition
		decoder_inputs = Input(shape=(self.args['MAX_LEN_TARGET'],)) # teacher forcing input
		decoder_embed = Embedding(self.args['LEN_WORD2IDX_OUTPUTS'] + 1, 
										self.args['EMBEDDING_DIM']
							)

		# pipeline
		decoder_x = decoder_embed(decoder_inputs)


		def _softmax_over_time(x):
			# make sure we do softmax over the time axis
			# expected shape is N x T x D
			assert(K.ndim(x) > 2)
			e = K.exp(x - K.max(x, axis=1, keepdims=True)) # note: axis=1, i.e. softmax over the time axis
			s = K.sum(e, axis=1, keepdims=True)
			return e / s

		# ATTENTION
		# Attention layers need to be global (module-level variables) because they will be repeated Ty times at the decoder
		attn_repeat_layer = RepeatVector(self.args['MAX_LEN_INPUT'])
		attn_concat_layer = Concatenate(axis=-1)
		attn_dense1 = Dense(10, activation='tanh')
		attn_dense2 = Dense(1, activation=_softmax_over_time)
		attn_dot = Dot(axes=1) # to perform the weighted sum of alpha[t] * h[t]

		# define the rest of the decoder (after attention)
		decoder_lstm = LSTM(self.args['LATENT_DIM_DECODER'], return_state=True)
		decoder_dense = Dense(self.args['LEN_WORD2IDX_OUTPUTS'] + 1, activation='softmax')

		initial_s = Input(shape=(self.args['LATENT_DIM_DECODER'],), name='s0')
		initial_c = Input(shape=(self.args['LATENT_DIM_DECODER'],), name='c0')
		context_last_word_concat_layer = Concatenate(axis=2) # for teacher forcing

		# Unlike previous seq2seq, we cannot get the output all in one step
		# Instead we need to do Ty steps And in each of those steps, we need to consider all Tx h's

		# s, c will be re-assigned in each iteration of the loop
		s = initial_s
		c = initial_c

		def _one_step_attention(h, st_1):
			# h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2)
			# st_1 = s(t-1), shape = (LATENT_DIM_DECODER,)

			# copy s(t-1) Tx times
			# now shape = (Tx, LATENT_DIM_DECODER)
			st_1 = attn_repeat_layer(st_1)

			# Concatenate all h(t)'s with s(t-1)
			# Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2)
			x = attn_concat_layer([h, st_1])

			# Neural net first layer
			x = attn_dense1(x)

			# Neural net second layer with special softmax over time
			alphas = attn_dense2(x)

			# "Dot" the alphas and the h's
			# Remember a.dot(b) = sum over a[t] * b[t]
			context = attn_dot([alphas, h])

			return context


		# collect outputs in a list at first
		outputs = []
		# An LSTM normally runs this loop internally; here we unroll it manually so the attention context can be computed at every step.
		for t in range(self.args['MAX_LEN_TARGET']): # Ty times

			######################################################
			## `one_step_attention` function !
			# get the context using attention
			context = _one_step_attention(encoder_outputs, s)
  
			# we need a different layer for each time step
			selector = Lambda(lambda x: x[:, t:t+1]) # extract only the vector for this time step; a Lambda is used because everything must be expressed layer-wise
			xt = selector(decoder_x)
	
			# combine 
			decoder_lstm_input = context_last_word_concat_layer([context, xt])

			# pass the combined [context, last word] into the LSTM
			# along with [s, c]
			# get the new [s, c] and output
			o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[s, c])

			# final dense layer to get next word prediction
			decoder_outputs = decoder_dense(o)
			outputs.append(decoder_outputs)

		def _stack_and_transpose(x): # to restore the original shape
			# 'outputs' is now a list of length Ty
			# each element is of shape (batch size, output vocab size)
			# therefore if we simply stack all the outputs into 1 tensor
			# it would be of shape T x N x D
			# we would like it to be of shape N x T x D
			# x is a list of length T, each element is a batch_size x output_vocab_size tensor
			x = K.stack(x) # is now T x batch_size x output_vocab_size tensor
			x = K.permute_dimensions(x, pattern=(1, 0, 2)) # is now batch_size x T x output_vocab_size
			return x

		# pipeline
		stacker = Lambda(_stack_and_transpose)
		decoder_outputs = stacker(outputs)

		#########
		### Encoder&Decoder Model
		self.e2d_model = Model(
			inputs=[
				encoder_inputs,
				decoder_inputs,
				initial_s, 
				initial_c,
			],
			outputs=decoder_outputs)

		# compile the model
		self.e2d_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])


		########################### For Prediction ###########################

		###################
		### t1 Encoder
		###################

		self.encoder_model = Model(encoder_inputs, encoder_outputs)

		###################
		### t1 Decoder
		###################

		# next we define a T=1 decoder model
		encoder_outputs_as_input = Input(shape=(self.args['MAX_LEN_INPUT'], self.args['LATENT_DIM'] * 2,))
		decoder_inputs_single = Input(shape=(1,))
		decoder_inputs_single_x = decoder_embed(decoder_inputs_single)

		# no need to loop over attention steps this time because there is only one step
		context = _one_step_attention(encoder_outputs_as_input, initial_s)

		# combine context with last word
		decoder_lstm_input = context_last_word_concat_layer([context, decoder_inputs_single_x])

		# lstm and final dense
		o, s, c = decoder_lstm(decoder_lstm_input, initial_state=[initial_s, initial_c])
		decoder_outputs = decoder_dense(o)

		# note: we don't really need the final stack and tranpose
		# because there's only 1 output
		# it is already of size N x D
		# no need to make it 1 x N x D --> N x 1 x D
		# the time dimension is 1 and is squeezed away automatically, so stack_and_transpose is not needed

		# create the model object
		self.decoder_model = Model(
			inputs=[
				decoder_inputs_single,
				encoder_outputs_as_input,
				initial_s, 
				initial_c
			],
			outputs=[decoder_outputs, s, c]
		)
Example #9
def model_unet_kaggle(img_height, img_width, img_channel, num_classes):
    """
    This function returns a U-Net model for binary fire segmentation images.
    :param img_height: Image Height
    :param img_width: Image Width
    :param img_channel: Number of channels in each image
    :param num_classes: Number of classes based on the Ground Truth Masks
    :return: A convolutional NN based on TensorFlow and Keras
    """
    inputs = Input((img_height, img_width, img_channel))
    s = Lambda(lambda x: x / 255)(inputs)

    c1 = Conv2D(16, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(s)
    c1 = Dropout(0.1)(c1)
    c1 = Conv2D(16, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c1)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = Conv2D(32, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(p1)
    c2 = Dropout(0.1)(c2)
    c2 = Conv2D(32, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c2)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = Conv2D(64, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(p2)
    c3 = Dropout(0.2)(c3)
    c3 = Conv2D(64, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c3)
    p3 = MaxPooling2D((2, 2))(c3)

    c4 = Conv2D(128, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(p3)
    c4 = Dropout(0.2)(c4)
    c4 = Conv2D(128, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c4)
    p4 = MaxPooling2D(pool_size=(2, 2))(c4)

    c5 = Conv2D(256, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(p4)
    c5 = Dropout(0.3)(c5)
    c5 = Conv2D(256, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c5)

    u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c5)
    u6 = concatenate([u6, c4])
    c6 = Conv2D(128, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(u6)
    c6 = Dropout(0.2)(c6)
    c6 = Conv2D(128, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c6)

    u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c6)
    u7 = concatenate([u7, c3])
    c7 = Conv2D(64, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(u7)
    c7 = Dropout(0.2)(c7)
    c7 = Conv2D(64, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c7)

    u8 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(c7)
    u8 = concatenate([u8, c2])
    c8 = Conv2D(32, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(u8)
    c8 = Dropout(0.1)(c8)
    c8 = Conv2D(32, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c8)

    u9 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c8)
    u9 = concatenate([u9, c1], axis=3)
    c9 = Conv2D(16, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(u9)
    c9 = Dropout(0.1)(c9)
    c9 = Conv2D(16, (3, 3),
                activation='elu',
                kernel_initializer='he_normal',
                padding='same')(c9)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)

    model = Model(inputs=[inputs], outputs=[outputs])
    return model
Example #10
def dual_path_block(x, block_type, cardinality, filter_increment,
                    pointwise_filters_a, grouped_conv_filters_b,
                    pointwise_filters_c):
    if tf.keras.backend.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    grouped_channels = int(grouped_conv_filters_b / cardinality)
    init = concatenate(x, axis=channel_axis) if isinstance(x, list) else x

    if block_type == 'projection':
        stride = (1, 1)
        projection = True
    elif block_type == 'downsample':
        stride = (2, 2)
        projection = True
    elif block_type == 'normal':
        stride = (1, 1)
        projection = False
    else:
        raise ValueError(
            '`block_type` must be one of ["projection", "downsample", "normal"]. Given %s'
            % block_type)

    if projection:
        projection_path = relu_block(init,
                                     filters=pointwise_filters_c +
                                     2 * filter_increment,
                                     kernel=(1, 1),
                                     stride=stride)
        input_residual_path = Lambda(
            lambda z: z[:, :, :, :pointwise_filters_c] if K.image_data_format(
            ) == 'channels_last' else z[:, :pointwise_filters_c, :, :])(
                projection_path)
        input_dense_path = Lambda(
            lambda z: z[:, :, :, pointwise_filters_c:] if K.image_data_format(
            ) == 'channels_last' else z[:, pointwise_filters_c:, :, :])(
                projection_path)
    else:
        input_residual_path = x[0]
        input_dense_path = x[1]

    x = relu_block(init, filters=pointwise_filters_a, kernel=(1, 1))
    x = grouped_block(x,
                      grouped_channels=grouped_channels,
                      cardinality=cardinality,
                      strides=stride)
    x = relu_block(x,
                   filters=pointwise_filters_c + filter_increment,
                   kernel=(1, 1))

    output_residual_path = Lambda(lambda z: z[:, :, :, :pointwise_filters_c]
                                  if K.image_data_format() == 'channels_last'
                                  else z[:, :pointwise_filters_c, :, :])(x)
    output_dense_path = Lambda(lambda z: z[:, :, :, pointwise_filters_c:]
                               if K.image_data_format() == 'channels_last' else
                               z[:, pointwise_filters_c:, :, :])(x)

    residual_path = add([input_residual_path, output_residual_path])
    dense_path = concatenate([input_dense_path, output_dense_path],
                             axis=channel_axis)

    return [residual_path, dense_path]
Example #11
def pointnet_cls(include_top=True,
                 weights=None,
                 input_tensor=None,
                 input_shape=(2048, 3),
                 pooling=None,
                 classes=40,
                 activation=None,
                 use_tnet=True):
    """
    PointNet model for object classification
    :param include_top: whether to include the stack of fully connected layers
    :param weights: one of `None` (random initialization),
                    'modelnet' (pre-training on ModelNet),
                    or the path to the weights file to be loaded.
    :param input_tensor: optional tensor of size BxNxK
    :param input_shape: shape of the input point clouds (NxK)
    :param pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 2D tensor output of the last convolutional block (Nx1024).
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 1D tensor of size 1024.
            - `max` means that global max pooling will
                be applied.
    :param classes: number of classes in the classification problem; if dict, construct multiple disjoint top layers
    :param activation: activation of the last layer (default None).
    :param use_tnet: whether to use the transformation subnets or not.
    :return: Keras model of the classification network
    """

    assert K.image_data_format() == 'channels_last'

    # Generate input tensor and get base network
    if input_tensor is None:
        input_tensor = Input(input_shape, name='Input_cloud')
    num_point = input_tensor.shape[-2]
    net = pointnet_base(input_tensor, use_tnet)

    # Top layers
    if include_top:
        # Symmetric function: max pooling
        # Done in 2D since 1D is painfully slow
        net = MaxPooling2D(pool_size=(num_point, 1),
                           padding='valid',
                           name='maxpool')(Lambda(K.expand_dims)(net))
        net = Reshape((1024, ))(net)
        if isinstance(classes, dict):
            # Disjoint stacks of fc layers, one per value in dict
            net = [
                dense_bn(net, units=512, scope=r + '_fc1', activation='relu')
                for r in classes
            ]
            net = [
                Dropout(0.3, name=r + '_dp1')(n) for r, n in zip(classes, net)
            ]
            net = [
                dense_bn(n, units=256, scope=r + '_fc2', activation='relu')
                for r, n in zip(classes, net)
            ]
            net = [
                Dropout(0.3, name=r + '_dp2')(n) for r, n in zip(classes, net)
            ]
            net = [
                Dense(units=classes[r], activation=activation, name=r)(n)
                for r, n in zip(classes, net)
            ]
        else:
            # Fully connected layers for a single classification task
            net = dense_bn(net, units=512, scope='fc1', activation='relu')
            net = Dropout(0.3, name='dp1')(net)
            net = dense_bn(net, units=256, scope='fc2', activation='relu')
            net = Dropout(0.3, name='dp2')(net)
            net = Dense(units=classes, name='fc3', activation=activation)(net)
    else:
        if pooling == 'avg':
            net = AveragePooling2D(pool_size=(num_point, 1),
                                   padding='valid',
                                   name='avgpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)
        elif pooling == 'max':
            net = MaxPooling2D(pool_size=(num_point, 1),
                               padding='valid',
                               name='maxpool')(Lambda(K.expand_dims)(net))
            net = Reshape((net.shape[-2], ))(net)

    model = Model(input_tensor, net, name='pointnet_cls')

    # Load weights.
    if weights == 'modelnet':
        weights_path = keras_utils.get_file(
            'pointnet_modelnet_weights_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
        if K.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        model.load_weights(weights, by_name=True)

    return model
Example #12
def ElmoEmbedding(x):
    return elmo_model(inputs={"tokens": tf.squeeze(tf.cast(x, tf.string)),
                              "sequence_len": tf.constant(batch_size * [max_len])},
                      signature="tokens",
                      as_dict=True)["elmo"]

from tensorflow.keras.layers import Dense, Flatten, Embedding, LSTM, TimeDistributed, Dropout, Bidirectional, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.layers import concatenate
from tensorflow.keras import Input



input = Input(shape=(max_len,), dtype=tf.string)
embeddings = Lambda(ElmoEmbedding, output_shape=(max_len, 1024))(input)
x = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(embeddings)
x_rnn = Bidirectional(LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2))(x)
x = concatenate([x, x_rnn])
out = TimeDistributed(Dense(n_tags, activation='softmax'))(x)

model = Model(input, out)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

X_tr, X_val = X_train[:1213*batch_size], X_train[-135*batch_size:]
y_tr, y_val = y_train[:1213*batch_size], y_train[-135*batch_size:]
y_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)
y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1)

m = np.array(X_tr)
print(m.shape)
Example #13
            0: 32,
            1: 16
        }[l], w // {
            0: 32,
            1: 16
        }[l], num_anchors // 2, num_classes + 5)) for l in range(2)
    ]

    # inputs: *model_body.input, *y_true
    # output: model_loss
    loss_input = [*model_body.output, *y_true]
    model_loss = Lambda(yolo_loss,
                        output_shape=(1, ),
                        name='yolo_loss',
                        arguments={
                            'anchors': anchors,
                            'num_classes': num_classes,
                            'ignore_thresh': 0.5,
                            'label_smoothing': label_smoothing
                        })(loss_input)

    model = Model([model_body.input, *y_true], model_loss)

    # training settings (callbacks)
    logging = TensorBoard(log_dir=log_dir)
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=False,
        period=1)
Example #14
def build_transformer(source_vocabulary_size,
                      target_vocabulary_size,
                      max_length,
                      share_word_embedding=False,
                      n=6,
                      h=8,
                      d_k=64,
                      d_v=64,
                      d_model=512,
                      optimizer="adam",
                      null_token_value=0):
    source_input = Input(shape=(None, ), name="source_input")
    target_input = Input(shape=(None, ), name="target_input")

    enc_input = Lambda(lambda x: x[:, 1:])(source_input)
    dec_input = Lambda(lambda x: x[:, :-1])(target_input)
    dec_target_output = Lambda(lambda x: x[:, 1:])(target_input)

    # create embedding
    source_word_embedding = Embedding(
        source_vocabulary_size,
        d_model,
        name="source_embedding" if share_word_embedding else "source_embedding"
    )  # weights=[_get_positional_encoding_matrix(max_length, d_model)]
    if share_word_embedding:
        target_word_embedding = source_word_embedding
    else:
        target_word_embedding = Embedding(target_vocabulary_size,
                                          d_model,
                                          name="target_embedding")
    # embedding for the position encoding
    position_encoding = Embedding(
        max_length,
        d_model,
        trainable=False,
        weights=[_get_positional_encoding_matrix(max_length, d_model)],
        name="position_embedding")

    enc = Encoder(source_word_embedding,
                  position_encoding,
                  n=n,
                  h=h,
                  d_k=d_k,
                  d_v=d_v,
                  d_model=d_model,
                  d_inner_hid=512)
    dec = Decoder(target_word_embedding,
                  position_encoding,
                  n=n,
                  h=h,
                  d_k=d_k,
                  d_v=d_v,
                  d_model=d_model,
                  d_inner_hid=512)

    enc_output = enc(enc_input)
    dec_output = dec(dec_input, enc_output)

    # lin_dense = TimeDistributed(Dense(d_model))
    fin_output = TimeDistributed(Dense(target_vocabulary_size,
                                       activation=None,
                                       use_bias=False),
                                 name="output")  # "softmax"

    # lin_dense_out = lin_dense(dec_output)
    fin_output_out = fin_output(dec_output)  # lin_dense_out)

    accuracy = Lambda(_get_accuracy,
                      arguments={"null_token_value": null_token_value
                                 })([fin_output_out, dec_target_output])
    loss = Lambda(_get_loss, arguments={"null_token_value": null_token_value
                                        })([fin_output_out, dec_target_output])

    train_model = Model(inputs=[source_input, target_input], outputs=loss)
    train_model.add_loss([loss])
    train_model.compile(optimizer, None)
    train_model.metrics_names.append('accuracy')
    # when using tf.keras
    #train_model.metrics_tensors.append(accuracy)
    train_model.metrics.append(accuracy)

    inference_model = Model([source_input, target_input], fin_output_out)

    return train_model, inference_model
Example #15
 def __init__(self):
     Lambda.__init__(self, lambda x: tf.log(x + 1e-10))
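A brief usage sketch (Log is an assumed name for the class above; tf.log implies TF 1.x, where it is the alias of tf.math.log):

from tensorflow.keras.layers import Input

probs = Input(shape=(10,))
log_probs = Log()(probs)  # elementwise log(x + 1e-10), numerically safe at x == 0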
Example #16
    def __initialize_poison_tensorflow(self, x_trigger: np.ndarray,
                                       y_trigger: np.ndarray,
                                       x_poison: np.ndarray,
                                       y_poison: np.ndarray):
        """
        Initialize poison noises to be optimized.

        :param x_trigger: A list of samples to use as triggers.
        :param y_trigger: A list of target classes to classify the triggers into.
        :param x_poison: A list of training samples to poison a portion of.
        :param y_poison: A list of labels for x_poison.
        """
        # pylint: disable=no-name-in-module
        from tensorflow.keras import backend as K
        import tensorflow as tf
        from tensorflow.keras.layers import Input, Embedding, Add, Lambda
        from art.estimators.classification.tensorflow import TensorFlowV2Classifier

        if isinstance(self.substitute_classifier, TensorFlowV2Classifier):
            classifier = self.substitute_classifier
        else:
            raise Exception(
                "This method requires `TensorFlowV2Classifier` as `substitute_classifier`'s type"
            )

        self.model_trainable = classifier.model.trainable
        classifier.model.trainable = False  # This value gets reverted later.

        def _weight_grad(classifier: TensorFlowV2Classifier, x: tf.Tensor,
                         target: tf.Tensor) -> tf.Tensor:
            # Get the target gradient vector.
            import tensorflow as tf

            with tf.GradientTape() as t:  # pylint: disable=C0103
                t.watch(classifier.model.weights)
                output = classifier.model(x, training=False)
                loss = classifier.model.compiled_loss(target, output)
            d_w = t.gradient(loss, classifier.model.weights)
            d_w = [w for w in d_w if w is not None]
            d_w = tf.concat([tf.reshape(d, [-1]) for d in d_w], 0)
            d_w_norm = d_w / tf.sqrt(tf.reduce_sum(tf.square(d_w)))
            return d_w_norm

        self.grad_ws_norm = _weight_grad(classifier, tf.constant(x_trigger),
                                         tf.constant(y_trigger))

        # Define the model to apply and optimize the poison.
        input_poison = Input(batch_shape=classifier.model.input.shape)
        input_indices = Input(shape=())
        y_true_poison = Input(shape=np.shape(y_poison)[1:])
        embedding_layer = Embedding(
            len(x_poison),
            np.prod(input_poison.shape[1:]),
            embeddings_initializer=tf.keras.initializers.RandomNormal(
                stddev=self.epsilon * 0.01),
        )
        embeddings = embedding_layer(input_indices)
        embeddings = tf.tanh(embeddings) * self.epsilon
        embeddings = tf.reshape(embeddings, tf.shape(input_poison))
        input_noised = Add()([input_poison, embeddings])
        input_noised = Lambda(
            lambda x: K.clip(x, self.clip_values[0], self.clip_values[1])
        )(input_noised)  # Make sure the poisoned samples are in a valid range.

        def loss_fn(input_noised: tf.Tensor, target: tf.Tensor,
                    grad_ws_norm: tf.Tensor):
            d_w2_norm = _weight_grad(classifier, input_noised, target)
            B = 1 - tf.reduce_sum(grad_ws_norm * d_w2_norm)  # pylint: disable=C0103
            return B

        B = tf.keras.layers.Lambda(lambda x: loss_fn(x[0], x[1], x[2]))(  # pylint: disable=C0103
            [input_noised, y_true_poison, self.grad_ws_norm])

        self.backdoor_model = tf.keras.models.Model(
            [input_poison, y_true_poison, input_indices], [input_noised, B])

        self.backdoor_model.add_loss(B)

        class PredefinedLRSchedule(
                tf.keras.optimizers.schedules.LearningRateSchedule):
            """
            Use a preset learning rate based on the current training epoch.
            """
            def __init__(self, learning_rates: List[float],
                         milestones: List[int]):
                self.schedule = list(zip(milestones, learning_rates))

            def __call__(self, step: int) -> float:
                lr_prev = self.schedule[0][1]
                for m, learning_rate in self.schedule:
                    if step < m:
                        return lr_prev
                    lr_prev = learning_rate
                return lr_prev

            def get_config(self) -> Dict:
                """
                Returns the parameters.
                """
                return {"schedule": self.schedule}

        self.optimizer = tf.keras.optimizers.Adam(gradient_transformers=[
            lambda grads_and_vars: [(tf.sign(g), v)
                                    for (g, v) in grads_and_vars]
        ])
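        # The gradient transformer above replaces every gradient with its elementwise
        # sign before Adam applies its update, i.e. a signed-gradient optimization step.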
        self.lr_schedule = tf.keras.callbacks.LearningRateScheduler(
            PredefinedLRSchedule(*self.learning_rate_schedule))
Example #17
 def __init__(self, size):
     Lambda.__init__(self, lambda x: tf.tile(tf.expand_dims(tf.expand_dims(x, 2), 3), [1, 1, size, size]))
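A short usage sketch (Tile2D is an assumed name for the class above), broadcasting a per-channel vector to a square spatial map:

from tensorflow.keras.layers import Input

vec = Input(shape=(64,))  # (None, 64)
fmap = Tile2D(7)(vec)     # (None, 64, 7, 7): each value repeated over a 7x7 grid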
Example #18
def sub_pixel_conv2d(scale=2, **kwargs):
    return Lambda(lambda x: tf.compat.v1.depth_to_space(x, scale), **kwargs)
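A usage sketch for sub_pixel_conv2d: depth_to_space rearranges groups of scale**2 channels into spatial positions, so a (H, W, C*scale**2) feature map becomes (H*scale, W*scale, C).

from tensorflow.keras.layers import Input, Conv2D

x = Input(shape=(32, 32, 3))
x = Conv2D(3 * 2 ** 2, 3, padding='same')(x)             # 12 channels = 3 * scale**2
y = sub_pixel_conv2d(scale=2, name='pixel_shuffle')(x)   # (None, 64, 64, 3)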
Example #19
def line_lstm_ctc(input_shape,
                  output_shape,
                  window_width=28,
                  window_stride=14):

    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
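    # Worked example (hypothetical sizes): image_width=896, window_width=28, window_stride=14
    # gives (896 - 28) / 14 + 1 = 63 windows.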

    if num_windows < output_length:
        raise ValueError(
            f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})'
        )

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length, ), name='y_true')
    input_length = Input(shape=(1, ), name='input_length')
    label_length = Input(shape=(1, ), name='label_length')

    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length).

    ##### Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(slide_window,
                           arguments={
                               'window_width': window_width,
                               'window_stride': window_stride
                           })(image_reshaped)

    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs,
                         outputs=convnet.layers[-2].output)

    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 200)
    lstm_output = lstm_fn(200, return_sequences=True)(convnet_outputs)

    softmax_output = Dense(num_classes,
                           activation='softmax',
                           name='softmax_output')(lstm_output)

    ##### Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss')(
            [y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded')([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output])
    return model
Example #20
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Bidirectional(GRU(RNN_cell, return_sequences=True))(x)
x = Dense(num_classes, activation='softmax')(x)
# define the model
model = Model(image_input, x)

# In[6]:

# define the label input
labels = Input(shape=(max_len,), name='max_len')
# input length
input_len = Input(shape=(1,), name='input_len')
# label length
label_len = Input(shape=(1,), name='label_len')
# Lambda wraps a custom function as a layer in the network, used here for custom computations such as the CTC loss
ctc_out = Lambda(ctc_lambda_func,
                 name='ctc')([x, labels, input_len, label_len])
# define the CTC training model
ctc_model = Model(inputs=[image_input, labels, input_len, label_len],
                  outputs=ctc_out)

# In[7]:

# Note: load_weights is used here (not load_model), because Keras has no built-in CTC loss:
# the CTC loss is defined through TensorFlow, i.e. a custom loss outside of Keras.
# A model saved with a custom loss requires that loss to be declared again when calling load_model,
# so for this application it is simpler to rebuild the model and load its weights with load_weights.
model.load_weights('Best_Captcha_ctc.h5')

# In[34]:

# the character set used for prediction has one extra blank symbol
Example #21
def UpSampling2DBilinear(size):
    return Lambda(lambda x: tf.image.resize_bilinear(x, size, align_corners=True))
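A brief usage sketch (TF 1.x, since tf.image.resize_bilinear is used above; the TF 2 equivalent would be tf.image.resize with method='bilinear'):

from tensorflow.keras.layers import Input

feat = Input(shape=(16, 16, 64))
upsampled = UpSampling2DBilinear((32, 32))(feat)  # (None, 32, 32, 64)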
Example #22
    def CreateModel(self):
        '''
		Define the CNN/LSTM/CTC model using the Keras functional API.
		Input layer: a sequence of 200-dimensional feature vectors; the maximum length of one utterance is set to 1600 (about 16 s).
		Hidden layer 1: 3x3 convolution layer
		Hidden layer 2: pooling layer with pool size 2
		Hidden layer 3: Dropout layer dropping 20% of the units, to prevent overfitting
		Hidden layer 4: recurrent layer (LSTM/GRU)
		Hidden layer 5: Dropout layer dropping 20% of the units, to prevent overfitting
		Hidden layer 6: fully connected layer with self.MS_OUTPUT_SIZE units and a softmax activation
		Output layer: a custom CTC layer that uses the CTC loss as the loss function for connectionist temporal classification with multiple outputs

		'''
        # Each frame is represented by 13 MFCC features plus their 13 first-order and 13 second-order deltas; the maximum signal sequence length is 1500
        input_data = Input(name='the_input',
                           shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH,
                                  1))

        layer_h1 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(input_data)  # convolution layer
        layer_h1 = Dropout(0.1)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h1)  # convolution layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h2)  # pooling layer
        #layer_h3 = Dropout(0.2)(layer_h2) # randomly drop some connections to prevent overfitting
        layer_h3 = Dropout(0.2)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h3)  # convolution layer
        layer_h4 = Dropout(0.2)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h4)  # convolution layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h5)  # pooling layer

        layer_h6 = Dropout(0.3)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # convolution layer
        layer_h7 = Dropout(0.3)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3),
                          use_bias=True,
                          activation='relu',
                          padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # convolution layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None,
                                padding="valid")(layer_h8)  # pooling layer

        layer_h9 = Dropout(0.3)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # convolution layer
        layer_h10 = Dropout(0.4)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3),
                           use_bias=True,
                           activation='relu',
                           padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # convolution layer
        layer_h12 = MaxPooling2D(pool_size=1, strides=None,
                                 padding="valid")(layer_h11)  # pooling layer

        #test=Model(inputs = input_data, outputs = layer_h6)
        #test.summary()

        layer_h13 = Reshape((200, 3200))(layer_h12)  # Reshape layer

        layer_h13 = Dropout(0.4)(layer_h13)
        layer_h14 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h13)  # fully connected layer
        layer_h14 = Dropout(0.4)(layer_h14)
        inner = layer_h14
        #layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层

        rnn_size = 128
        gru_1 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru1')(inner)
        gru_1b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru1_b')(inner)
        gru1_merged = add([gru_1, gru_1b])
        gru_2 = GRU(rnn_size,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    name='gru2')(gru1_merged)
        gru_2b = GRU(rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     kernel_initializer='he_normal',
                     name='gru2_b')(gru1_merged)

        gru2 = concatenate([gru_2, gru_2b])
        #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11)

        layer_h15 = Dropout(0.4)(gru2)
        layer_h16 = Dense(128,
                          activation="relu",
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h15)  # fully connected layer

        layer_h16 = Dropout(0.5)(layer_h16)  # randomly drop some connections to prevent overfitting
        layer_h17 = Dense(self.MS_OUTPUT_SIZE,
                          use_bias=True,
                          kernel_initializer='he_normal')(layer_h16)  # 全连接层

        y_pred = Activation('softmax', name='Activation0')(layer_h17)
        model_data = Model(inputs=input_data, outputs=y_pred)
        #model_data.summary()

        labels = Input(name='the_labels',
                       shape=[self.label_max_string_length],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer

        #layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1, ),
                          name='ctc')(
                              [y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length],
                      outputs=loss_out)

        model.summary()

        # clipnorm seems to speed up convergence
        #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
        ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06)

        #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=ada_d)

        # captures output of softmax so we can decode the output during visualization
        test_func = K.function([input_data], [y_pred])
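        # A hedged usage sketch (not in the original listing; `batch_of_features` and
        # `sequence_lengths` are placeholder names): the captured softmax output can be
        # greedy-decoded with K.ctc_decode during visualization, e.g.
        #     softmax_out = test_func([batch_of_features])[0]
        #     decoded, log_probs = K.ctc_decode(softmax_out, input_length=sequence_lengths)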

        print('[*Info] Model created and compiled successfully')
        return model, model_data
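
    # A hedged sketch (the actual self.ctc_lambda_func is defined elsewhere in the source
    # and is not shown in this excerpt): CTC lambdas of this kind are usually implemented
    # with K.ctc_batch_cost; the argument order below is inferred from the Lambda call's
    # input list [y_pred, labels, input_length, label_length].
    def ctc_lambda_func(self, args):
        y_pred, labels, input_length, label_length = args
        # K.ctc_batch_cost expects (y_true, y_pred, input_length, label_length)
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)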
Example #23
0
def build_model(image_size,
                n_classes,
                mode='training',
                l2_regularization=0.0,
                min_scale=0.1,
                max_scale=0.9,
                scales=None,
                aspect_ratios_global=[0.5, 1.0, 2.0],
                aspect_ratios_per_layer=None,
                two_boxes_for_ar1=True,
                steps=None,
                offsets=None,
                clip_boxes=False,
                variances=[1.0, 1.0, 1.0, 1.0],
                coords='centroids',
                normalize_coords=False,
                subtract_mean=None,
                divide_by_stddev=None,
                swap_channels=False,
                confidence_thresh=0.01,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False):
    '''
    Build a Keras model with SSD architecture, see references.

    The model consists of convolutional feature layers and a number of convolutional
    predictor layers that take their input from different feature layers.
    The model is fully convolutional.

    The implementation found here is a smaller version of the original architecture
    used in the paper (where the base network consists of a modified VGG-16 extended
    by a few convolutional feature layers), but of course it could easily be changed to
    an arbitrarily large SSD architecture by following the general design pattern used here.
    This implementation has 7 convolutional layers and 4 convolutional predictor
    layers that take their input from layers 4, 5, 6, and 7, respectively.

    Most of the arguments that this function takes are only needed for the anchor
    box layers. In case you're training the network, the parameters passed here must
    be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
    trained weights, the parameters passed here must be the same as the ones used
    to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Training currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
            'inference' follows the exact procedure of the original Caffe implementation, while
            'inference_fast' uses a faster prediction decoding procedure.
        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images.
        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
            of the shorter side of the input images. All scaling factors between the smallest and the
            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
            scaling factors will actually be the scaling factor for the last predictor layer, while the last
            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
            if `two_boxes_for_ar1` is `True`.
        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
            This list must be one element longer than the number of predictor layers. The first `k` elements are the
            scaling factors for the `k` predictor layers, while the last element is used for the second box
            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
            last scaling factor must be passed either way, even if it is not being used. If a list is passed,
            this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
            generated. This list is valid for all predictor layers. The original implementation uses more aspect ratios
            for some predictor layers and fewer for others. If you want to do that, too, then use the next argument instead.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer.
            This allows you to set the aspect ratios for each predictor layer individually. If a list is passed,
            it overrides `aspect_ratios_global`.
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
            using the scaling factor for the respective layer, the second one will be generated using
            the geometric mean of said scaling factor and the next bigger scaling factor.
        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
            If no steps are provided, then they will be computed such that the anchor box center points will form an
            equidistant grid within the image dimensions.
        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
            either floats or tuples of two floats. These numbers represent for each predictor layer how many
            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
            of the step size specified in the `steps` argument. If the list contains floats, then that value will
            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size,
            which is also the recommended setting.
        clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
        variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
            its respective variance value.
        coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
            of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
            and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
            subtracted from the image pixel intensity values. For example, pass a list of three integers
            to perform per-channel mean normalization for color images.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
            intensity values will be divided by the elements of this array. For example, pass a list
            of three integers to perform per-channel standard deviation normalization for color images.
        swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input
            image channels should be swapped.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box's confidence score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage.
        nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
            a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
            you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
            to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the
            spatial dimensions of the predictor layers), for inference you don't need them.

    Returns:
        model: The Keras SSD model.
        predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
            of the output tensor shape for each convolutional predictor layer. During
            training, the generator function needs this in order to transform
            the ground truth labels into tensors of identical structure as the
            output tensors of the model, which is in turn needed for the cost
            function.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    n_predictor_layers = 4  # The number of predictor conv layers in the network
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]

    ############################################################################
    # Get a few exceptions out of the way.
    ############################################################################

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "Either `aspect_ratios_per_layer` must be None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "Either `scales` must be None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)
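        # For example, with the default min_scale=0.1, max_scale=0.9 and 4 predictor
        # layers, this yields scales = [0.1, 0.3, 0.5, 0.7, 0.9].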

    if len(variances) != 4:  # We need one variance value for each of the four box coordinates
        raise ValueError(
            "4 variance values must be passed, but {} values were received.".format(
                len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide exactly one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers
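        # For example, the default aspect_ratios_global=[0.5, 1.0, 2.0] with
        # two_boxes_for_ar1=True gives n_boxes = [4, 4, 4, 4] (one box per aspect
        # ratio plus the extra box for aspect ratio 1).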

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    ############################################################################
    # Define functions for the Lambda layers below.
    ############################################################################

    def identity_layer(tensor):
        return tensor

    def input_mean_normalization(tensor):
        return tensor - np.array(subtract_mean)

    def input_stddev_normalization(tensor):
        return tensor / np.array(divide_by_stddev)

    #def input_channel_swap(tensor):
    #    if len(swap_channels) == 3:
    #        return K.stack([tensor[...,swap_channels[0]], tensor[...,swap_channels[1]], tensor[...,swap_channels[2]]], axis=-1)
    #    elif len(swap_channels) == 4:
    #        return K.stack([tensor[...,swap_channels[0]], tensor[...,swap_channels[1]], tensor[...,swap_channels[2]], tensor[...,swap_channels[3]]], axis=-1)

    ############################################################################
    # Build the network.
    ############################################################################

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(identity_layer,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if not (subtract_mean is None):
        x1 = Lambda(input_mean_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if not (divide_by_stddev is None):
        x1 = Lambda(input_stddev_normalization,
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
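    # Illustrative (assumed) values: subtract_mean=[123, 117, 104] together with
    # divide_by_stddev=127.5 would enable both Lambda layers above, performing per-channel
    # mean subtraction followed by a global rescaling of the pixel intensities.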
    #if swap_channels: #REMOVED FOR TFLITE
    #    x1 = Lambda(input_channel_swap, output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1)

    conv1 = Conv2D(32, (5, 5),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv1')(x1)
    conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(
        conv1
    )  # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3
    conv1 = ELU(name='elu1')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1)

    conv2 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv2')(pool1)
    conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2)
    conv2 = ELU(name='elu2')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2)

    conv3 = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv3')(pool2)
    conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3)
    conv3 = ELU(name='elu3')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3)

    conv4 = Conv2D(64, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv4')(pool3)
    conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4)
    conv4 = ELU(name='elu4')(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4)

    conv5 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv5')(pool4)
    conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5)
    conv5 = ELU(name='elu5')(conv5)
    pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5)

    conv6 = Conv2D(48, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv6')(pool5)
    conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6)
    conv6 = ELU(name='elu6')(conv6)
    pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6)

    conv7 = Conv2D(32, (3, 3),
                   strides=(1, 1),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name='conv7')(pool6)
    conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7)
    conv7 = ELU(name='elu7')(conv7)

    # The next part is to add the convolutional predictor layers on top of the base network
    # that we defined above. Note that I use the term "base network" differently than the paper does.
    # To me, the base network is everything that is not convolutional predictor layers or anchor
    # box layers. In this case we'll have four predictor layers, but of course you could
    # easily rewrite this into an arbitrarily deep base network and add an arbitrary number of
    # predictor layers on top of the base network by simply following the pattern shown here.

    # Build the convolutional predictor layers on top of conv layers 4, 5, 6, and 7.
    # We build two predictor layers on top of each of these layers: One for class prediction (classification), one for box coordinate prediction (localization)
    # We predict `n_classes` confidence values for each box, hence the `classes` predictors have depth `n_boxes * n_classes`
    # We predict 4 box coordinates for each box, hence the `boxes` predictors have depth `n_boxes * 4`
    # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)`
    classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes4')(conv4)
    classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes5')(conv5)
    classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes6')(conv6)
    classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3),
                      strides=(1, 1),
                      padding="same",
                      kernel_initializer='he_normal',
                      kernel_regularizer=l2(l2_reg),
                      name='classes7')(conv7)
    # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)`
    boxes4 = Conv2D(n_boxes[0] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes4')(conv4)
    boxes5 = Conv2D(n_boxes[1] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes5')(conv5)
    boxes6 = Conv2D(n_boxes[2] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes6')(conv6)
    boxes7 = Conv2D(n_boxes[3] * 4, (3, 3),
                    strides=(1, 1),
                    padding="same",
                    kernel_initializer='he_normal',
                    kernel_regularizer=l2(l2_reg),
                    name='boxes7')(conv7)

    # Generate the anchor boxes
    # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)`
    anchors4 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[0],
                           next_scale=scales[1],
                           aspect_ratios=aspect_ratios[0],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[0],
                           this_offsets=offsets[0],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors4')(boxes4)
    anchors5 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[1],
                           next_scale=scales[2],
                           aspect_ratios=aspect_ratios[1],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[1],
                           this_offsets=offsets[1],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors5')(boxes5)
    anchors6 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[2],
                           next_scale=scales[3],
                           aspect_ratios=aspect_ratios[2],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[2],
                           this_offsets=offsets[2],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors6')(boxes6)
    anchors7 = AnchorBoxes(img_height,
                           img_width,
                           this_scale=scales[3],
                           next_scale=scales[4],
                           aspect_ratios=aspect_ratios[3],
                           two_boxes_for_ar1=two_boxes_for_ar1,
                           this_steps=steps[3],
                           this_offsets=offsets[3],
                           clip_boxes=clip_boxes,
                           variances=variances,
                           coords=coords,
                           normalize_coords=normalize_coords,
                           name='anchors7')(boxes7)

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them

    # Shape inference differs between keras.layers and tf.keras.layers (-1); check the documentation.
    # So the intended output shape of each Reshape is computed manually here.

    # cba = classes/boxes/anchors shape; these values are reused for the reshapes below
    cba_4 = classes4.shape[1] * classes4.shape[2] * n_boxes[0]
    cba_5 = classes5.shape[1] * classes5.shape[2] * n_boxes[1]
    cba_6 = classes6.shape[1] * classes6.shape[2] * n_boxes[2]
    cba_7 = classes7.shape[1] * classes7.shape[2] * n_boxes[3]

    classes4_reshaped = Reshape((cba_4, n_classes),
                                name='classes4_reshape')(classes4)
    classes5_reshaped = Reshape((cba_5, n_classes),
                                name='classes5_reshape')(classes5)
    classes6_reshaped = Reshape((cba_6, n_classes),
                                name='classes6_reshape')(classes6)
    classes7_reshaped = Reshape((cba_7, n_classes),
                                name='classes7_reshape')(classes7)
    # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`

    # The spatial size computed for the classes reshapes is the same for the boxes and anchors reshapes (only the last dimension differs), so there is no need to recompute it.

    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    boxes4_reshaped = Reshape((cba_4, 4), name='boxes4_reshape')(boxes4)
    boxes5_reshaped = Reshape((cba_5, 4), name='boxes5_reshape')(boxes5)
    boxes6_reshaped = Reshape((cba_6, 4), name='boxes6_reshape')(boxes6)
    boxes7_reshaped = Reshape((cba_7, 4), name='boxes7_reshape')(boxes7)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    anchors4_reshaped = Reshape((cba_4, 8), name='anchors4_reshape')(anchors4)
    anchors5_reshaped = Reshape((cba_5, 8), name='anchors5_reshape')(anchors5)
    anchors6_reshaped = Reshape((cba_6, 8), name='anchors6_reshape')(anchors6)
    anchors7_reshaped = Reshape((cba_7, 8), name='anchors7_reshape')(anchors7)

    # Concatenate the predictions from the different layers and the associated anchor box tensors
    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1
    # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
    classes_concat = Concatenate(axis=1, name='classes_concat')([
        classes4_reshaped, classes5_reshaped, classes6_reshaped,
        classes7_reshaped
    ])

    # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
    boxes_concat = Concatenate(axis=1, name='boxes_concat')(
        [boxes4_reshaped, boxes5_reshaped, boxes6_reshaped, boxes7_reshaped])

    # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
    anchors_concat = Concatenate(axis=1, name='anchors_concat')([
        anchors4_reshaped, anchors5_reshaped, anchors6_reshaped,
        anchors7_reshaped
    ])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    classes_softmax = Activation('softmax',
                                 name='classes_softmax')(classes_concat)

    # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [classes_softmax, boxes_concat, anchors_concat])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        # The spatial dimensions are the same for the `classes` and `boxes` predictor layers.
        predictor_sizes = np.array([
            classes4._keras_shape[1:3], classes5._keras_shape[1:3],
            classes6._keras_shape[1:3], classes7._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
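
# A hedged usage sketch (not part of the original example; the image size, class count and
# scale values below are illustrative assumptions):
ssd = build_model(image_size=(300, 300, 3),
                  n_classes=20,
                  mode='training',
                  l2_regularization=0.0005,
                  scales=[0.1, 0.3, 0.5, 0.7, 0.9],
                  aspect_ratios_global=[0.5, 1.0, 2.0],
                  two_boxes_for_ar1=True,
                  normalize_coords=True)
ssd.summary()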
Example #24
0
def build_stage2_generator():
    """
    Create Stage-II generator containing the CA Augmentation Network,
    the image encoder and the generator network
    """

    # 1. CA Augmentation Network
    input_layer = Input(shape=(1024, ))
    input_lr_images = Input(shape=(64, 64, 3))

    ca = Dense(256)(input_layer)
    mean_logsigma = LeakyReLU(alpha=0.2)(ca)
    c = Lambda(generate_c)(mean_logsigma)

    # 2. Image Encoder
    x = ZeroPadding2D(padding=(1, 1))(input_lr_images)
    x = Conv2D(128, kernel_size=(3, 3), strides=1, use_bias=False)(x)
    x = ReLU()(x)

    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(256, kernel_size=(4, 4), strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(512, kernel_size=(4, 4), strides=2, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # 3. Joint
    c_code = Lambda(joint_block)([c, x])

    x = ZeroPadding2D(padding=(1, 1))(c_code)
    x = Conv2D(512, kernel_size=(3, 3), strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # 4. Residual blocks
    x = residual_block(x)
    x = residual_block(x)
    x = residual_block(x)
    x = residual_block(x)

    # 5. Upsampling blocks
    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(512, kernel_size=3, padding="same", strides=1,
               use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(256, kernel_size=3, padding="same", strides=1,
               use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(128, kernel_size=3, padding="same", strides=1,
               use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(64, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    x = Conv2D(3, kernel_size=3, padding="same", strides=1, use_bias=False)(x)
    x = Activation('tanh')(x)

    model = Model(inputs=[input_layer, input_lr_images],
                  outputs=[x, mean_logsigma])
    return model
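
# A hedged sketch (generate_c is not shown in this example; the 128/128 split below is an
# assumption based on the Dense(256) layer feeding it, and `K` is assumed to be the Keras
# backend, e.g. `from keras import backend as K`): the usual conditioning-augmentation
# reparameterization behind the Lambda(generate_c) call above.
def generate_c(x):
    # split the 256-dim vector into mean and log-sigma halves
    mean, log_sigma = x[:, :128], x[:, 128:]
    epsilon = K.random_normal(shape=K.shape(mean))
    # sample c = mean + sigma * epsilon
    return mean + K.exp(log_sigma) * epsilon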
Example #25
0
def define_generator(image_shape, probe_light_shape, latent_dim):
    init = RandomNormal(stddev=0.02)
    in_image = Input(shape=image_shape)
    probe_image_target = Input(shape=probe_light_shape)
    conv1 = Conv2D(64, (7, 7), padding='same',
                   kernel_initializer=init)(in_image)
    conv1 = BatchNormalization(axis=-1)(conv1)
    conv1 = LeakyReLU(alpha=0.2)(conv1)
    pool1 = AveragePooling2D(pool_size=(2, 2))(conv1)

    conv2 = Conv2D(128, (3, 3),
                   strides=(2, 2),
                   padding='same',
                   kernel_initializer=init)(pool1)
    conv2 = BatchNormalization(axis=-1)(conv2)
    conv2 = LeakyReLU(alpha=0.2)(conv2)
    pool2 = AveragePooling2D(pool_size=(2, 2))(conv2)

    conv3 = Conv2D(256, (3, 3),
                   strides=(2, 2),
                   padding='same',
                   kernel_initializer=init)(pool2)
    conv3 = BatchNormalization(axis=-1)(conv3)
    conv3 = LeakyReLU(alpha=0.2)(conv3)

    pn = Conv2D(64, (7, 7), padding='same',
                kernel_initializer=init)(probe_image_target)
    pn = BatchNormalization(axis=-1)(pn)
    pn = LeakyReLU(alpha=0.2)(pn)
    pn = AveragePooling2D(pool_size=(2, 2))(pn)

    pn = Conv2D(128, (3, 3),
                strides=(2, 2),
                padding='same',
                kernel_initializer=init)(pn)
    pn = BatchNormalization(axis=-1)(pn)
    pn = LeakyReLU(alpha=0.2)(pn)
    pn = AveragePooling2D(pool_size=(2, 2))(pn)

    pn = Conv2D(256, (3, 3),
                strides=(2, 2),
                padding='same',
                kernel_initializer=init)(pn)
    pn = BatchNormalization(axis=-1)(pn)
    pn = LeakyReLU(alpha=0.2)(pn)

    g = Flatten()(conv3)
    pn = Flatten()(pn)

    g = Concatenate()([g, pn])

    g = Dense(latent_dim, activation='relu', kernel_initializer=init)(g)

    g = Dense(16 * 16 * 256, activation='relu', kernel_initializer=init)(g)

    g = Reshape((16, 16, 256))(g)
    sub_layer1 = Lambda(lambda x: tf.nn.depth_to_space(x, 2))
    sub_layer2 = Lambda(lambda x: tf.nn.depth_to_space(x, 2))
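    # Note (added for clarity): tf.nn.depth_to_space with block_size=2 moves channels into
    # space, e.g. the (batch, 16, 16, 256) tensor above becomes (batch, 32, 32, 64) before
    # the transposed convolution.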
    up1 = Conv2DTranspose(128, (3, 3),
                          strides=(2, 2),
                          padding='same',
                          kernel_initializer=init)(sub_layer1(inputs=g))
    up1 = BatchNormalization(axis=-1)(up1)
    up1 = LeakyReLU(alpha=0.2)(up1)

    merge1 = Concatenate()([up1, conv2])

    up2 = Conv2DTranspose(64, (3, 3),
                          strides=(2, 2),
                          padding='same',
                          kernel_initializer=init)(sub_layer2(inputs=merge1))
    up2 = BatchNormalization(axis=-1)(up2)
    up2 = LeakyReLU(alpha=0.2)(up2)
    merge2 = Concatenate()([up2, conv1])

    final = Conv2D(3, (7, 7), padding='same', kernel_initializer=init)(merge2)
    final = BatchNormalization(axis=-1)(final)
    out_image = Activation('tanh')(final)
    model = Model([in_image, probe_image_target], out_image)
    return model
Example #26
0
    def build_model(self):
        hypo_input = Input(shape=(1, ), name='Hyponym')
        neg_input = Input(shape=(self.synonym_sample_n, ), name='Negative')
        hyper_input = Input(shape=(1, ), name='Hypernym')

        hypo_embedding, neg_embedding, hyper_embedding = self.embeddings_layer(
            [hypo_input, neg_input, hyper_input])

        hypo_embedding = Dropout(rate=0.3, name='Dropout_Hypo')(hypo_embedding)
        hyper_embedding = Dropout(rate=0.3,
                                  name='Dropout_Hyper')(hyper_embedding)
        neg_embedding = Dropout(rate=0.3, name='Dropout_Neg')(neg_embedding)

        phi_layer = []
        for i in range(self.phi_k):
            phi_layer.append(
                Dense(self.data.embeddings_matrix.shape[1],
                      activation=None,
                      use_bias=False,
                      kernel_initializer=RandomIdentity(),
                      name='Phi%d' % (i))(hypo_embedding))

        # either merge the phi layers into one, or flatten the single phi projection
        if self.phi_k == 1:
            # flatten tensors
            phi = Flatten(name='Flatten_Phi')(phi_layer[0])
            #hyper_embedding = Flatten(name='Flatten_Hyper')(hyper_embedding)
        else:
            phi = Concatenate(axis=1)(phi_layer)

        phi = Dropout(rate=0.3, name='Dropout_Phi')(phi)

        # compute mean phi projection
        #phi_mean = Lambda(lambda x: K.mean(x, axis=1, keepdims=True))(phi)

        # compute synonymy similarity to each projection
        phi_negative = Dot(axes=-1, normalize=True,
                           name='SimNeg')([phi, neg_embedding])

        # compute hypernym similarity to each projection
        phi_hyper = Dot(axes=-1, normalize=True,
                        name='SimHyper')([phi, hyper_embedding])

        if self.phi_k > 1:
            # find the projection which yields the highest similarity
            phi_hyper = Flatten(name='Flatten_PhiHyper')(phi_hyper)
            # in the case of multiple phi, calculate the mean similarity between each projection
            # and negative case
            phi_negative = Lambda(lambda x: K.mean(x, axis=1),
                                  name='MeanPhiNeg')(phi_negative)

        zero_neg = Lambda(lambda x: K.mean(x * 0., axis=-1),
                          name='ZeroPhiNeg')(phi_negative)
        phi_hyper = Subtract(name='DummySub')([phi_hyper, zero_neg])

        prediction = Dense(
            1,
            activation="sigmoid",
            name='Prediction',
            use_bias=True,
            kernel_initializer='random_normal',
            bias_initializer=Zeros(),
        )(phi_hyper)

        model = Model(inputs=[hypo_input, neg_input, hyper_input],
                      outputs=prediction)

        regul_loss = self.custom_loss(phi_negative, self.lambda_c)
        adam = Adam(lr=self.lr,
                    beta_1=self.beta1,
                    beta_2=self.beta2,
                    clipnorm=self.clip_value)
        #adam = Adadelta()
        model.compile(optimizer=adam, loss=regul_loss, metrics=['accuracy'])
        return model
Example #27
0
    """
    def return_pad_me(padding):
        def pad_me(x):
            #FRANK# x[:,:,:y,:] slices x up to index y along the given axis.
            return (tf.concat((x, x[:, :, :padding, :]), 2))

    #         return(tf.concat((2,x,x[:,:,:padding,:])))

        return (pad_me)
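    # Illustration (added): with data_format='channels_first', an input of shape
    # (batch, channels, height, width) gains `padding` extra rows along axis 2, taken
    # from its own first rows, i.e. circular padding along that axis.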

    input_shape = x_train_eve[0].shape
    print("image shape", input_shape)

    model_event = Sequential(name='Sequential_for_event')
    model_event.add(
        Lambda(return_pad_me(4), input_shape=input_shape, name='event'))
    model_event.add(
        Conv2D(32,
               kernel_size=(5, 5),
               strides=(1, 1),
               activation='relu',
               data_format='channels_first',
               name='event_2D_1'))
    model_event.add(
        Lambda(return_pad_me(1),
               input_shape=input_shape,
               name='event_padding_1'))
    model_event.add(
        MaxPooling2D(pool_size=(2, 2),
                     strides=(2, 2),
                     data_format='channels_first',
Example #28
0
model = Sequential()
model.add(Convolution2D(16, 3, 3, activation='relu', input_shape=(128, 128, 3)))
model.add(BatchNormalization())
model.add(Convolution2D(16, 3, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.5))
# model.add(BatchNormalization())
model.add(Dense(len(trainSet.hotEncodeReverse), activation='softmax'))
model.add(Lambda(lambda x: x, name='colors_prob'))

model.summary()
#categorical_crossentropy
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])


saver = tf.train.Saver()

try:
    with open(os.path.join(model_n_ckpt_dir,'model.pb'), 'wb') as f:
        f.write(tf.keras.backend.get_session().graph_def.SerializeToString())
except:
    print("failed model n ckpt ")

model.fit(trainSet.allData['images'], trainSet.allData['labels'], batch_size=256, nb_epoch=5, verbose=1)
Example #29
0
    def learn_embedding(self,
                        graph=None,
                        edge_f=None,
                        is_weighted=False,
                        no_python=False):
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if not graph:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        S = nx.to_scipy_sparse_matrix(graph)
        t1 = time()
        S = (S + S.T) / 2
        self._node_num = len(graph.nodes)

        # Generate encoder, decoder and autoencoder
        self._num_iter = self._n_iter
        # If cannot use previous step information, initialize new models
        self._encoder = get_encoder(self._node_num, self._d, self._K,
                                    self._n_units, self._nu1, self._nu2,
                                    self._actfn)
        self._decoder = get_decoder(self._node_num, self._d, self._K,
                                    self._n_units, self._nu1, self._nu2,
                                    self._actfn)
        self._autoencoder = get_autoencoder(self._encoder, self._decoder)

        # Initialize self._model
        # Input
        x_in = Input(shape=(2 * self._node_num, ), name='x_in')
        x1 = Lambda(lambda x: x[:, 0:self._node_num],
                    output_shape=(self._node_num, ))(x_in)
        x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                    output_shape=(self._node_num, ))(x_in)
        # Process inputs
        [x_hat1, y1] = self._autoencoder(x1)
        [x_hat2, y2] = self._autoencoder(x2)
        # Outputs
        x_diff1 = Subtract()([x_hat1, x1])
        x_diff2 = Subtract()([x_hat2, x2])
        y_diff = Subtract()([y2, y1])

        # Objectives
        def weighted_mse_x(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
                y_pred: Contains x_hat - x
                y_true: Contains [b, deg]
            '''
            return KBack.sum(KBack.square(
                y_pred * y_true[:, 0:self._node_num]),
                             axis=-1) / y_true[:, self._node_num]

        def weighted_mse_y(y_true, y_pred):
            ''' Hack: This fn doesn't accept additional arguments.
                      We use y_true to pass them.
            y_pred: Contains y2 - y1
            y_true: Contains s12
            '''
            min_batch_size = KBack.shape(y_true)[0]
            return KBack.reshape(KBack.sum(KBack.square(y_pred), axis=-1),
                                 [min_batch_size, 1]) * y_true
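        # Illustration (added; packing scheme inferred from the docstrings above): the
        # batch generator supplies y_true for the x-branches as the column-wise
        # concatenation [b, deg], so y_true[:, 0:node_num] is the penalty matrix b and
        # y_true[:, node_num] holds the normalizing degree term.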

        # Model
        self._model = Model(inputs=x_in, outputs=[x_diff1, x_diff2, y_diff])
        sgd = SGD(lr=self._xeta,
                  decay=1e-5,
                  momentum=0.99,
                  nesterov=True,
                  clipnorm=1.0)
        # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        self._model.compile(
            optimizer=sgd,
            loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
            loss_weights=[1, 1, self._alpha])

        self._model.fit_generator(
            generator=batch_generator_Teammate(S, self._beta, self._n_batch,
                                               True),
            epochs=self._num_iter,
            steps_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
            verbose=1)
        # Get embedding for all points
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
        t2 = time()
        # Save the autoencoder and its weights
        if (self._weightfile is not None):
            saveweights(self._encoder, self._weightfile[0])
            saveweights(self._decoder, self._weightfile[1])
        if (self._modelfile is not None):
            savemodel(self._encoder, self._modelfile[0])
            savemodel(self._decoder, self._modelfile[1])
        if (self._savefilesuffix is not None):
            saveweights(self._encoder,
                        'T_encoder_weights_' + self._savefilesuffix + '.hdf5')
            saveweights(self._decoder,
                        'T_decoder_weights_' + self._savefilesuffix + '.hdf5')
            savemodel(self._encoder,
                      'T_encoder_model_' + self._savefilesuffix + '.json')
            savemodel(self._decoder,
                      'T_decoder_model_' + self._savefilesuffix + '.json')
            # Save the embedding
            np.savetxt('T_embedding_' + self._savefilesuffix + '.txt', self._Y)
        return self._Y, (t2 - t1)
Example #30
0
 def __call__(self, inputs, *args, **kwargs):
     # Unfortunate workaround necessary for TF < 1.13
     inputs_padded = Lambda(self._pad)(inputs)
     return super().__call__(inputs_padded, *args, **kwargs)
Example #31
0
 def __init__(self, dims):
     Lambda.__init__(self, lambda x: tf.transpose(x, dims))
Example #32
0
    def __init__(self,
                 num_classes=19,
                 output_stride=16,
                 backbonetype='mobilenetv2',
                 weights='imagenet',
                 dl_input_shape=(None, 483, 769, 3),
                 weight_decay=0.00004,
                 pooling='global',
                 residual_shortcut=False):
        super(CMSNet, self).__init__(name='cmsnet')
        """
        :param num_classes:  (Default value = 19)
        :param output_stride:  (Default value = 16)
            if the stride count is 4, remove the stride from block 13 and insert atrous convolutions in blocks 14, 15 and 16
            if the stride count is 3, remove the stride from blocks 6/13 and insert atrous rate 2 in blocks 7-13 and rate 4 in blocks 14-16
        :param backbonetype:  (Default value = 'mobilenetv2')
        :param weights:  (Default value = 'imagenet')
        :param dl_input_shape:  (Default value = (None, 483, 769, 3))
        :param weight_decay: use 0.00004 for MobileNet-V2 or Xception model backbonetype. Use 0.0001 for ResNet backbonetype.

        """
        self.logger = logging.getLogger('perception.models.CMSNet')
        self.logger.info('creating an instance of CMSNet with backbone ' +
                         backbonetype + ', OS' + str(output_stride) +
                         ', nclass=' + str(num_classes) + ', input=' +
                         str(dl_input_shape) + ', pooling=' + pooling +
                         ', residual=' + str(residual_shortcut))

        self.num_classes = num_classes
        self.output_stride = output_stride
        self.dl_input_shape = dl_input_shape
        self._createBackbone(backbonetype=backbonetype,
                             output_stride=output_stride)
        # All with 256 filters and batch normalization.
        # one 1×1 convolution and three 3×3 convolutions with rates = (6, 12, 18) when output stride = 16.
        # Rates are doubled when output stride = 8.

        #Create Spatial Pyramid Pooling
        x = self.backbone.output

        pooling_shape = self.backbone.compute_output_shape(self.dl_input_shape)
        pooling_shape_float = tf.cast(pooling_shape[1:3], tf.float32)

        assert pooling in [
            'aspp', 'spp', 'global'
        ], "Only suported pooling= 'aspp', 'spp' or 'global'."

        if pooling == 'aspp':
            if output_stride == 16:
                rates = (6, 12, 18)
            elif output_stride == 8:
                rates = (12, 24, 36)
            # grid level: pooling
            x0 = Conv2D(filters=256,
                        kernel_size=3,
                        name='aspp_0_expand',
                        padding="same",
                        dilation_rate=rates[0],
                        kernel_regularizer=l2(weight_decay))(x)
            x0 = BatchNormalization(name='aspp_0_expand_BN')(x0)  #epsilon=1e-5
            x0 = ReLU(name='aspp_0_expand_relu')(x0)

            x1 = Conv2D(filters=256,
                        kernel_size=3,
                        name='aspp_1_expand',
                        padding="same",
                        dilation_rate=rates[1],
                        kernel_regularizer=l2(weight_decay))(x)
            x1 = BatchNormalization(name='aspp_1_expand_BN')(x1)  #epsilon=1e-5
            x1 = ReLU(name='aspp_1_expand_relu')(x1)

            x2 = Conv2D(filters=256,
                        kernel_size=3,
                        name='aspp_2_expand',
                        padding="same",
                        dilation_rate=rates[2],
                        kernel_regularizer=l2(weight_decay))(x)
            x2 = BatchNormalization(name='aspp_2_expand_BN')(x2)  #epsilon=1e-5
            x2 = ReLU(name='aspp_2_expand_relu')(x2)

            # grid level: all
            xn = Conv2D(filters=256,
                        kernel_size=1,
                        name='aspp_n_expand',
                        kernel_regularizer=l2(weight_decay))(x)
            xn = BatchNormalization(name='aspp_n_expand_BN')(xn)  #epsilon=1e-5
            xn = ReLU(name='aspp_n_expand_relu')(xn)

            #Concatenate spatial pyramid pooling
            x0.set_shape(pooling_shape[0:3].concatenate(x0.get_shape()[-1]))
            x1.set_shape(pooling_shape[0:3].concatenate(x1.get_shape()[-1]))
            x2.set_shape(pooling_shape[0:3].concatenate(x2.get_shape()[-1]))
            xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
            x = Concatenate(name='aspp_concatenate')([x0, x1, x2, xn])

        elif pooling == 'spp':
            rates = (1, 2, 3, 6)
            # grid level: pooling
            x0 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[0],
                                             tf.int32),
                           padding="valid",
                           name='spp_0_average_pooling2d')(x)
            x0 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                        kernel_size=1,
                        name='spp_0_expand',
                        kernel_regularizer=l2(weight_decay))(x0)
            x0 = BatchNormalization(name='spp_0_expand_BN')(x0)  #epsilon=1e-5
            x0 = ReLU(name='spp_0_expand_relu')(x0)
            if tf.__version__.split('.')[0] == '1':
                x0 = Lambda(lambda x0: tf.image.resize_bilinear(
                    x0, pooling_shape[1:3], align_corners=True),
                            name='spp_0_resize_bilinear')(x0)
            else:
                x0 = Lambda(lambda x0: tf.image.resize(x0,
                                                       pooling_shape[1:3],
                                                       method=tf.image.
                                                       ResizeMethod.BILINEAR),
                            name='spp_0_resize_bilinear')(x0)

            x1 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[1],
                                             tf.int32),
                           padding="valid",
                           name='spp_1_average_pooling2d')(x)
            x1 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                        kernel_size=1,
                        name='spp_1_expand',
                        kernel_regularizer=l2(weight_decay))(x1)
            x1 = BatchNormalization(name='spp_1_expand_BN')(x1)  #epsilon=1e-5
            x1 = ReLU(name='spp_1_expand_relu')(x1)
            if tf.__version__.split('.')[0] == '1':
                x1 = Lambda(lambda x1: tf.image.resize_bilinear(
                    x1, pooling_shape[1:3], align_corners=True),
                            name='spp_1_resize_bilinear')(x1)
            else:
                x1 = Lambda(lambda x1: tf.image.resize(x1,
                                                       pooling_shape[1:3],
                                                       method=tf.image.
                                                       ResizeMethod.BILINEAR),
                            name='spp_1_resize_bilinear')(x1)

            x2 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[2],
                                             tf.int32),
                           padding="valid",
                           name='spp_2_average_pooling2d')(x)
            x2 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                        kernel_size=1,
                        name='spp_2_expand',
                        kernel_regularizer=l2(weight_decay))(x2)
            x2 = BatchNormalization(name='spp_2_expand_BN')(x2)  #epsilon=1e-5
            x2 = ReLU(name='spp_2_expand_relu')(x2)
            if tf.__version__.split('.')[0] == '1':
                x2 = Lambda(lambda x2: tf.image.resize_bilinear(
                    x2, pooling_shape[1:3], align_corners=True),
                            name='spp_2_resize_bilinear')(x2)
            else:
                x2 = Lambda(lambda x2: tf.image.resize(x2,
                                                       pooling_shape[1:3],
                                                       method=tf.image.
                                                       ResizeMethod.BILINEAR),
                            name='spp_2_resize_bilinear')(x2)

            x3 = AvgPool2D(pool_size=tf.cast(pooling_shape_float / rates[3],
                                             tf.int32),
                           padding="valid",
                           name='spp_3_average_pooling2d')(x)
            x3 = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                        kernel_size=1,
                        name='spp_3_expand',
                        kernel_regularizer=l2(weight_decay))(x3)
            x3 = BatchNormalization(name='spp_3_expand_BN')(x3)  #epsilon=1e-5
            x3 = ReLU(name='spp_3_expand_relu')(x3)
            if tf.__version__.split('.')[0] == '1':
                x3 = Lambda(lambda x3: tf.image.resize_bilinear(
                    x3, pooling_shape[1:3], align_corners=True),
                            name='spp_3_resize_bilinear')(x3)
            else:
                x3 = Lambda(lambda x3: tf.image.resize(
                    x3, pooling_shape[1:3],
                    method=tf.image.ResizeMethod.BILINEAR),
                            name='spp_3_resize_bilinear')(x3)

            # grid level: all (no pooling on this branch)
            xn = Conv2D(filters=int(pooling_shape[-1] / len(rates)),
                        kernel_size=1,
                        name='spp_n_expand',
                        kernel_regularizer=l2(weight_decay))(x)
            xn = BatchNormalization(name='spp_n_expand_BN')(xn)  #epsilon=1e-5
            xn = ReLU(name='spp_n_expand_relu')(xn)
            # concatenate the spatial pyramid pooling branches
            xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
            x = Concatenate(name='spp_concatenate')([x0, x1, x2, x3, xn])

        elif pooling == 'global':
            # grid level: global pooling
            x0 = AvgPool2D(pool_size=pooling_shape[1:3],
                           padding="valid",
                           name='spp_0_average_pooling2d')(x)
            x0 = Conv2D(filters=256,
                        kernel_size=1,
                        name='spp_0_expand',
                        kernel_regularizer=l2(weight_decay))(x0)
            x0 = BatchNormalization(name='spp_0_expand_BN')(x0)  #epsilon=1e-5
            x0 = ReLU(name='spp_0_expand_relu')(x0)
            #        x0 = tf.image.resize(x0,
            #            size=pooling_shape[1:3],
            #            method=tf.image.ResizeMethod.BILINEAR, name='spp_0_resize_bilinear')
            if tf.__version__.split('.')[0] == '1':
                x0 = Lambda(lambda x0: tf.image.resize_bilinear(
                    x0, pooling_shape[1:3], align_corners=True),
                            name='spp_0_resize_bilinear')(x0)
            else:
                x0 = Lambda(lambda x0: tf.image.resize(
                    x0, pooling_shape[1:3],
                    method=tf.image.ResizeMethod.BILINEAR),
                            name='spp_0_resize_bilinear')(x0)

            # grid level: all (no pooling on this branch)
            xn = Conv2D(filters=256,
                        kernel_size=1,
                        name='spp_1_expand',
                        kernel_regularizer=l2(weight_decay))(x)
            xn = BatchNormalization(name='spp_1_expand_BN')(xn)  #epsilon=1e-5
            xn = ReLU(name='spp_1_expand_relu')(xn)
            # concatenate the global pooling branch with the unpooled branch
            xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
            x = Concatenate(name='spp_concatenate')([x0, xn])

        # concat projection: 1x1 conv over the concatenated branches
        x = Conv2D(filters=256,
                   kernel_size=1,
                   name='spp_concat_project',
                   kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(name='spp_concat_project_BN')(x)  #epsilon=1e-5
        x = ReLU(name='spp_concat_project_relu')(x)

        if residual_shortcut:
            assert output_stride == 16, "For now, the residual shortcut is only available for atrous convolution with output_stride=16."

            # shortcut from the output-stride-8 layer, e.g. block_6_project_BN with output shape (None, 61, 97, 64)
            os8_shape = self.backbone.get_layer(
                self.strideOutput8LayerName).output_shape
            os8_output = self.backbone.get_layer(
                self.strideOutput8LayerName).output

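            # project the SPP output to the OS8 channel count, upsample 2x,
            # and fuse it with the OS8 feature map by addition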
            x = Conv2D(filters=os8_shape[-1],
                       kernel_size=1,
                       name='shotcut_2x_conv',
                       kernel_regularizer=l2(weight_decay))(x)
            x = BatchNormalization(name='shotcut_2x_BN')(x)  #epsilon=1e-5
            if tf.__version__.split('.')[0] == '1':
                x = Lambda(lambda x: tf.image.resize_bilinear(
                    x, os8_shape[1:3], align_corners=True),
                           name='shotcut_2x_bilinear')(x)
            else:
                x = Lambda(lambda x: tf.image.resize(
                    x, os8_shape[1:3], method=tf.image.ResizeMethod.BILINEAR),
                           name='shotcut_2x_bilinear')(x)
            x = ReLU(name='shotcut_2x_relu')(x)
            x = Add(name='shotcut_2x_add')([x, os8_output])

        x = Dropout(rate=0.1, name='dropout')(x)

        #Semantic Segmentation
        x = Conv2D(filters=num_classes,
                   kernel_size=1,
                   name='segmentation',
                   kernel_regularizer=l2(weight_decay))(x)
        #x = BatchNormalization(name='segmentation_BN')(x)
        #        x = tf.image.resize(x, size=self.dl_input_shape[1:3],
        #                method=tf.image.ResizeMethod.BILINEAR, name='segmentation_bilinear')
        if tf.__version__.split('.')[0] == '1':
            x = Lambda(lambda x: tf.image.resize_bilinear(
                x, self.dl_input_shape[1:3], align_corners=True),
                       name='segmentation_bilinear')(x)
        else:
            x = Lambda(lambda x: tf.image.resize(
                x, self.dl_input_shape[1:3],
                method=tf.image.ResizeMethod.BILINEAR),
                       name='segmentation_bilinear')(x)
        x = Softmax(name='logistic_softmax')(x)
        # logits are used for training; argmax over classes yields the predicted mask
        super(CMSNet, self).__init__(inputs=self.backbone.input,
                                     outputs=x,
                                     name='cmsnet')
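The TF1/TF2 resize branching above is repeated verbatim for every pyramid branch and for the segmentation head. A minimal sketch of a reusable helper, assuming the same imports (tf, Lambda) are in scope; the helper name is illustrative and not part of the original model:

def resize_bilinear_layer(size, name):
    # wrap the version-dependent bilinear resize in a single Lambda layer
    if tf.__version__.split('.')[0] == '1':
        return Lambda(lambda t: tf.image.resize_bilinear(t, size, align_corners=True),
                      name=name)
    return Lambda(
        lambda t: tf.image.resize(t, size, method=tf.image.ResizeMethod.BILINEAR),
        name=name)

# usage, e.g.: x0 = resize_bilinear_layer(pooling_shape[1:3], 'spp_0_resize_bilinear')(x0)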
Example #33
0
 def __init__(self, scale):
     Lambda.__init__(self, lambda x: tf.cast(x, tf.float32) * scale)
Example #34
0
def bilinear_cnn(num_classes,
                 input_shape,
                 backbone_cnn=None,
                 fB=None,
                 conv1x1=None,
                 dense_layers=[],
                 dropout_rate=None):
    '''Combine two feature-extracting CNNs into a single Model with bilinear_pooling + FC layers.
       backbone_cnn and fB should output 4D tensors of equal shape, except (optionally) in # of channels.

    Parameters
    ----------
    backbone_cnn : KerasModel or str
        Feature network A. Should output features (N, H, W, cA).
        If str, loads the corresponding ImageNet model from `keras.applications`.
    fB : KerasModel or str, optional
        Feature network B. Should output features (N, H, W, cB).
        If str, loads the corresponding ImageNet model from `keras.applications`.
        If `None`, a symmetric B-CNN is built using backbone_cnn for both streams.
    num_classes : int
        Number of classes for the softmax output layer.
    input_shape : tuple of int
        Shape of input images. Must be compatible with the inputs of both feature networks.
    conv1x1 : int or iterable(int), optional
        Add a 1x1 conv to reduce the number of channels in (fA, fB) to some value(s).
        If iterable, it must have length 2; the values are mapped to (fA, fB).
    dense_layers : iterable of int, optional
        Sizes of additional Dense layers between the bilinear vector and the softmax. Default=[].
    dropout_rate : float, optional
        Dropout rate applied to the Dense layers.

    Returns
    -------
    B-CNN : KerasModel
        Single bilinear CNN composed from fA & fB (asymmetric) or fA with itself (symmetric).
    '''
    assert backbone_cnn is not None
    fA = make_backbone(backbone_cnn, input_shape)
    fB = make_backbone(fB, input_shape) if fB is not None else None

    input_image = Input(shape=input_shape)

    outA = fA(input_image)
    if fB is None:
        outB = outA  # symmetric B-CNN
    else:
        outB = fB(input_image)  # asymmetric B-CNN

    if isinstance(conv1x1, int):
        outA = Conv2D(conv1x1, (1, 1), name='reduce_A')(outA)
        outB = Conv2D(conv1x1, (1, 1), name='reduce_B')(outB)
    elif hasattr(conv1x1, '__iter__'):
        assert len(conv1x1) == 2, 'if iterable, conv1x1 must have length of 2'
        outA = Conv2D(conv1x1[0], (1, 1), name='reduce_A')(outA)
        outB = Conv2D(conv1x1[1], (1, 1), name='reduce_B')(outB)

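    # bilinear pooling: outer product of the two feature maps at each spatial
    # location, pooled over locations and flattened into a single feature vector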
    x = Lambda(bilinear_pooling, name='bilinear_pooling')([outA, outB])

    x = make_dense_layers(dense_layers, dropout=dropout_rate)(x)

    pred = Dense(num_classes, activation='softmax')(x)

    model = KerasModel(inputs=input_image, outputs=pred)

    return model
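For reference, a call following the docstring above might look like the sketch below; the 'vgg16' backbone string, input shape, and hyperparameters are illustrative values and assume make_backbone resolves keras.applications names as described:

# hypothetical usage of bilinear_cnn; values are illustrative
model = bilinear_cnn(num_classes=100,
                     input_shape=(224, 224, 3),
                     backbone_cnn='vgg16',
                     conv1x1=256,
                     dense_layers=[512],
                     dropout_rate=0.5)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])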
Example #35
0
 def __init__(self, axis=-1):
     Lambda.__init__(self, lambda x: tf.squeeze(x, axis=axis))
Example #36
0
 def clipped_relu(self, inputs):
     relu = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20),
                   name=f"clipped_relu_{self.clipped_relu_count}")(inputs)
     self.clipped_relu_count += 1
     return relu
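The same clipping can also be expressed with the built-in Keras ReLU layer, which accepts a max_value argument; a brief equivalent sketch (the layer name is illustrative):

from tensorflow.keras.layers import ReLU

# clip activations to [0, 20]; equivalent to the Lambda-based clipped_relu above
clipped_relu_layer = ReLU(max_value=20.0, name="clipped_relu_0")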