def call(self, x):
    assert isinstance(x, list)
    inp_a, inp_b = x
    outp_a = K.l2_normalize(inp_a, -1)
    outp_b = K.l2_normalize(inp_b, -1)
    alpha = K.batch_dot(outp_b, outp_a, axes=[2, 2])
    alpha = K.l2_normalize(alpha, 1)
    alpha = K.one_hot(K.argmax(alpha, 1), K.int_shape(inp_a)[1])
    hmax = K.batch_dot(alpha, outp_b, axes=[1, 1])
    kcon = K.eye(K.int_shape(inp_a)[1], dtype='float32')
    m = []
    for i in range(self.output_dim):
        outp_a = inp_a * self.W[i]
        outp_hmax = hmax * self.W[i]
        outp_a = K.l2_normalize(outp_a, -1)
        outp_hmax = K.l2_normalize(outp_hmax, -1)
        outp = K.batch_dot(outp_hmax, outp_a, axes=[2, 2])
        outp = K.sum(outp * kcon, -1, keepdims=True)
        m.append(outp)
    if self.output_dim > 1:
        persp = K.concatenate(m, 2)
    else:
        persp = m[0]  # unwrap the single tensor; returning the list itself breaks downstream layers
    return [persp, persp]
def call(self, x, mask=None):
    nx = K.l2_normalize(x, axis=-1)
    nw = K.l2_normalize(self.W, axis=0)
    output = K.dot(nx, nw)
    if self.bias:
        output += self.b
    return self.activation(output)
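# For context, a minimal sketch of a layer the `call` above could belong to: a
# cosine-normalized Dense layer. The class name, initializers, and
# `compute_output_shape` are assumptions, not from the original source.
from keras import activations
from keras import backend as K
from keras.layers import Layer

class CosineDense(Layer):
    def __init__(self, units, activation=None, use_bias=True, **kwargs):
        super(CosineDense, self).__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.bias = use_bias

    def build(self, input_shape):
        # kernel is unit-normalized per output column in `call`
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[-1], self.units),
                                 initializer='glorot_uniform',
                                 trainable=True)
        if self.bias:
            self.b = self.add_weight(name='b', shape=(self.units,),
                                     initializer='zeros', trainable=True)
        super(CosineDense, self).build(input_shape)

    def call(self, x, mask=None):
        nx = K.l2_normalize(x, axis=-1)
        nw = K.l2_normalize(self.W, axis=0)
        output = K.dot(nx, nw)
        if self.bias:
            output += self.b
        return self.activation(output)

    def compute_output_shape(self, input_shape):
        return input_shape[:-1] + (self.units,)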
def call(self, inputs):
    x1 = inputs[0]
    x2 = inputs[1]
    if self.normalize:
        x1 = K.l2_normalize(x1, axis=2)
        x2 = K.l2_normalize(x2, axis=2)
    output = K.tf.einsum('abd,fde,ace->afbc', x1, self.M, x2)
    return output
def euclidian_dist(self, x_pair):
    x1_norm = K.l2_normalize(x_pair[0], axis=1)
    x2_norm = K.l2_normalize(x_pair[1], axis=1)
    diff = x1_norm - x2_norm
    square = K.square(diff)
    summed = K.sum(square, axis=1)  # renamed to avoid shadowing the built-in `sum`
    summed = K.clip(summed, min_value=1e-12, max_value=None)  # guard the sqrt gradient at 0
    dist = K.sqrt(summed)
    return dist
def get_output(self, train):
    x = self.get_input(train)
    x -= K.mean(x, axis=1, keepdims=True)
    x = K.l2_normalize(x, axis=1)
    pos = K.relu(x)
    neg = K.relu(-x)
    return K.concatenate([pos, neg], axis=1)
def recurrence(y_i, h):
    # h -> (batch_size, encoding_dim, input_length)
    h_permute = K.permute_dimensions(h, [0, 2, 1])
    # eqn 6: (batch_size, input_length)
    e = K.l2_normalize(
        K.batch_dot(h_permute, s, axes=1),  # (batch_size, input_length)
        axis=1)
    # eqn 5
    alpha = K.softmax(e)  # (batch_size, input_length)
    c = K.batch_dot(h, alpha, axes=1)  # (batch_size, encoding_dim)
    recurrence_result = K.expand_dims(
        K.concatenate([c, y_i], axis=1),
        axis=1)  # (batch_size, 1, 2 * encoding_dim); `dim=` is deprecated in favor of `axis=`

    expanded_h = Input(shape=(1, 2 * encoding_dim), name='expanded_h')
    gru = Sequential([
        GRU(output_dim, return_sequences=False,
            input_shape=(1, 2 * encoding_dim))
    ])
    model = Model(inputs=[expanded_h], outputs=[gru(expanded_h)])
    return model(recurrence_result)  # (batch_size, output_dim) since return_sequences=False
def call(self, x):
    assert isinstance(x, list)
    inp_a, inp_b = x
    last_state = K.expand_dims(inp_b[:, -1, :], 1)
    m = []
    for i in range(self.output_dim):
        outp_a = inp_a * self.W[i]
        outp_last = last_state * self.W[i]
        outp_a = K.l2_normalize(outp_a, -1)
        outp_last = K.l2_normalize(outp_last, -1)
        outp = K.batch_dot(outp_a, outp_last, axes=[2, 2])
        m.append(outp)
    if self.output_dim > 1:
        persp = K.concatenate(m, 2)
    else:
        persp = m[0]  # unwrap the single tensor instead of returning a list
    return [persp, persp]
def call(self, x):
    assert isinstance(x, list)
    inp_a, inp_b = x
    m = []
    for i in range(self.output_dim):
        outp_a = inp_a * self.W[i]
        outp_b = inp_b * self.W[i]
        outp_a = K.l2_normalize(outp_a, -1)
        outp_b = K.l2_normalize(outp_b, -1)
        outp = K.batch_dot(outp_a, outp_b, axes=[2, 2])
        outp = K.max(outp, -1, keepdims=True)
        m.append(outp)
    if self.output_dim > 1:
        persp = K.concatenate(m, 2)
    else:
        persp = m[0]  # unwrap the single tensor instead of returning a list
    return [persp, persp]
def hieroRecoModel_offline(input_shape):
    """
    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    features, loss_model -- Model() instances in Keras
    """
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # First Block
    X = Conv2D(64, (3, 3), strides=(2, 2), name='conv1')(X)
    X = BatchNormalization(axis=1, name='bn1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    X = Conv2D(64, (3, 3))(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    X = Flatten()(X)
    X = Dense(128, name='dense_layer')(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1))(X)

    features = Model(X_input, X, name="features")

    # Inputs of the siamese network
    anchor = Input(shape=input_shape)
    positive = Input(shape=input_shape)
    negative = Input(shape=input_shape)

    # Embedding features of the inputs
    anchor_features = features(anchor)
    pos_features = features(positive)
    neg_features = features(negative)

    input_triplet = [anchor, positive, negative]
    output_features = [anchor_features, pos_features, neg_features]

    # Define the trainable model
    loss_model = Model(inputs=input_triplet, outputs=output_features, name='loss')
    loss_model.add_loss(K.mean(triplet_loss(output_features)))
    loss_model.compile(loss=None, optimizer='adam')

    # Create model instance
    # model = Model(inputs=X_input, outputs=X, name='HieroRecoModel_off')

    return features, loss_model
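# `triplet_loss` is referenced above but not defined in this snippet. A minimal
# FaceNet-style sketch it plausibly matches (the margin `alpha` is an
# assumption, not taken from the original code):
from keras import backend as K

def triplet_loss(outputs, alpha=0.2):
    anchor, positive, negative = outputs
    # squared L2 distances between the (already L2-normalized) embeddings
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    # hinge: push negatives at least `alpha` farther than positives
    return K.maximum(pos_dist - neg_dist + alpha, 0.0)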
def make_patches(x, patch_size, patch_stride):
    from theano.tensor.nnet.neighbours import images2neibs
    x = K.expand_dims(x, 0)
    patches = images2neibs(x,
                           (patch_size, patch_size),
                           (patch_stride, patch_stride),
                           mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (K.shape(x)[1],
                         K.shape(patches)[0] // K.shape(x)[1],
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    patches_norm = K.l2_normalize(patches, 1)
    return patches, patches_norm
def call(self, inputs, mask=None):
    cos_m = math.cos(self.m)
    sin_m = math.sin(self.m)
    mm = sin_m * self.m
    threshold = math.cos(math.pi - self.m)
    # inputs:
    #   x: features; y_mask: 1-D or one-hot label, used as a mask
    x = inputs[0]
    y_mask = inputs[1]
    if y_mask.shape[-1] == 1:
        y_mask = K.cast(y_mask, tf.int32)
        y_mask = K.reshape(K.one_hot(y_mask, self.class_num),
                           (-1, self.class_num))
    # feature norm
    x = K.l2_normalize(x, axis=1)
    # weights norm
    self.W = K.l2_normalize(self.W, axis=0)
    # cos(theta + m)
    cos_theta = K.dot(x, self.W)
    cos_theta2 = K.square(cos_theta)
    sin_theta2 = 1. - cos_theta2
    sin_theta = K.sqrt(sin_theta2 + K.epsilon())
    cos_tm = self.s * ((cos_theta * cos_m) - (sin_theta * sin_m))
    # this condition keeps theta + m in the range [0, pi]:
    #   0 <= theta + m <= pi  <=>  -m <= theta <= pi - m
    cond_v = cos_theta - threshold
    cond = K.cast(K.relu(cond_v), dtype=tf.bool)
    keep_val = self.s * (cos_theta - mm)
    cos_tm_temp = tf.where(cond, cos_tm, keep_val)
    # mask by label
    y_mask += K.epsilon()  # was `y_mask =+ K.epsilon()`, which silently overwrote the mask
    inv_mask = 1. - y_mask
    s_cos_theta = self.s * cos_theta
    output = K.softmax((s_cos_theta * inv_mask) + (cos_tm_temp * y_mask))
    return output
def call(self, inputs):
    x1 = inputs[0]
    x2 = inputs[1]
    if self.match_type in ['dot']:
        if self.normalize:
            x1 = K.l2_normalize(x1, axis=2)
            x2 = K.l2_normalize(x2, axis=2)
        output = K.tf.einsum('abd,acd->abc', x1, x2)
        output = K.tf.expand_dims(output, 3)
    elif self.match_type in ['mul', 'plus', 'minus']:
        x1_exp = K.tf.stack([x1] * self.shape2[1], 2)
        x2_exp = K.tf.stack([x2] * self.shape1[1], 1)
        if self.match_type == 'mul':
            output = x1_exp * x2_exp
        elif self.match_type == 'plus':
            output = x1_exp + x2_exp
        elif self.match_type == 'minus':
            output = x1_exp - x2_exp
    elif self.match_type in ['concat']:
        x1_exp = K.tf.stack([x1] * self.shape2[1], axis=2)
        x2_exp = K.tf.stack([x2] * self.shape1[1], axis=1)
        output = K.tf.concat([x1_exp, x2_exp], axis=3)
    return output
def __init__(self, input_tensor, losses,
             input_range=(0, 255), wrt_tensor=None, norm_grads=True):
    """Creates an optimizer that minimizes a weighted loss function.

    Args:
        input_tensor: An input tensor of shape
            `(samples, channels, image_dims...)` if
            `image_data_format=channels_first`, or
            `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        input_range: Specifies the input range as a `(min, max)` tuple. This is
            used to rescale the final optimized input to the given range.
            (Default value: (0, 255))
        wrt_tensor: Short for "with respect to". This instructs the optimizer
            that the aggregate loss from `losses` should be minimized with
            respect to `wrt_tensor`. `wrt_tensor` can be any tensor that is
            part of the model graph. The default value of None means that the
            loss is simply minimized with respect to `input_tensor`.
        norm_grads: True to normalize gradients. Normalization avoids very
            small or large gradients and ensures a smooth gradient descent
            process. If you want the actual gradients (for example, for
            visualizing attention), set this to False.
    """
    self.input_tensor = input_tensor
    self.input_range = input_range
    self.loss_names = []
    self.loss_functions = []
    self.wrt_tensor = self.input_tensor if wrt_tensor is None else wrt_tensor
    if self.input_tensor is self.wrt_tensor:
        self.wrt_tensor_is_input_tensor = True
        self.wrt_tensor = K.identity(self.wrt_tensor)
    else:
        self.wrt_tensor_is_input_tensor = False

    overall_loss = None
    for loss, weight in losses:
        # Perf optimization. Don't build loss function with 0 weight.
        if weight != 0:
            loss_fn = weight * loss.build_loss()
            overall_loss = loss_fn if overall_loss is None else overall_loss + loss_fn
            self.loss_names.append(loss.name)
            self.loss_functions.append(loss_fn)

    # Compute gradient of the overall loss with respect to the `wrt` tensor.
    if self.wrt_tensor_is_input_tensor:
        grads = K.gradients(overall_loss, self.input_tensor)[0]
    else:
        grads = K.gradients(overall_loss, self.wrt_tensor)[0]
    if norm_grads:
        grads = K.l2_normalize(grads)

    # The main function to compute various quantities in the optimization loop.
    self.compute_fn = K.function([self.input_tensor, K.learning_phase()],
                                 self.loss_functions + [overall_loss, grads, self.wrt_tensor])
def VQA_MFB(image_model, text_model):
    # ''' Question Attention '''
    qatt_conv1 = Convolution2D(512, (1, 1))(text_model.output)
    qatt_relu = LeakyReLU()(qatt_conv1)
    qatt_conv2 = Convolution2D(2, (1, 1))(qatt_relu)  # (N,L,1,2)
    qatt_conv2 = Lambda(lambda x: K.squeeze(x, axis=2))(qatt_conv2)
    qatt_conv2 = Permute((2, 1))(qatt_conv2)
    qatt_softmax = Activation("softmax")(qatt_conv2)
    qatt_softmax = Reshape((11, 1, 2))(qatt_softmax)

    def t_qatt_mask(tensors):
        qatt_feature_list = []
        ten1 = tensors[0]
        ten2 = tensors[1]
        for i in range(2):
            t_qatt_mask = ten1[:, :, :, i]
            t_qatt_mask = K.reshape(t_qatt_mask, (-1, 11, 1, 1))
            t_qatt_mask = t_qatt_mask * ten2
            t_qatt_mask = K.sum(t_qatt_mask, axis=1, keepdims=True)
            qatt_feature_list.append(t_qatt_mask)
        qatt_feature_concat = K.concatenate(qatt_feature_list)
        return qatt_feature_concat

    q_feat_resh = Lambda(t_qatt_mask)([qatt_softmax, text_model.output])
    q_feat_resh = Lambda(lambda x: K.squeeze(x, axis=2))(q_feat_resh)
    q_feat_resh = Permute((2, 1))(q_feat_resh)
    q_feat_resh = Reshape([2048])(q_feat_resh)  # (N,2048)

    # ''' MFB Image with Attention '''
    i_feat_resh = Reshape((196, 1, 512))(image_model.output)
    iatt_fc = Dense(1024, activation="tanh")(q_feat_resh)
    iatt_resh = Reshape((1, 1, 1024))(iatt_fc)
    iatt_conv = Convolution2D(1024, (1, 1))(i_feat_resh)
    iatt_conv = LeakyReLU()(iatt_conv)
    iatt_eltwise = multiply([iatt_resh, iatt_conv])
    iatt_droped = Dropout(0.1)(iatt_eltwise)
    iatt_permute1 = Permute((3, 2, 1))(iatt_droped)
    iatt_resh2 = Reshape((512, 2, 196))(iatt_permute1)
    iatt_sum = Lambda(lambda x: K.sum(x, axis=2, keepdims=True))(iatt_resh2)
    iatt_permute2 = Permute((3, 2, 1))(iatt_sum)
    # signed square root
    iatt_sqrt = Lambda(lambda x: K.sqrt(Activation("relu")(x)) -
                       K.sqrt(Activation("relu")(-x)))(iatt_permute2)
    iatt_sqrt = Reshape([-1])(iatt_sqrt)
    iatt_l2 = Lambda(lambda x: K.l2_normalize(x, axis=1))(iatt_sqrt)
    iatt_l2 = Reshape((196, 1, 512))(iatt_l2)
    iatt_conv1 = Convolution2D(512, (1, 1))(iatt_l2)
    iatt_relu = LeakyReLU()(iatt_conv1)
    iatt_conv2 = Convolution2D(2, (1, 1))(iatt_relu)  # (N,2,196,1)
    iatt_conv2 = Reshape((2, 196))(iatt_conv2)
    iatt_softmax = Activation("softmax")(iatt_conv2)
    iatt_softmax = Reshape((196, 1, 2))(iatt_softmax)

    def iatt_feature_list(tensors):
        ten3 = tensors[0]
        ten4 = tensors[1]
        iatt_feature_list = []
        for j in range(2):
            iatt_mask = ten3[:, :, :, j]
            iatt_mask = K.reshape(iatt_mask, (-1, 196, 1, 1))
            iatt_mask = iatt_mask * ten4
            iatt_mask = K.sum(iatt_mask, axis=1, keepdims=True)
            iatt_feature_list.append(iatt_mask)
        iatt_feature_cat = K.concatenate(iatt_feature_list)
        return iatt_feature_cat

    iatt_feature_cat = Lambda(iatt_feature_list)([iatt_softmax, i_feat_resh])
    iatt_feature_cat = Lambda(lambda x: K.squeeze(x, axis=2))(iatt_feature_cat)
    iatt_feature_cat = Permute((2, 1))(iatt_feature_cat)
    iatt_feature_cat = Reshape([1024])(iatt_feature_cat)

    # ''' Fine-grained Image-Question MFH fusion '''
    mfb_q = Dense(1024, activation="tanh")(q_feat_resh)
    mfb_i = Dense(1024)(iatt_feature_cat)
    mfb_i = LeakyReLU()(mfb_i)
    mfb_eltwise = multiply([mfb_q, mfb_i])
    mfb_drop1 = Dropout(0.1)(mfb_eltwise)
    mfb_resh = Reshape((512, 2, 1))(mfb_drop1)
    mfb_sum = Lambda(lambda x: K.sum(x, axis=2, keepdims=True))(mfb_resh)
    mfb_out = Reshape([512])(mfb_sum)
    mfb_sqrt = Lambda(lambda x: K.sqrt(Activation("relu")(x)) -
                      K.sqrt(Activation("relu")(-x)))(mfb_out)
    mfb_l2_1 = Lambda(lambda x: K.l2_normalize(x, axis=1))(mfb_sqrt)

    # Second-order MFH block (disabled):
    # mfb_q2 = Dense(1024, activation="tanh")(text_model.output)
    # mfb_i2 = Dense(1024)(iatt_feature_cat)
    # mfb_i2 = LeakyReLU()(mfb_i2)
    # mfb_eltwise2 = multiply([mfb_q2, mfb_i2])
    # mfb_eltwise2 = multiply([mfb_eltwise2, mfb_drop1])
    # mfb_drop2 = Dropout(0.1)(mfb_eltwise2)
    # mfb_resh2 = Reshape((512, 2, 1))(mfb_drop2)
    # mfb_sum2 = Lambda(lambda x: K.sum(x, 2, keepdims=True))(mfb_resh2)
    # mfb_out2 = Reshape([512])(mfb_sum2)
    # mfb_sqrt2 = Lambda(lambda x: K.sqrt(Activation("relu")(x)) - K.sqrt(Activation("relu")(-x)))(mfb_out2)
    # mfb_l2_2 = Lambda(lambda x: K.l2_normalize(x, axis=1))(mfb_sqrt2)
    # mfb_l2_3 = Concatenate(axis=1)([mfb_l2_1, mfb_l2_2])

    fc1 = Dense(1024)(mfb_l2_1)
    fc1_lr = LeakyReLU()(fc1)
    prediction = Dense(len(class_to_label), activation="softmax")(fc1_lr)

    vqa_model = Model(inputs=[image_model.input, text_model.input],
                      outputs=prediction)
    vqa_model.compile(loss='categorical_crossentropy',
                      optimizer="SGD",
                      metrics=["accuracy"])
    return vqa_model
def cosine_distance(vests):
    x, y = vests
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    return -K.mean(x * y, axis=-1, keepdims=True)
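# Hypothetical wiring of `cosine_distance` as a merge layer between two
# embedding inputs (the shapes and variable names are assumptions):
from keras.layers import Input, Lambda
from keras.models import Model

vec_a = Input(shape=(300,))
vec_b = Input(shape=(300,))
# the Lambda passes the list of tensors through as `vests`
similarity = Lambda(cosine_distance)([vec_a, vec_b])
model = Model(inputs=[vec_a, vec_b], outputs=similarity)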
def call(self, x, mask=None):
    output = K.l2_normalize(x, self.axis)
    output *= self.gamma
    return output
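# For context, a minimal full layer this `call` plausibly belongs to: an
# SSD-style scaled L2 normalization with a learnable per-channel `gamma`.
# The class name and the initial scale value are assumptions.
from keras import backend as K
from keras.initializers import Constant
from keras.layers import Layer

class L2Normalize(Layer):
    def __init__(self, axis=-1, gamma_init=20.0, **kwargs):
        super(L2Normalize, self).__init__(**kwargs)
        self.axis = axis
        self.gamma_init = gamma_init

    def build(self, input_shape):
        # one trainable scale per channel along the normalization axis
        self.gamma = self.add_weight(name='gamma',
                                     shape=(input_shape[self.axis],),
                                     initializer=Constant(self.gamma_init),
                                     trainable=True)
        super(L2Normalize, self).build(input_shape)

    def call(self, x, mask=None):
        output = K.l2_normalize(x, self.axis)
        output *= self.gamma
        return output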
def ipca_model(concept_arraynew2,
               dense2,
               predict,
               f_train,
               y_train,
               f_val,
               y_val,
               n_concept,
               verbose=False,
               epochs=20,
               metric='binary_accuracy'):
    """Returns main function of ipca."""
    pool1f_input = Input(shape=(f_train.shape[1],), name='pool1_input')
    cluster_input = K.variable(concept_arraynew2)
    proj_weight = Weight((f_train.shape[1], n_concept))(pool1f_input)
    proj_weight_n = Lambda(lambda x: K.l2_normalize(x, axis=0))(proj_weight)
    eye = K.eye(n_concept) * 1e-5
    proj_recon_t = Lambda(
        lambda x: K.dot(x, tf.linalg.inv(K.dot(K.transpose(x), x) + eye)))(
            proj_weight)
    proj_recon = Lambda(
        lambda x: K.dot(K.dot(x[0], x[2]), K.transpose(x[1])))(
            [pool1f_input, proj_weight, proj_recon_t])
    # proj_recon2 = Lambda(lambda x: x[0] - K.dot(K.dot(x[0], K.dot(x[1],
    #     tf.linalg.inv(K.dot(K.transpose(x[1]), x[1]) + 1e-5 * K.eye(n_concept)))),
    #     K.transpose(x[1])))([pool1f_input, proj_weight])
    cov1 = Lambda(lambda x: K.mean(K.dot(x[0], x[1]), axis=1))(
        [cluster_input, proj_weight_n])
    cov0 = Lambda(lambda x: x - K.mean(x, axis=0, keepdims=True))(cov1)
    cov0_abs = Lambda(lambda x: K.abs(K.l2_normalize(x, axis=0)))(cov0)
    cov0_abs_flat = Lambda(lambda x: K.reshape(x, (-1, n_concept)))(cov0_abs)
    cov = Lambda(lambda x: K.dot(K.transpose(x), x))(cov0_abs_flat)
    fc2_pr = dense2(proj_recon)
    softmax_pr = predict(fc2_pr)

    finetuned_model_pr = Model(inputs=pool1f_input, outputs=softmax_pr)
    finetuned_model_pr.layers[-1].activation = sigmoid
    print(finetuned_model_pr.layers[-1].activation)
    finetuned_model_pr.layers[-1].trainable = False
    finetuned_model_pr.layers[-2].trainable = False
    finetuned_model_pr.layers[-3].trainable = False
    finetuned_model_pr.compile(
        loss=concept_loss(cov, cov0_abs, 0, n_concept),
        optimizer=Adam(lr=0.001),
        metrics=[metric])
    if verbose:
        print(finetuned_model_pr.summary())
    finetuned_model_pr.fit(
        f_train,
        y_train,
        batch_size=50,
        epochs=epochs,
        validation_data=(f_val, y_val),
        verbose=verbose)
    finetuned_model_pr.layers[-1].trainable = False
    finetuned_model_pr.layers[-2].trainable = False
    finetuned_model_pr.layers[-3].trainable = False
    finetuned_model_pr.compile(
        loss=concept_loss(cov, cov0_abs, 1, n_concept),
        optimizer=Adam(lr=0.001),
        metrics=[metric])
    return finetuned_model_pr
def call(self, x, mask=None):
    x -= K.mean(x, axis=1, keepdims=True)
    x = K.l2_normalize(x, axis=1)
    pos = K.relu(x)
    neg = K.relu(-x)
    return K.concatenate([pos, neg], axis=1)
def exponent_neg_cosine_distance(x, hidden_size=50):
    """Helper function for the similarity estimate of the LSTM outputs."""
    leftNorm = K.l2_normalize(x[:, :hidden_size], axis=-1)
    rightNorm = K.l2_normalize(x[:, hidden_size:], axis=-1)
    return K.exp(K.sum(K.prod([leftNorm, rightNorm], axis=0),
                       axis=1, keepdims=True))
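# Hypothetical usage: `x` is the concatenation of the left and right LSTM
# final states, so the helper can be dropped into a Lambda merge. The input
# sizes below are assumptions (hidden_size=50 matches the default above):
from keras.layers import Input, Lambda, concatenate
from keras.models import Model

left_state = Input(shape=(50,))
right_state = Input(shape=(50,))
merged = concatenate([left_state, right_state])  # (batch, 100)
similarity = Lambda(exponent_neg_cosine_distance)(merged)  # (batch, 1)
model = Model(inputs=[left_state, right_state], outputs=similarity)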
def conv2flat(x):
    fmodel = Sequential()
    fmodel.add(Flatten(input_shape=x.shape[1:]))
    fmodel.add(Lambda(lambda t: K.l2_normalize(t, 1)))  # renamed lambda arg to avoid shadowing the outer `x`
    return fmodel.predict(x, verbose=True, batch_size=32)
def cosine_distance(left, right):
    left = K.l2_normalize(left, axis=-1)
    right = K.l2_normalize(right, axis=-1)
    return -K.mean(left * right, axis=-1, keepdims=True)
def gpu_pairwise_cosine(A):
    """Computes the pairwise cosine similarity matrix."""
    A = K.l2_normalize(A, axis=-1)
    A_tr = K.transpose(A)
    return K.dot(A, A_tr)
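# Quick numeric sanity check against NumPy (assumes a TensorFlow backend so
# that `K.eval` can run the graph eagerly):
import numpy as np
from keras import backend as K

A = np.random.rand(4, 8).astype('float32')
An = A / np.linalg.norm(A, axis=-1, keepdims=True)  # row-normalize
expected = An.dot(An.T)                             # pairwise cosine matrix
result = K.eval(gpu_pairwise_cosine(K.constant(A)))
assert np.allclose(result, expected, atol=1e-5)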
def call(self, inputs, **kwargs):
    output = K.l2_normalize(inputs, self.axis)
    output *= self.gamma
    return output
def train_top_model(output_train, output_val, instance):
    global Nr
    global Nt
    H_normalization_factor = np.sqrt(Nr * Nt)

    # load bottleneck features
    with open(
            f'models/{quantization}/sample_complexity/bottleneck_features_train{instance}.npy',
            'rb') as file1:
        input_train = np.load(file1)
    with open(
            f'models/{quantization}/sample_complexity/bottleneck_features_val{instance}.npy',
            'rb') as file2:
        input_val = np.load(file2)

    top_model = Sequential()
    top_model.add(Flatten())
    top_model.add(
        Dense(numOutputs,
              activation="linear",
              kernel_initializer=keras.initializers.glorot_normal(1)))
    top_model.add(
        Lambda(lambda x: H_normalization_factor * K.l2_normalize(x, axis=-1)))
    top_model.add(Reshape(output_dim))
    top_model.compile(loss="mse", optimizer="adam")

    start_time = time.time()
    start_memory = p.memory_info().rss
    top_model.fit(
        input_train,
        output_train,
        epochs=epochs,
        # batch_size=64,
        shuffle=True,
        validation_data=(input_val, output_val),
        callbacks=[
            keras.callbacks.EarlyStopping(
                monitor="val_loss",
                min_delta=1e-7,
                patience=5,
                # restore_best_weights=True,
            ),
            keras.callbacks.ReduceLROnPlateau(
                factor=0.5,
                min_delta=1e-7,
                patience=2,
                cooldown=5,
                verbose=1,
                min_lr=1e-6,
            ),
        ],
    )
    cpu_time[str(instance)] = time.time() - start_time
    memory_consumption[str(instance)] = p.memory_info().rss - start_memory
    top_model.save_weights(
        f'models/{quantization}/sample_complexity/bottleneck_fc_model_weights{instance}.h5'
    )
    return top_model
def cosine_proximity(y_true, y_pred):
    y_true = K.l2_normalize(y_true, axis=-1)
    y_pred = K.l2_normalize(y_pred, axis=-1)
    return -K.mean(y_true * y_pred)
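# Hypothetical usage as a training loss; the regression model below is a
# stand-in for illustration, not from the original code:
from keras.layers import Dense
from keras.models import Sequential

model = Sequential([Dense(32, input_shape=(64,))])
model.compile(optimizer='adam', loss=cosine_proximity)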
def call(self, x, mask=None):
    output = K.l2_normalize(x, self.axis)
    return output * self.gamma
x1 = Convolution2D(64, (1, 1), padding='same', activation="relu")(x1)
x1 = BatchNormalization()(x1)
x1 = Dropout(0.4)(x1)
"""

# The Flatten layer collapses the multi-dimensional input into 1-D; it is
# commonly used for the transition from convolutional layers to fully
# connected (Dense) layers.
x1 = Flatten()(x1)
# Dense (fully connected) layer
x1 = Dense(512, activation="relu")(x1)
x1 = Dropout(0.2)(x1)
# x1 = BatchNormalization()(x1)
feat_x = Dense(128, activation="linear")(x1)
# L2-normalize the embedding via a Lambda applied to the previous layer's output
feat_x = Lambda(lambda x: K.l2_normalize(x, axis=1))(feat_x)

model_top = Model(inputs=[im_in], outputs=feat_x)
model_top.summary()

im_in1 = Input(shape=(200, 200, 4))
im_in2 = Input(shape=(200, 200, 4))

feat_x1 = model_top(im_in1)
feat_x2 = model_top(im_in2)

# compute the distance between the two embeddings
lambda_merge = Lambda(euclidean_distance)([feat_x1, feat_x2])
def memLstm_custom_model(hparams, context, context_mask, utterances,
                         context_profile_flag, utterances_profile_flag):
    print("context: ", context._keras_shape)
    print("context_mask: ", context_mask._keras_shape)
    print("utterances: ", utterances._keras_shape)
    print("context_profile_flag: ", context_profile_flag._keras_shape)
    print("utterances_profile_flag: ", utterances_profile_flag._keras_shape)
    # print("profile_mask: ", profile_mask._keras_shape)

    # Use embedding matrix pretrained by Gensim
    # embeddings_W = np.load('data/advising/wiki_advising_embedding_W.npy')
    embeddings_W = np.load(
        '/ext2/dstc7/data/wiki_advising_aKB_test1_test2_embedding_W.npy')
    print("embeddings_W: ", embeddings_W.shape)

    ############################ Define Regular Layers ############################
    # Utterances embedding
    # (Output shape: NUM_OPTIONS(100) x BATCH_SIZE(?) x LEN_SEQ(160) x EMBEDDING_DIM(300))
    embedding_context_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_context_len,
        mask_zero=True,
        trainable=False)
    embedding_utterance_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_utterance_len,
        mask_zero=True,
        trainable=False)

    # Define LSTM context encoder 1
    LSTM_A = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_context_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=True,
                  return_sequences=True)
    # Define LSTM utterances encoder
    LSTM_B = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_utterance_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=False,
                  return_sequences=False)
    '''
    # Define LSTM context encoder 2
    LSTM_C = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_context_len,
                               hparams.memn2n_embedding_dim + 2),
                  unit_forget_bias=True,
                  return_state=False,
                  return_sequences=True)
    '''
    # Define Dense layer to transform utterances
    Matrix_utterances = Dense(
        hparams.memn2n_rnn_dim,
        use_bias=False,
        kernel_initializer=keras.initializers.TruncatedNormal(
            mean=0.0, stddev=1.0, seed=None),
        input_shape=(hparams.memn2n_rnn_dim,))
    # Define Dense layer to do softmax
    Dense_2 = Dense(1,
                    use_bias=False,
                    kernel_initializer=keras.initializers.TruncatedNormal(
                        mean=0.0, stddev=1.0, seed=None),
                    input_shape=(hparams.memn2n_rnn_dim,))

    ############################ Define Custom Layers ############################
    # Max layer
    max_layer = Lambda(lambda x: K.max(x, axis=-1))
    # Repeat-elements layers
    custom_repeat_layer = Lambda(
        lambda x: K.repeat_elements(x, hparams.max_context_len, 1))
    custom_repeat_layer2 = Lambda(
        lambda x: K.repeat_elements(x, hparams.num_utterance_options, 1))
    # Expand-dimension layer
    expand_dim_layer = Lambda(lambda x: K.expand_dims(x, axis=1))
    # Amplify layer
    amplify_layer = Lambda(lambda x: x * hparams.amplify_val)
    # Softmax layers
    softmax_layer = Lambda(lambda x: K.softmax(Masking()(x), axis=-1))
    softmax_layer2 = Lambda(lambda x: K.softmax(Masking()(x), axis=1))
    # Stack layer
    Stack = Lambda(lambda x: K.stack(x, axis=1))
    # Naming tensors
    responses_dot_layer = Lambda(lambda x: x, name='responses_dot')
    responses_attention_layer = Lambda(lambda x: x, name='responses_attention')
    context_attention_layer = Lambda(lambda x: x, name='context_attention')
    # Concat = Lambda(lambda x: K.concatenate(x, axis=1))
    # Sum over the last dimension / over dimension 1
    Sum = Lambda(lambda x: K.sum(x, axis=-1))
    Sum2 = Lambda(lambda x: K.sum(x, axis=1))
    # Normalize layer
    Normalize = Lambda(lambda x: K.l2_normalize(x, axis=-1))
    # Tensor slice layers
    GetFirstHalfTensor = Lambda(lambda x: x[:, :, :hparams.memn2n_rnn_dim])
    GetFirstTensor = Lambda(lambda x: x[:, 0, :])
    GetLastHalfTensor = Lambda(lambda x: x[:, :, hparams.memn2n_rnn_dim:])
    GetLastTensor = Lambda(lambda x: x[:, -1, :])
    GetReverseTensor = Lambda(lambda x: K.reverse(x, axes=1))

    ############################ Apply Layers ############################
    # Prepare masks
    utterances_mask = Reshape((1, hparams.max_context_len))(context_mask)
    utterances_mask = custom_repeat_layer2(utterances_mask)
    context_mask = Reshape((hparams.max_context_len, 1))(context_mask)

    # Prepare profile
    # context_profile_flag_max = max_layer(context_profile_flag)
    # context_profile_flag_max = Reshape((hparams.max_context_len, 1))(context_profile_flag_max)

    # Context embedding: (BATCH_SIZE(?) x CONTEXT_LEN x EMBEDDING_DIM)
    context_embedded = embedding_context_layer(context)
    print("context_embedded: ", context_embedded.shape)
    print("context_embedded (history): ", context_embedded._keras_history, '\n')
    # Skip this?
    # context_embedded = Concatenate(axis=-1)([context_embedded, context_speaker])

    # Utterances embedding: (BATCH_SIZE(?) x NUM_OPTIONS x UTTERANCE_LEN x EMBEDDING_DIM)
    utterances_embedded = TimeDistributed(
        embedding_utterance_layer,
        input_shape=(hparams.num_utterance_options,
                     hparams.max_utterance_len))(utterances)
    print("Utterances_embedded: ", utterances_embedded.shape)
    print("Utterances_embedded (history): ",
          utterances_embedded._keras_history, '\n')

    # Encode context A: (BATCH_SIZE(?) x CONTEXT_LEN x RNN_DIM)
    all_context_encoded_Forward,\
    all_context_encoded_Forward_h,\
    all_context_encoded_Forward_c = LSTM_A(context_embedded)
    all_context_encoded_Backward,\
    all_context_encoded_Backward_h,\
    all_context_encoded_Backward_c = LSTM_A(
        Masking()(GetReverseTensor(context_embedded)))
        # initial_state=[all_context_encoded_Forward_h, all_context_encoded_Forward_c])
    all_context_encoded_Backward = Masking()(
        GetReverseTensor(all_context_encoded_Backward))
    # print("context_encoded_A: ", len(context_encoded_A))
    print("all_context_encoded_Forward: ", all_context_encoded_Forward.shape)
    print("all_context_encoded_Forward (history): ",
          all_context_encoded_Forward._keras_history)
    print("all_context_encoded_Backward: ", all_context_encoded_Backward.shape)
    print("all_context_encoded_Backward (history): ",
          all_context_encoded_Backward._keras_history)

    # Define bi-directional sum
    all_context_encoded_Bidir_sum = Add()(
        [all_context_encoded_Forward, all_context_encoded_Backward])
    print("all_context_encoded_Bidir_sum: ",
          all_context_encoded_Bidir_sum._keras_shape)
    print("all_context_encoded_Bidir_sum: (history)",
          all_context_encoded_Bidir_sum._keras_history, '\n')

    prof_aug_context_encoded_Forward = Concatenate(axis=-1)(
        [all_context_encoded_Forward, context_profile_flag])
    prof_aug_context_encoded_Backward = Concatenate(axis=-1)(
        [all_context_encoded_Backward, context_profile_flag])
    print("prof_aug_context_encoded_Forward: ",
          prof_aug_context_encoded_Forward.shape)
    print("prof_aug_context_encoded_Forward (history): ",
          prof_aug_context_encoded_Forward._keras_history)
    print("prof_aug_context_encoded_Backward: ",
          prof_aug_context_encoded_Backward.shape)
    print("prof_aug_context_encoded_Backward (history): ",
          prof_aug_context_encoded_Backward._keras_history, '\n')

    # Encode utterances B: (BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
    all_utterances_encoded_B = TimeDistributed(
        LSTM_B,
        input_shape=(hparams.num_utterance_options,
                     hparams.max_utterance_len,
                     hparams.memn2n_embedding_dim))(utterances_embedded)
    all_utterances_encoded_B = TimeDistributed(
        Matrix_utterances,
        input_shape=(hparams.num_utterance_options,
                     hparams.memn2n_rnn_dim))(all_utterances_encoded_B)
    print("all_utterances_encoded_B: ", all_utterances_encoded_B._keras_shape)
    print("all_utterances_encoded_B: (history)",
          all_utterances_encoded_B._keras_history, '\n')

    responses_attention = []
    responses_dot = []
    context_attention = None
    for i in range(hparams.hops):
        print(str(i + 1) + 'th hop:')
        # 1st attention & weighted sum:
        # between Utterances_B (NUM_OPTIONS x RNN_DIM) and
        # Contexts_encoded_Forward (CONTEXT_LEN x RNN_DIM), then softmax.
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x CONTEXT_LEN)
        prof_aug_utterances_encoded_B = Concatenate(axis=-1)(
            [all_utterances_encoded_B, utterances_profile_flag])
        attention_Forward = Dot(axes=[2, 2])(
            [prof_aug_utterances_encoded_B, prof_aug_context_encoded_Forward])
        dot_Forward = attention_Forward
        attention_Forward = amplify_layer(attention_Forward)
        attention_Forward = Add()([attention_Forward, utterances_mask])
        attention_Forward = softmax_layer(attention_Forward)
        print("attention_Forward: ", attention_Forward._keras_shape)
        print("attention_Forward: (history)", attention_Forward._keras_history)
        # Between Attention (NUM_OPTIONS x CONTEXT_LEN) and
        # Contexts_A (CONTEXT_LEN x RNN_DIM): a weighted sum of Contexts_A
        # according to the attention.
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
        weighted_sum_Forward = Dot(axes=[2, 1])(
            [attention_Forward, all_context_encoded_Bidir_sum])
        print("weighted_sum: ", weighted_sum_Forward._keras_shape)
        print("weighted_sum: (history)", weighted_sum_Forward._keras_history, '\n')
        # (Output shape: ? x NUM_OPTIONS(100) x RNN_DIM)
        all_utterances_encoded_B = Add()(
            [weighted_sum_Forward, all_utterances_encoded_B])

        # 2nd attention & weighted sum:
        # between Utterances_B and Contexts_encoded_Backward, then softmax.
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x CONTEXT_LEN)
        prof_aug_utterances_encoded_B = Concatenate(axis=-1)(
            [all_utterances_encoded_B, utterances_profile_flag])
        attention_Backward = Dot(axes=[2, 2])(
            [prof_aug_utterances_encoded_B, prof_aug_context_encoded_Backward])
        dot_Backward = attention_Backward
        attention_Backward = amplify_layer(attention_Backward)
        attention_Backward = Add()([attention_Backward, utterances_mask])
        attention_Backward = softmax_layer(attention_Backward)
        print("attention_Backward: ", attention_Backward._keras_shape)
        print("attention_Backward: (history)", attention_Backward._keras_history)
        # Weighted sum of Contexts_A according to the backward attention.
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
        weighted_sum_Backward = Dot(axes=[2, 1])(
            [attention_Backward, all_context_encoded_Bidir_sum])
        print("weighted_sum_Backward: ", weighted_sum_Backward._keras_shape)
        print("weighted_sum_Backward: (history)",
              weighted_sum_Backward._keras_history, '\n')
        # (Output shape: ? x NUM_OPTIONS(100) x RNN_DIM)
        all_utterances_encoded_B = Add()(
            [weighted_sum_Backward, all_utterances_encoded_B])

        dot_Forward = Reshape((1, hparams.num_utterance_options,
                               hparams.max_context_len))(dot_Forward)
        dot_Backward = Reshape((1, hparams.num_utterance_options,
                                hparams.max_context_len))(dot_Backward)
        att_Forward = expand_dim_layer(attention_Forward)
        att_Backward = expand_dim_layer(attention_Backward)
        merge_dots = Concatenate(axis=1)([dot_Forward, dot_Backward])
        merge_responses = Concatenate(axis=1)([att_Forward, att_Backward])
        responses_dot.append(merge_dots)
        responses_attention.append(merge_responses)
        print("responses_attention[i]:", merge_responses._keras_shape)

        if i < hparams.hops - 1:
            continue
            '''
            temp = all_context_encoded_Forward
            all_context_encoded_Forward = all_context_encoded_Backward
            all_context_encoded_Backward = temp
            '''
        else:
            print("hop ended")
            ############# Attention to Context #############
            # (Output shape: ? x MAX_CONTEXT_LEN x 1)
            attention_Forward_wrt_context = TimeDistributed(
                Dense_2,
                input_shape=(hparams.max_context_len,
                             hparams.memn2n_rnn_dim))(all_context_encoded_Forward)
            attention_Forward_wrt_context = amplify_layer(
                attention_Forward_wrt_context)
            attention_Forward_wrt_context = Add()(
                [attention_Forward_wrt_context, context_mask])
            attention_Forward_wrt_context = softmax_layer2(
                attention_Forward_wrt_context)
            # (Output shape: ? x 1 x RNN_DIM)
            weighted_sum_Forward_wrt_context = Dot(axes=[1, 1])(
                [attention_Forward_wrt_context, all_context_encoded_Bidir_sum])

            # (Output shape: ? x MAX_CONTEXT_LEN x 1)
            attention_Backward_wrt_context = TimeDistributed(
                Dense_2,
                input_shape=(hparams.max_context_len,
                             hparams.memn2n_rnn_dim))(all_context_encoded_Backward)
            attention_Backward_wrt_context = amplify_layer(
                attention_Backward_wrt_context)
            attention_Backward_wrt_context = Add()(
                [attention_Backward_wrt_context, context_mask])
            attention_Backward_wrt_context = softmax_layer2(
                attention_Backward_wrt_context)
            # (Output shape: ? x 1 x RNN_DIM)
            weighted_sum_Backward_wrt_context = Dot(axes=[1, 1])(
                [attention_Backward_wrt_context, all_context_encoded_Bidir_sum])

            att_Forward_wrt_context = Reshape(
                (1, hparams.max_context_len))(attention_Forward_wrt_context)
            att_Backward_wrt_context = Reshape(
                (1, hparams.max_context_len))(attention_Backward_wrt_context)
            context_attention = Concatenate(axis=1)(
                [att_Forward_wrt_context, att_Backward_wrt_context])

            context_encoded_AplusC = Add()([
                weighted_sum_Forward_wrt_context,
                weighted_sum_Backward_wrt_context
            ])
            print("context_encoded_AplusC: ", context_encoded_AplusC.shape)
            print("context_encoded_AplusC: (history)",
                  context_encoded_AplusC._keras_history, '\n')

            # (Output shape: ? x 1 x NUM_OPTIONS(100))
            logits = Dot(axes=[2, 2])(
                [context_encoded_AplusC, all_utterances_encoded_B])
            logits = Reshape((hparams.num_utterance_options,))(logits)
            print("logits: ", logits.shape)
            print("logits: (history)", logits._keras_history, '\n')

    # Softmax over the logits (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100))
    probs = Activation('softmax', name='probs')(logits)
    print("probs: ", probs.shape)
    print("final History: ", probs._keras_history, '\n')

    # Return probabilities (likelihoods) of each utterance; these are used to
    # compute the 'sparse_categorical_crossentropy' loss.
    if hparams.hops == 1:
        responses_dot = Reshape((1, 2, hparams.num_utterance_options,
                                 hparams.max_context_len))(responses_dot[0])
        responses_attention = Reshape(
            (1, 2, hparams.num_utterance_options,
             hparams.max_context_len))(responses_attention[0])
    else:
        responses_dot = Stack(responses_dot)
        responses_attention = Stack(responses_attention)
    responses_dot = responses_dot_layer(responses_dot)
    responses_attention = responses_attention_layer(responses_attention)
    context_attention = context_attention_layer(context_attention)
    print("responses_attention:", responses_attention._keras_shape)
    print("context_attention:", context_attention._keras_shape)

    return probs, context_attention, responses_attention, responses_dot
def cos_sim(values):
    x, y = values
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    return K.sum(x * y, axis=-1, keepdims=True)
def vggvox_model():
    inp = Input(c.INPUT_SHAPE, name='input')  # INPUT_SHAPE=(512, None, 1), grayscale spectrogram
    x = conv_bn_pool(inp, layer_idx=1, conv_filters=96,
                     conv_kernel_size=(7, 7), conv_strides=(2, 2),
                     conv_pad=(1, 1),
                     pool='max', pool_size=(3, 3), pool_strides=(2, 2))
    x = conv_bn_pool(x, layer_idx=2, conv_filters=256,
                     conv_kernel_size=(5, 5), conv_strides=(2, 2),
                     conv_pad=(1, 1),
                     pool='max', pool_size=(3, 3), pool_strides=(2, 2))
    x = conv_bn_pool(x, layer_idx=3, conv_filters=384,
                     conv_kernel_size=(3, 3), conv_strides=(1, 1),
                     conv_pad=(1, 1))
    x = conv_bn_pool(x, layer_idx=4, conv_filters=256,
                     conv_kernel_size=(3, 3), conv_strides=(1, 1),
                     conv_pad=(1, 1))
    x = conv_bn_pool(x, layer_idx=5, conv_filters=256,
                     conv_kernel_size=(3, 3), conv_strides=(1, 1),
                     conv_pad=(1, 1),
                     pool='max', pool_size=(5, 3), pool_strides=(3, 2))
    x = conv_bn_dynamic_apool(x, layer_idx=6, conv_filters=4096,
                              conv_kernel_size=(9, 1), conv_strides=(1, 1),
                              conv_pad=(0, 0), conv_layer_prefix='fc')
    x = conv_bn_pool(x, layer_idx=7, conv_filters=1024,
                     conv_kernel_size=(1, 1), conv_strides=(1, 1),
                     conv_pad=(0, 0), conv_layer_prefix='fc')
    x = Lambda(lambda y: K.l2_normalize(y, axis=3), name='norm')(x)  # L2 normalization
    # x = Conv2D(filters=1024, kernel_size=(1, 1), strides=(1, 1), padding='valid', name='fc8')(x)
    x = Conv2D(filters=c.N_CLASS, kernel_size=(1, 1), strides=(1, 1),
               padding='valid', activation="softmax", name='fc8')(x)
    m = Model(inp, x, name='VGGVox')
    return m
forward3 = Conv1D(512, 17, padding="same", activation=activation_C)(pool2)
dr3 = Dropout(dropout_size)(forward3)
pool3 = MaxPooling1D(3, strides=2, padding='same')(dr3)

forward4 = Conv1D(1024, 17, padding="same", activation=activation_C)(pool3)
dr4 = Dropout(dropout_size)(forward4)
pool4 = MaxPooling1D(128, padding='same')(dr4)  # max-pooling across the entire caption

out_audio = Reshape([int(dr4.shape[2])], name='reshape_audio')(pool4)
out_audio = Dense(connection_size, activation='linear',
                  name='dense_audio')(out_audio)
out_audio = Lambda(lambda x: K.l2_normalize(x, axis=-1),
                   name='out_audio')(out_audio)

# ........................................................... Visual Network
visual_sequence = Input(shape=(4096,))
out_visual = Dense(connection_size, activation='linear',
                   name='dense_visual')(visual_sequence)
out_visual = Lambda(lambda x: K.l2_normalize(x, axis=-1),
                    name='out_visual')(out_visual)

# ............................................ combining audio-visual networks
L_layer = keras.layers.dot([out_visual, out_audio], axes=-1, name='dot')
def correlation(x, y):
    x = x - K.mean(x, 1, keepdims=True)
    y = y - K.mean(y, 1, keepdims=True)
    x = K.l2_normalize(x, 1)
    y = K.l2_normalize(y, 1)
    return K.sum(x * y, 1, keepdims=True)
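# After centering, L2 normalization turns the dot product into
# cov/(std_x * std_y), so this is the per-sample Pearson correlation. A quick
# check against NumPy (assumes a TensorFlow backend):
import numpy as np
from keras import backend as K

a = np.random.rand(2, 16).astype('float32')
b = np.random.rand(2, 16).astype('float32')
r = K.eval(correlation(K.constant(a), K.constant(b)))
expected = [np.corrcoef(a[i], b[i])[0, 1] for i in range(2)]
assert np.allclose(r.ravel(), expected, atol=1e-5)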
def fcn_norm_loss_graph(target_masks, pred_heatmap):
    """Smooth-L1 loss between L2-normalized heatmaps for the FCN head.

    target_masks: [batch, height, width, num_classes]
    pred_heatmap: [batch, height, width, num_classes] float32 tensor
    """
    # Reshape for simplicity. Merge first two dimensions into one.
    print('\n>>> fcn_norm_loss_graph ')
    print('    target_masks shape :', target_masks.shape)
    print('    pred_heatmap shape :', pred_heatmap.shape)

    print('\n    L2 normalization of pred_heatmap ---------------------------')
    pred_shape = KB.shape(pred_heatmap)
    print('    pred_shape: KB.shape:', pred_shape,
          ' tf.get_shape():', pred_heatmap.get_shape(),
          ' pred_heatmap.shape:', pred_heatmap.shape,
          ' tf.shape:', tf.shape(pred_heatmap))
    output_flatten = KB.reshape(pred_heatmap,
                                (pred_shape[0], -1, pred_shape[-1]))
    output_norm1 = KB.l2_normalize(output_flatten, axis=1)
    output_norm = KB.reshape(output_norm1, pred_shape)
    print('    output_flatten    :', KB.int_shape(output_flatten),
          output_flatten.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_flatten))
    print('    output_norm1      :', KB.int_shape(output_norm1),
          output_norm1.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_norm1))
    print('    output_norm final :', KB.int_shape(output_norm),
          output_norm.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_norm))

    print('\n    L2 normalization of target_masks ---------------------------')
    target_shape = KB.shape(target_masks)
    print('    target shape is   :', target_shape, ' ',
          target_masks.get_shape(), target_masks.shape,
          tf.shape(target_masks))
    gauss_flatten = KB.reshape(target_masks,
                               (target_shape[0], -1, target_shape[-1]))
    gauss_norm1 = KB.l2_normalize(gauss_flatten, axis=1)
    gauss_norm = KB.reshape(gauss_norm1, target_shape)
    print('    gauss_flatten     :', gauss_flatten.shape,
          gauss_flatten.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_flatten))
    print('    gauss_norm shape  :', gauss_norm1.shape,
          gauss_norm1.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_norm1))
    print('    gauss_norm final shape:', gauss_norm.shape,
          gauss_norm.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_norm))

    pred_heatmap1 = output_norm
    target_masks1 = gauss_norm

    target_masks1 = KB.reshape(target_masks1,
                               (-1, pred_shape[1], pred_shape[2]))
    print('    target_masks1 shape :', target_masks1.get_shape(),
          KB.int_shape(target_masks1))
    pred_heatmap1 = KB.reshape(pred_heatmap1,
                               (-1, pred_shape[1], pred_shape[2]))
    print('    pred_heatmap1 shape :', pred_heatmap1.get_shape())

    # Smooth-L1 loss; if there are no positive targets, return 0.
    loss = KB.switch(
        tf.size(target_masks1) > 0,
        smooth_l1_loss(y_true=target_masks1, y_pred=pred_heatmap1),
        tf.constant(0.0))
    loss = KB.mean(loss)
    loss = KB.reshape(loss, [1, 1])
    print('    loss type is :', type(loss))
    return loss
def l2Norm(x):
    return K.l2_normalize(x, axis=-1)
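# Typical wiring: applied after an embedding head so outputs live on the unit
# hypersphere (the layer sizes below are assumptions for illustration):
from keras.layers import Dense, Input, Lambda
from keras.models import Model

inp = Input(shape=(256,))
emb = Dense(128)(inp)
emb = Lambda(l2Norm, output_shape=(128,))(emb)
model = Model(inputs=inp, outputs=emb)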
def create_model():
    myInput = Input(shape=(96, 96, 3))

    x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)
    x = Lambda(LRN2D, name='lrn_1')(x)
    x = Conv2D(64, (1, 1), name='conv2')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(192, (3, 3), name='conv3')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x)
    x = Activation('relu')(x)
    x = Lambda(LRN2D, name='lrn_2')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)

    # Inception 3a
    inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x)
    inception_3a_3x3 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
    inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
    inception_3a_3x3 = Conv2D(128, (3, 3),
                              name='inception_3a_3x3_conv2')(inception_3a_3x3)
    inception_3a_3x3 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)

    inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x)
    inception_3a_5x5 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
    inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
    inception_3a_5x5 = Conv2D(32, (5, 5),
                              name='inception_3a_5x5_conv2')(inception_3a_5x5)
    inception_3a_5x5 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)

    inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
    inception_3a_pool = Conv2D(
        32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool)
    inception_3a_pool = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool)
    inception_3a_pool = Activation('relu')(inception_3a_pool)
    inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)

    inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x)
    inception_3a_1x1 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1)
    inception_3a_1x1 = Activation('relu')(inception_3a_1x1)

    inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5,
                                inception_3a_pool, inception_3a_1x1], axis=3)

    # Inception 3b
    inception_3b_3x3 = Conv2D(96, (1, 1),
                              name='inception_3b_3x3_conv1')(inception_3a)
    inception_3b_3x3 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
    inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
    inception_3b_3x3 = Conv2D(128, (3, 3),
                              name='inception_3b_3x3_conv2')(inception_3b_3x3)
    inception_3b_3x3 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)

    inception_3b_5x5 = Conv2D(32, (1, 1),
                              name='inception_3b_5x5_conv1')(inception_3a)
    inception_3b_5x5 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
    inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
    inception_3b_5x5 = Conv2D(64, (5, 5),
                              name='inception_3b_5x5_conv2')(inception_3b_5x5)
    inception_3b_5x5 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)

    inception_3b_pool = AveragePooling2D(pool_size=(3, 3),
                                         strides=(3, 3))(inception_3a)
    inception_3b_pool = Conv2D(
        64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool)
    inception_3b_pool = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool)
    inception_3b_pool = Activation('relu')(inception_3b_pool)
    inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)

    inception_3b_1x1 = Conv2D(64, (1, 1),
                              name='inception_3b_1x1_conv')(inception_3a)
    inception_3b_1x1 = BatchNormalization(
        axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1)
    inception_3b_1x1 = Activation('relu')(inception_3b_1x1)

    inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5,
                                inception_3b_pool, inception_3b_1x1], axis=3)

    # Inception 3c
    inception_3c_3x3 = utils.conv2d_bn(inception_3b,
                                       layer='inception_3c_3x3',
                                       cv1_out=128, cv1_filter=(1, 1),
                                       cv2_out=256, cv2_filter=(3, 3),
                                       cv2_strides=(2, 2), padding=(1, 1))
    inception_3c_5x5 = utils.conv2d_bn(inception_3b,
                                       layer='inception_3c_5x5',
                                       cv1_out=32, cv1_filter=(1, 1),
                                       cv2_out=64, cv2_filter=(5, 5),
                                       cv2_strides=(2, 2), padding=(2, 2))
    inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
    inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)
    inception_3c = concatenate(
        [inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)

    # Inception 4a
    inception_4a_3x3 = utils.conv2d_bn(inception_3c,
                                       layer='inception_4a_3x3',
                                       cv1_out=96, cv1_filter=(1, 1),
                                       cv2_out=192, cv2_filter=(3, 3),
                                       cv2_strides=(1, 1), padding=(1, 1))
    inception_4a_5x5 = utils.conv2d_bn(inception_3c,
                                       layer='inception_4a_5x5',
                                       cv1_out=32, cv1_filter=(1, 1),
                                       cv2_out=64, cv2_filter=(5, 5),
                                       cv2_strides=(1, 1), padding=(2, 2))
    inception_4a_pool = AveragePooling2D(pool_size=(3, 3),
                                         strides=(3, 3))(inception_3c)
    inception_4a_pool = utils.conv2d_bn(inception_4a_pool,
                                        layer='inception_4a_pool',
                                        cv1_out=128, cv1_filter=(1, 1),
                                        padding=(2, 2))
    inception_4a_1x1 = utils.conv2d_bn(inception_3c,
                                       layer='inception_4a_1x1',
                                       cv1_out=256, cv1_filter=(1, 1))
    inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5,
                                inception_4a_pool, inception_4a_1x1], axis=3)

    # Inception 4e
    inception_4e_3x3 = utils.conv2d_bn(inception_4a,
                                       layer='inception_4e_3x3',
                                       cv1_out=160, cv1_filter=(1, 1),
                                       cv2_out=256, cv2_filter=(3, 3),
                                       cv2_strides=(2, 2), padding=(1, 1))
    inception_4e_5x5 = utils.conv2d_bn(inception_4a,
                                       layer='inception_4e_5x5',
                                       cv1_out=64, cv1_filter=(1, 1),
                                       cv2_out=128, cv2_filter=(5, 5),
                                       cv2_strides=(2, 2), padding=(2, 2))
    inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
    inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)
    inception_4e = concatenate(
        [inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)

    # Inception 5a
    inception_5a_3x3 = utils.conv2d_bn(inception_4e,
                                       layer='inception_5a_3x3',
                                       cv1_out=96, cv1_filter=(1, 1),
                                       cv2_out=384, cv2_filter=(3, 3),
                                       cv2_strides=(1, 1), padding=(1, 1))
    inception_5a_pool = AveragePooling2D(pool_size=(3, 3),
                                         strides=(3, 3))(inception_4e)
    inception_5a_pool = utils.conv2d_bn(inception_5a_pool,
                                        layer='inception_5a_pool',
                                        cv1_out=96, cv1_filter=(1, 1),
                                        padding=(1, 1))
    inception_5a_1x1 = utils.conv2d_bn(inception_4e,
                                       layer='inception_5a_1x1',
                                       cv1_out=256, cv1_filter=(1, 1))
    inception_5a = concatenate(
        [inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)

    # Inception 5b
    inception_5b_3x3 = utils.conv2d_bn(inception_5a,
                                       layer='inception_5b_3x3',
                                       cv1_out=96, cv1_filter=(1, 1),
                                       cv2_out=384, cv2_filter=(3, 3),
                                       cv2_strides=(1, 1), padding=(1, 1))
    inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)
    inception_5b_pool = utils.conv2d_bn(inception_5b_pool,
                                        layer='inception_5b_pool',
                                        cv1_out=96, cv1_filter=(1, 1))
    inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)
    inception_5b_1x1 = utils.conv2d_bn(inception_5a,
                                       layer='inception_5b_1x1',
                                       cv1_out=256, cv1_filter=(1, 1))
    inception_5b = concatenate(
        [inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)

    av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
    reshape_layer = Flatten()(av_pool)
    dense_layer = Dense(128, name='dense_layer')(reshape_layer)
    norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1),
                        name='norm_layer')(dense_layer)

    return Model(inputs=[myInput], outputs=norm_layer)
def LeNet_plus_plus(perform_L2_norm=False,
                    activation_type='softmax',
                    ring_approach=False,
                    background_class=False,
                    knownsMinimumMag=None,
                    center_approach=False,
                    aux_input=None):
    """Defines the network architecture for LeNet++.

    Use the options for different approaches:
    background_class: classification with an additional class for negative samples
    ring_approach: ObjectoSphere loss applied if True
    knownsMinimumMag: minimum magnitude allowed for samples belonging to one
        of the known classes if ring_approach is True
    """
    mnist_image = Input(shape=(28, 28, 1), dtype='float32', name='mnist_image')

    # 28 x 28 --> 14 x 14
    conv1_1 = Conv2D(32, (5, 5), strides=1, padding="same",
                     name='conv1_1')(mnist_image)
    conv1_2 = Conv2D(32, (5, 5), strides=1, padding="same",
                     name='conv1_2')(conv1_1)
    conv1_2 = BatchNormalization(name='BatchNormalization_1')(conv1_2)
    pool1 = MaxPooling2D(pool_size=(2, 2), strides=2, name='pool1')(conv1_2)

    # 14 x 14 --> 7 x 7
    conv2_1 = Conv2D(64, (5, 5), strides=1, padding="same",
                     name='conv2_1')(pool1)
    conv2_2 = Conv2D(64, (5, 5), strides=1, padding="same",
                     name='conv2_2')(conv2_1)
    conv2_2 = BatchNormalization(name='BatchNormalization_2')(conv2_2)
    pool2 = MaxPooling2D(pool_size=(2, 2), strides=2, name='pool2')(conv2_2)

    # 7 x 7 --> 3 x 3
    conv3_1 = Conv2D(128, (5, 5), strides=1, padding="same",
                     name='conv3_1')(pool2)
    conv3_2 = Conv2D(128, (5, 5), strides=1, padding="same",
                     name='conv3_2')(conv3_1)
    conv3_2 = BatchNormalization(name='BatchNormalization_3')(conv3_2)
    pool3 = MaxPooling2D(pool_size=(2, 2), strides=2, name='pool3')(conv3_2)

    flatten = Flatten(name='flatten')(pool3)
    fc = Dense(2, name='fc', use_bias=True)(flatten)

    if perform_L2_norm:
        alpha_multipliers = Input((1,), dtype='float32', name='alphas')
        act = Activation(lambda x: alpha_multipliers * K.l2_normalize(x, axis=1),
                         name='act')(fc)
        pred = Dense(10, activation=activation_type, name='pred',
                     use_bias=False)(act)
        model = Model(inputs=[mnist_image, alpha_multipliers], outputs=[pred])
    elif center_approach:
        # incorporate center loss and objectosphere loss
        pred = Dense(10, name='pred', use_bias=False)(fc)
        softmax = Activation(activation_type, name='softmax')(pred)
        x = prelu(fc, name='side_out')
        centerlosslayer = CenterLossLayer(alpha=0.5,
                                          name='centerlosslayer')([x, aux_input])
        # dummy use of knownsMinimumMag to stop Keras from complaining
        mag = knownsMinimumMag
        model = Model(inputs=[mnist_image, knownsMinimumMag, aux_input],
                      outputs=[softmax, fc, centerlosslayer])
        # model.summary()
    elif knownsMinimumMag is not None:
        knownUnknownsFlag = Input((1,), dtype='float32',
                                  name='knownUnknownsFlag')
        pred = Dense(10, name='pred', use_bias=False)(fc)
        softmax = Activation(activation_type, name='softmax')(pred)
        model = Model(inputs=[mnist_image, knownsMinimumMag],
                      outputs=[softmax, fc])
        model.summary()
    elif background_class:
        pred = Dense(11, name='pred', use_bias=False)(fc)
        softmax = Activation(activation_type, name='softmax')(pred)
        model = Model(inputs=[mnist_image], outputs=[softmax])
    else:
        pred = Dense(10, name='pred', use_bias=False)(fc)
        softmax = Activation(activation_type, name='softmax')(pred)
        model = Model(inputs=[mnist_image], outputs=[softmax])
    return model
def inception_v2(input_shape, embedding_size=512, dropout=0.3):
    """
    Implementation of the Inception model used for FaceNet

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape, name='feed_input')

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # First Block
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(X)
    X = BatchNormalization(axis=1, name='bn1')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    # Second Block
    X = Conv2D(128, (1, 1), strides=(1, 1), name='conv2')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn2')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)

    # Third Block
    X = Conv2D(192, (3, 3), strides=(1, 1), name='conv3')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn3')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D(pool_size=3, strides=2)(X)

    # Inception 1: a/b/c
    X = inception_block_1a(X)
    X = inception_block_1b(X)
    X = inception_block_1c(X)

    # Inception 2: a/b
    X = inception_block_2a(X)
    X = inception_block_2b(X)

    # Inception 3: a/b
    X = inception_block_3a(X)
    X = inception_block_3b(X)

    # Top layer
    X = AveragePooling2D(pool_size=(3, 3), strides=(1, 1),
                         data_format='channels_first')(X)
    X = Flatten()(X)

    # Dropout
    # X = Dropout(dropout)(X)
    X = Dense(embedding_size, name='fc1')(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1), name="feed_output")(X)

    # Create model instance
    model = Model(inputs=X_input, outputs=X, name='inception_v2')

    return model
c3 = do_conv_act(p1, 64, k_size=5, st_size=1, k_reg=l2(gl_wd), padtype='same')
c4 = do_conv_act(c3, 64, k_size=5, st_size=1, k_reg=l2(gl_wd), padtype='same')
p2 = do_pool(c4, 2)
c5 = do_conv_act(p2, 128, k_size=5, st_size=1, k_reg=l2(gl_wd), padtype='same')
c6 = do_conv_act(c5, 128, k_size=5, st_size=1, k_reg=l2(gl_wd), padtype='same')
p3 = do_pool(c6, 2)

flat = Flatten()(p3)
fc1 = Dense(3, kernel_regularizer=l2(gl_wd), use_bias=False)(flat)
act_1 = PReLU()(fc1)
bn1 = BatchNormalization()(act_1)
l2_fc1 = Lambda(lambda x: K.l2_normalize(x, axis=1))(bn1)
scale_l2 = Lambda(lambda x: x * 1)(l2_fc1)  # scale factor for the normalized features (currently 1)
fc_cl = Dense(nb_classes, activation='softmax')(scale_l2)
model = Model(inputs=img_input, outputs=fc_cl)

# Set the optimizer
sgd = SGD(lr=0.01, decay=0, momentum=0.9, nesterov=False)
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])
model.summary()
def l2_normalize(x):
    return K.l2_normalize(x, 0)
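# Note: axis 0 normalizes each feature across the batch dimension, not each
# sample, so results depend on batch composition. A quick demonstration
# (assumes a TensorFlow backend):
import numpy as np
from keras import backend as K

x = np.array([[3.0, 0.0], [4.0, 1.0]], dtype='float32')
print(K.eval(l2_normalize(K.constant(x))))  # columns, not rows, have unit norm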
def timestepaverage(x):
    x = K.mean(x, axis=1)
    x = K.l2_normalize(x, axis=1)
    return x
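# Hypothetical usage: collapse an RNN's per-timestep outputs into a single
# unit-length sequence vector (the GRU encoder and sizes are assumptions):
from keras.layers import GRU, Input, Lambda
from keras.models import Model

seq = Input(shape=(20, 100))
states = GRU(64, return_sequences=True)(seq)              # (batch, 20, 64)
sentence = Lambda(timestepaverage, output_shape=(64,))(states)  # (batch, 64)
model = Model(inputs=seq, outputs=sentence)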
def cosine_distance(vecs):
    x, y = vecs
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    return -K.mean(x * y, axis=-1, keepdims=True)
def __call__(self, x, mask=None):
    half = len(x) // 2  # integer division; `len(x) / 2` is a float and fails as an index in Python 3
    dotProd = K.dot(x[:half], x[half:])
    l2norm = K.l2_normalize(dotProd)
    return l2norm
def faceRecoModel(input_shape):
    """
    Implementation of the Inception model used for FaceNet

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # First Block
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(X)
    X = BatchNormalization(axis=1, name='bn1')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    # Second Block
    X = Conv2D(64, (1, 1), strides=(1, 1), name='conv2')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn2')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)

    # Third Block
    X = Conv2D(192, (3, 3), strides=(1, 1), name='conv3')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn3')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D(pool_size=3, strides=2)(X)

    # Inception 1: a/b/c
    X = inception_block_1a(X)
    X = inception_block_1b(X)
    X = inception_block_1c(X)

    # Inception 2: a/b
    X = inception_block_2a(X)
    X = inception_block_2b(X)

    # Inception 3: a/b
    X = inception_block_3a(X)
    X = inception_block_3b(X)

    # Top layer
    X = AveragePooling2D(pool_size=(3, 3), strides=(1, 1),
                         data_format='channels_first')(X)
    X = Flatten()(X)
    X = Dense(128, name='dense_layer')(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1))(X)

    # Create model instance
    model = Model(inputs=X_input, outputs=X, name='FaceRecoModel')

    return model
def faceRecoModel(input_shape):
    myInput = Input(input_shape)

    x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)
    x = Lambda(LRN2D, name='lrn_1')(x)
    x = Conv2D(64, (1, 1), name='conv2')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(192, (3, 3), name='conv3')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x)
    x = Activation('relu')(x)
    x = Lambda(LRN2D, name='lrn_2')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)

    # Inception3a
    inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
    inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
    inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)

    inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
    inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
    inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)

    inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
    inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool)
    inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool)
    inception_3a_pool = Activation('relu')(inception_3a_pool)
    inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)

    inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x)
    inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1)
    inception_3a_1x1 = Activation('relu')(inception_3a_1x1)

    inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3)

    # Inception3b
    inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
    inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
    inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)

    inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
    inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
    inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)

    inception_3b_pool = Lambda(lambda x: x ** 2, name='power2_3b')(inception_3a)
    inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3b_pool)
    inception_3b_pool = Lambda(lambda x: x * 9, name='mult9_3b')(inception_3b_pool)
    inception_3b_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_3b')(inception_3b_pool)
    inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool)
    inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool)
    inception_3b_pool = Activation('relu')(inception_3b_pool)
    inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)

    inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a)
    inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1)
    inception_3b_1x1 = Activation('relu')(inception_3b_1x1)

    inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3)

    # Inception3c
    inception_3c_3x3 = conv2d_bn(inception_3b, layer='inception_3c_3x3', cv1_out=128, cv1_filter=(1, 1),
                                 cv2_out=256, cv2_filter=(3, 3), cv2_strides=(2, 2), padding=(1, 1))
    inception_3c_5x5 = conv2d_bn(inception_3b, layer='inception_3c_5x5', cv1_out=32, cv1_filter=(1, 1),
                                 cv2_out=64, cv2_filter=(5, 5), cv2_strides=(2, 2), padding=(2, 2))
    inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
    inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)

    inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)

    # Inception4a
    inception_4a_3x3 = conv2d_bn(inception_3c, layer='inception_4a_3x3', cv1_out=96, cv1_filter=(1, 1),
                                 cv2_out=192, cv2_filter=(3, 3), cv2_strides=(1, 1), padding=(1, 1))
    inception_4a_5x5 = conv2d_bn(inception_3c, layer='inception_4a_5x5', cv1_out=32, cv1_filter=(1, 1),
                                 cv2_out=64, cv2_filter=(5, 5), cv2_strides=(1, 1), padding=(2, 2))
    inception_4a_pool = Lambda(lambda x: x ** 2, name='power2_4a')(inception_3c)
    inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4a_pool)
    inception_4a_pool = Lambda(lambda x: x * 9, name='mult9_4a')(inception_4a_pool)
    inception_4a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_4a')(inception_4a_pool)
    inception_4a_pool = conv2d_bn(inception_4a_pool, layer='inception_4a_pool', cv1_out=128, cv1_filter=(1, 1),
                                  padding=(2, 2))
    inception_4a_1x1 = conv2d_bn(inception_3c, layer='inception_4a_1x1', cv1_out=256, cv1_filter=(1, 1))

    inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3)

    # Inception4e
    inception_4e_3x3 = conv2d_bn(inception_4a, layer='inception_4e_3x3', cv1_out=160, cv1_filter=(1, 1),
                                 cv2_out=256, cv2_filter=(3, 3), cv2_strides=(2, 2), padding=(1, 1))
    inception_4e_5x5 = conv2d_bn(inception_4a, layer='inception_4e_5x5', cv1_out=64, cv1_filter=(1, 1),
                                 cv2_out=128, cv2_filter=(5, 5), cv2_strides=(2, 2), padding=(2, 2))
    inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
    inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)

    inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)

    # Inception5a
    inception_5a_3x3 = conv2d_bn(inception_4e, layer='inception_5a_3x3', cv1_out=96, cv1_filter=(1, 1),
                                 cv2_out=384, cv2_filter=(3, 3), cv2_strides=(1, 1), padding=(1, 1))
    inception_5a_pool = Lambda(lambda x: x ** 2, name='power2_5a')(inception_4e)
    inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_5a_pool)
    inception_5a_pool = Lambda(lambda x: x * 9, name='mult9_5a')(inception_5a_pool)
    inception_5a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_5a')(inception_5a_pool)
    inception_5a_pool = conv2d_bn(inception_5a_pool, layer='inception_5a_pool', cv1_out=96, cv1_filter=(1, 1),
                                  padding=(1, 1))
    inception_5a_1x1 = conv2d_bn(inception_4e, layer='inception_5a_1x1', cv1_out=256, cv1_filter=(1, 1))

    inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)

    # Inception5b
    inception_5b_3x3 = conv2d_bn(inception_5a, layer='inception_5b_3x3', cv1_out=96, cv1_filter=(1, 1),
                                 cv2_out=384, cv2_filter=(3, 3), cv2_strides=(1, 1), padding=(1, 1))
    inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)
    inception_5b_pool = conv2d_bn(inception_5b_pool, layer='inception_5b_pool', cv1_out=96, cv1_filter=(1, 1))
    inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)
    inception_5b_1x1 = conv2d_bn(inception_5a, layer='inception_5b_1x1', cv1_out=256, cv1_filter=(1, 1))

    inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)

    av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
    reshape_layer = Flatten()(av_pool)
    dense_layer = Dense(128, name='dense_layer')(reshape_layer)
    norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer)

    # Final Model
    model = Model(inputs=myInput, outputs=norm_layer)
    model = load_model_weights(model)

    # Freeze everything, then re-enable the configured trainable layers
    for layer in model.layers:
        layer.trainable = False
    for layer_name in trainable_layers:
        model.get_layer(layer_name).trainable = True

    return model
def algo2(s1_ga_pools, s1_gb_pools, s2_ga_pools, s2_gb_pools, use_cos, use_euc, use_abs):
    """
    :param s1_ga_pools: List of 'group A' outputs of sentence 1 for different pooling types [max, min, avg]
                        where each entry has shape (?, len(filters_ga), fnum_ga)
    :param s2_ga_pools: List of 'group A' outputs of sentence 2 for different pooling types [max, min, avg]
                        where each entry has shape (?, len(filters_ga), fnum_ga)
    :param s1_gb_pools: List of 'group B' outputs of sentence 1 for different pooling types [max, min]
                        where each entry has shape (?, len(filters_gb), embeddings_dim, fnum_gb)
    :param s2_gb_pools: List of 'group B' outputs of sentence 2 for different pooling types [max, min]
                        where each entry has shape (?, len(filters_gb), embeddings_dim, fnum_gb)
    """
    # First part of the algorithm using Group A outputs
    assert use_cos or use_euc or use_abs, "You should use either cos, euc or abs"

    res1 = []
    i = 0
    for s1_ga, s2_ga in zip(s1_ga_pools, s2_ga_pools):
        sims = []
        s1_ga_shape = s1_ga.get_shape().as_list()
        s2_ga_shape = s2_ga.get_shape().as_list()

        if use_cos:
            # Cosine similarity
            # Shape: cos_sim = (?, len(filters_ga), len(filters_ga))
            cos_sim = Dot(axes=2, normalize=True, name="a2_ga_{}pool_cos".format(i))([s1_ga, s2_ga])
            sims.append(Flatten()(cos_sim))

        if use_euc:
            # Euclidean distance
            # Shape: euc_dis = (?, len(filters_ga), len(filters_ga))
            s1_ga_bis = Reshape((s1_ga_shape[1], 1, s1_ga_shape[2]))(s1_ga)
            s2_ga_bis = Reshape((1, s2_ga_shape[1], s2_ga_shape[2]))(s2_ga)
            euc_dis = Lambda(
                lambda x: K.sqrt(K.clip(K.sum(K.square(x[0] - x[1]), axis=-1, keepdims=False),
                                        K.epsilon(), 1e+10)),
                name="a2_ga_{}pool_euc".format(i))([s1_ga_bis, s2_ga_bis])
            sims.append(Flatten()(euc_dis))

        if use_abs:
            # Shape: abs_dis = (?, len(filters_ga), len(filters_ga))
            s1_ga_bis = Reshape((s1_ga_shape[1], 1, s1_ga_shape[2]))(s1_ga)
            s2_ga_bis = Reshape((1, s2_ga_shape[1], s2_ga_shape[2]))(s2_ga)
            # abs_dis = Lambda(lambda x: K.sum(K.abs(K.clip(x[0] - x[1], 1e-7, 1e+10)), axis=-1, keepdims=False),
            # abs_dis = Lambda(lambda x: K.sum(K.abs(x[0] - x[1]), axis=-1, keepdims=False),
            #                  name="a2_ga_{}pool_abs".format(i))([s1_ga_bis, s2_ga_bis])
            abs_dis = Lambda(
                lambda x: K.abs(x[0] - x[1]),
                name="a2_ga_{}pool_abs".format(i))([s1_ga_bis, s2_ga_bis])
            sims.append(Flatten()(abs_dis))

        if len(sims) == 1:
            res1.append(sims[0])
        else:
            res1.append(Concatenate()(sims))
        i += 1

    # Shape: feaa = (?, 3 * 3 * len(filters_ga) * len(filters_ga))
    if res1:
        if len(res1) == 1:
            feaa = res1[0]
        else:
            feaa = Concatenate(name="feaa")(res1)
    else:
        print("feaa is None")
        feaa = None

    # Second part of the algorithm using Group B outputs
    res2 = []
    i = 0
    for s1_gb, s2_gb in zip(s1_gb_pools, s2_gb_pools):
        sims = []

        if use_cos:
            # Vector norms of len(filters_gb)-dimensional vectors
            # s1_norm.shape = s2_norm.shape = (?, len(filters_gb), embedding_dim, fnum_gb)
            s1_norm = Lambda(lambda x: K.l2_normalize(x, axis=2), name="{}pool_s1_norm".format(i))(s1_gb)
            s2_norm = Lambda(lambda x: K.l2_normalize(x, axis=2), name="{}pool_s2_norm".format(i))(s2_gb)
            # Cosine similarity between vectors of shape (embedding_dim,)
            # cos_sim.shape = (?, len(filters_gb) * fnum_gb)
            cos_sim = Flatten()(Lambda(
                lambda x: K.sum(x[0] * x[1], axis=2),
                name="a2_gb_{}pool_cos".format(i))([s1_norm, s2_norm]))
            sims.append(cos_sim)

        if use_euc:
            # Euclidean distance between vectors of shape (embedding_dim,)
            # euc_dis.shape = (?, len(filters_gb) * fnum_gb)
            # euc_dis = Flatten()(Lambda(lambda x: K.sqrt(K.sum(K.square(K.clip(x[0] - x[1], 1e-7, 1e+10)),
            euc_dis = Flatten()(Lambda(
                lambda x: K.sqrt(K.clip(K.sum(K.square(x[0] - x[1]), axis=2), K.epsilon(), 1e+10)),
                name="a2_gb_{}pool_euc".format(i))([s1_gb, s2_gb]))
            sims.append(euc_dis)

        if use_abs:
            # abs_dis.shape = (?, len(filters_gb) * embeddings_dim * fnum_gb)
            # abs_dis = Flatten()(Lambda(lambda x: K.sum(K.abs(K.clip(x[0] - x[1], 1e-7, 1e+10)),
            # abs_dis = Flatten()(Lambda(lambda x: K.sum(K.abs(x[0] - x[1]),
            #                            axis=2), name="a2_gb_{}pool_abs".format(i))([s1_gb, s2_gb]))
            abs_dis = Flatten()(Lambda(
                lambda x: K.abs(x[0] - x[1]),
                name="a2_gb_{}pool_abs".format(i))([s1_gb, s2_gb]))
            sims.append(abs_dis)

        if len(sims) == 1:
            res2.append(sims[0])
        else:
            res2.append(Concatenate(axis=1)(sims))
        i += 1

    # feab = (?, 2 * (2 + embeddings_dim) * len(filters_gb) * fnum_gb)
    if res2:
        if len(res2) == 1:
            feab = res2[0]
        else:
            feab = Concatenate(name="feab")(res2)
    else:
        print("feab is None!")
        feab = None

    return feaa, feab
def normalize(x):
    return K.l2_normalize(x, axis=1)
def cosine_similarity(vests):
    x, y = vests
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    return K.sum(x * y, axis=-1, keepdims=True)
def vgg_16_cbcnn(input_shape, no_classes, bilinear_output_dim, sum_pool=True,
                 weight_decay_constant=5e-4, multi_label=False, weights_path=None):
    weights_regularizer = regularizers.l2(weight_decay_constant)

    # Input layer
    img_input = Input(shape=input_shape, name='spectr_input')

    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1',
               kernel_regularizer=weights_regularizer)(img_input)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2',
               kernel_regularizer=weights_regularizer)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2',
               kernel_regularizer=weights_regularizer)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3',
               kernel_regularizer=weights_regularizer)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3',
               kernel_regularizer=weights_regularizer)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2',
               kernel_regularizer=weights_regularizer)(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3',
               kernel_regularizer=weights_regularizer)(x)

    # Merge using compact bilinear method
    # dummy_tensor_for_output_dim = K.placeholder(shape=(bilinear_output_dim,))
    compact_bilinear_arg_list = [x, x]
    output_shape_x = x.get_shape().as_list()[1:]
    output_shape_cb = (output_shape_x[0], output_shape_x[1], bilinear_output_dim)
    x = merge(compact_bilinear_arg_list, mode=compact_bilinear, name='compact_bilinear',
              output_shape=output_shape_cb)

    # If sum_pool=True do a global sum pooling
    if sum_pool:
        # Since using tf, the 3rd axis represents channels
        x = Lambda(lambda x: K.sum(x, axis=[1, 2]))(x)

    # Signed square-root and L2-normalize the result
    x = Lambda(lambda x: K.sign(x) * K.sqrt(K.abs(x)))(x)
    x = Lambda(lambda x: K.l2_normalize(x, axis=-1))(x)

    # Final dense layer
    if not multi_label:
        final_activation = 'softmax'
    else:
        final_activation = 'sigmoid'
    x = Dense(no_classes, activation=final_activation, name='softmax_layer',
              kernel_regularizer=weights_regularizer)(x)

    # Put together input and output to form model
    model = Model(inputs=[img_input], outputs=[x])
    if weights_path:
        model.load_weights(weights_path, by_name=True)

    return model
def contrastiveLoss(Xl, Xr, y, margin=10.0):
    # Euclidean distance between the L2-normalized embeddings
    diff = K.l2_normalize(Xl, axis=-1) - K.l2_normalize(Xr, axis=-1)
    dist = K.sqrt(K.sum(K.square(diff), axis=-1))
    # Similar pairs (y=1) are pulled together; dissimilar pairs (y=0) are
    # pushed apart up to the margin.
    return y * dist + (1 - y) * K.maximum(margin - dist, 0.0)
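# Hedged training sketch: attaching contrastiveLoss with add_loss, mirroring
# the triplet setup used elsewhere in this collection. The input sizes and
# the Dense stand-in encoder are assumptions for illustration.
left_in = Input(shape=(256,))
right_in = Input(shape=(256,))
label_in = Input(shape=(1,))
encoder = Dense(64, activation='relu')  # shared encoder (placeholder)
el, er = encoder(left_in), encoder(right_in)
pair_model = Model([left_in, right_in, label_in], [el, er])
pair_model.add_loss(K.mean(contrastiveLoss(el, er, K.flatten(label_in))))
pair_model.compile(loss=None, optimizer='adam')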
def call(self, x, mask=None):
    return K.l2_normalize(x, axis=self.axis)
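# For context, a minimal custom layer this call could belong to; the class
# name, default axis, and import path are assumptions for illustration.
from keras.layers import Layer

class L2Normalize(Layer):
    def __init__(self, axis=-1, **kwargs):
        self.axis = axis
        super(L2Normalize, self).__init__(**kwargs)

    def call(self, x, mask=None):
        return K.l2_normalize(x, axis=self.axis)

    def compute_output_shape(self, input_shape):
        # Normalization preserves the input shape
        return input_shape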
def create_model():
    myInput = Input(shape=(96, 96, 3))

    x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)
    x = Lambda(LRN2D, name='lrn_1')(x)
    x = Conv2D(64, (1, 1), name='conv2')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(192, (3, 3), name='conv3')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x)
    x = Activation('relu')(x)
    x = Lambda(LRN2D, name='lrn_2')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)

    # Inception3a
    inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
    inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
    inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)

    inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
    inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
    inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)

    inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
    inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool)
    inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool)
    inception_3a_pool = Activation('relu')(inception_3a_pool)
    inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)

    inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x)
    inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1)
    inception_3a_1x1 = Activation('relu')(inception_3a_1x1)

    inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3)

    # Inception3b
    inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
    inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
    inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)

    inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
    inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
    inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)

    inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3a)
    inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool)
    inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool)
    inception_3b_pool = Activation('relu')(inception_3b_pool)
    inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)

    inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a)
    inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1)
    inception_3b_1x1 = Activation('relu')(inception_3b_1x1)

    inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3)

    # Inception3c
    inception_3c_3x3 = utils.conv2d_bn(inception_3b, layer='inception_3c_3x3', cv1_out=128, cv1_filter=(1, 1),
                                       cv2_out=256, cv2_filter=(3, 3), cv2_strides=(2, 2), padding=(1, 1))
    inception_3c_5x5 = utils.conv2d_bn(inception_3b, layer='inception_3c_5x5', cv1_out=32, cv1_filter=(1, 1),
                                       cv2_out=64, cv2_filter=(5, 5), cv2_strides=(2, 2), padding=(2, 2))
    inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
    inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)

    inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)

    # Inception4a
    inception_4a_3x3 = utils.conv2d_bn(inception_3c, layer='inception_4a_3x3', cv1_out=96, cv1_filter=(1, 1),
                                       cv2_out=192, cv2_filter=(3, 3), cv2_strides=(1, 1), padding=(1, 1))
    inception_4a_5x5 = utils.conv2d_bn(inception_3c, layer='inception_4a_5x5', cv1_out=32, cv1_filter=(1, 1),
                                       cv2_out=64, cv2_filter=(5, 5), cv2_strides=(1, 1), padding=(2, 2))
    inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3c)
    inception_4a_pool = utils.conv2d_bn(inception_4a_pool, layer='inception_4a_pool', cv1_out=128,
                                        cv1_filter=(1, 1), padding=(2, 2))
    inception_4a_1x1 = utils.conv2d_bn(inception_3c, layer='inception_4a_1x1', cv1_out=256, cv1_filter=(1, 1))

    inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3)

    # Inception4e
    inception_4e_3x3 = utils.conv2d_bn(inception_4a, layer='inception_4e_3x3', cv1_out=160, cv1_filter=(1, 1),
                                       cv2_out=256, cv2_filter=(3, 3), cv2_strides=(2, 2), padding=(1, 1))
    inception_4e_5x5 = utils.conv2d_bn(inception_4a, layer='inception_4e_5x5', cv1_out=64, cv1_filter=(1, 1),
                                       cv2_out=128, cv2_filter=(5, 5), cv2_strides=(2, 2), padding=(2, 2))
    inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
    inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)

    inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)

    # Inception5a
    inception_5a_3x3 = utils.conv2d_bn(inception_4e, layer='inception_5a_3x3', cv1_out=96, cv1_filter=(1, 1),
                                       cv2_out=384, cv2_filter=(3, 3), cv2_strides=(1, 1), padding=(1, 1))
    inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4e)
    inception_5a_pool = utils.conv2d_bn(inception_5a_pool, layer='inception_5a_pool', cv1_out=96,
                                        cv1_filter=(1, 1), padding=(1, 1))
    inception_5a_1x1 = utils.conv2d_bn(inception_4e, layer='inception_5a_1x1', cv1_out=256, cv1_filter=(1, 1))

    inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)

    # Inception5b
    inception_5b_3x3 = utils.conv2d_bn(inception_5a, layer='inception_5b_3x3', cv1_out=96, cv1_filter=(1, 1),
                                       cv2_out=384, cv2_filter=(3, 3), cv2_strides=(1, 1), padding=(1, 1))
    inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)
    inception_5b_pool = utils.conv2d_bn(inception_5b_pool, layer='inception_5b_pool', cv1_out=96,
                                        cv1_filter=(1, 1))
    inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)
    inception_5b_1x1 = utils.conv2d_bn(inception_5a, layer='inception_5b_1x1', cv1_out=256, cv1_filter=(1, 1))

    inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)

    av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
    reshape_layer = Flatten()(av_pool)
    dense_layer = Dense(128, name='dense_layer')(reshape_layer)
    norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer)

    return Model(inputs=[myInput], outputs=norm_layer)
def hieroRecoModel_online(input_shape):
    """
    VGG19-based embedding model (transfer learning) with a triplet-loss head

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    features, loss_model -- Model() instances in Keras
    """
    # Import the VGG19 model for transfer learning, without its output layers
    vgg_model = applications.VGG19(weights="imagenet", include_top=False, input_shape=input_shape)

    # Freeze the layers except the last 4
    for layer in vgg_model.layers[:-4]:
        layer.trainable = False

    # Check the layers
    for layer in vgg_model.layers:
        print(layer, layer.trainable)

    X_input = vgg_model.output

    # Adding custom layers
    X = Flatten()(X_input)
    X = Dense(512, activation="relu")(X)
    X = Dropout(0.5)(X)
    X = Dense(128, activation="relu")(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1))(X)

    # Create model instance
    features = Model(vgg_model.input, X, name="features")

    # Inputs of the siamese network
    anchor = Input(shape=input_shape)
    positive = Input(shape=input_shape)
    negative = Input(shape=input_shape)

    # Embedding features of the inputs
    anchor_features = features(anchor)
    pos_features = features(positive)
    neg_features = features(negative)

    input_triplet = [anchor, positive, negative]
    output_features = [anchor_features, pos_features, neg_features]

    # Define the trainable model
    loss_model = Model(inputs=input_triplet, outputs=output_features, name='loss')
    loss_model.add_loss(K.mean(triplet_loss(output_features)))
    loss_model.compile(loss=None, optimizer='adam')

    return features, loss_model
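# Hedged training sketch for the (features, loss_model) pair returned above.
# Since the model was compiled with loss=None and add_loss, fit takes no
# targets; the triplet arrays, input shape, and epochs are placeholders.
features, loss_model = hieroRecoModel_online((224, 224, 3))
loss_model.fit([anchors, positives, negatives], None, epochs=10, batch_size=32)
embeddings = features.predict(anchors)  # unit-norm embeddings for retrieval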
def call(self, inputs):
    inputs -= K.mean(inputs, axis=1, keepdims=True)
    inputs = K.l2_normalize(inputs, axis=1)
    pos = K.relu(inputs)
    neg = K.relu(-inputs)
    return K.concatenate([pos, neg], axis=1)
def compute_cos_match_score(l_r):
    l, r = l_r
    return K.batch_dot(
        K.l2_normalize(l, axis=-1),
        K.l2_normalize(r, axis=-1),
        axes=[2, 2])
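# Shape sketch (lengths and dimensions are assumptions): for inputs of shape
# (batch, len_l, d) and (batch, len_r, d), batch_dot over axes [2, 2] yields
# the pairwise cosine-similarity matrix of shape (batch, len_l, len_r).
sent1_states = Input(shape=(20, 300))
sent2_states = Input(shape=(25, 300))
match_matrix = Lambda(compute_cos_match_score)([sent1_states, sent2_states])  # (batch, 20, 25)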
def antirectifier(x):
    x -= K.mean(x, axis=1, keepdims=True)
    x = K.l2_normalize(x, axis=1)
    pos = K.relu(x)
    neg = K.relu(-x)
    return K.concatenate([pos, neg], axis=1)
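# Because antirectifier doubles the feature dimension, a Lambda wrapper needs
# an explicit output_shape (the standard Keras antirectifier pattern); the
# input sizes below are illustrative assumptions.
def antirectifier_output_shape(input_shape):
    shape = list(input_shape)
    shape[-1] *= 2
    return tuple(shape)

inp = Input(shape=(256,))
h = Dense(128)(inp)
h = Lambda(antirectifier, output_shape=antirectifier_output_shape)(h)  # (batch, 256)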
def call(self, inputs):
    output = K.l2_normalize(inputs, axis=self.axis)
    return output
def vggvox_resnet2d_icassp(input_dim=(257, 250, 1), num_class=8631, mode='train', args=None):
    net = args.net
    loss = args.loss
    vlad_clusters = args.vlad_cluster
    ghost_clusters = args.ghost_cluster
    bottleneck_dim = args.bottleneck_dim
    aggregation = args.aggregation_mode
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())

    if net == 'resnet34s':
        inputs, x = backbone.resnet_2D_v1(input_dim=input_dim, mode=mode)
    else:
        inputs, x = backbone.resnet_2D_v2(input_dim=input_dim, mode=mode)

    # ===============================================
    #            Fully Connected Block 1
    # ===============================================
    x_fc = keras.layers.Conv2D(bottleneck_dim, (7, 1),
                               strides=(1, 1),
                               activation='relu',
                               kernel_initializer='orthogonal',
                               use_bias=True, trainable=True,
                               kernel_regularizer=keras.regularizers.l2(weight_decay),
                               bias_regularizer=keras.regularizers.l2(weight_decay),
                               name='x_fc')(x)

    # ===============================================
    #            Feature Aggregation
    # ===============================================
    if aggregation == 'avg':
        if mode == 'train':
            x = keras.layers.AveragePooling2D((1, 5), strides=(1, 1), name='avg_pool')(x)
            x = keras.layers.Reshape((-1, bottleneck_dim))(x)
        else:
            x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(x)
            x = keras.layers.Reshape((1, bottleneck_dim))(x)

    elif aggregation == 'vlad':
        x_k_center = keras.layers.Conv2D(vlad_clusters, (7, 1),
                                         strides=(1, 1),
                                         kernel_initializer='orthogonal',
                                         use_bias=True, trainable=True,
                                         kernel_regularizer=keras.regularizers.l2(weight_decay),
                                         bias_regularizer=keras.regularizers.l2(weight_decay),
                                         name='vlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, mode='vlad', name='vlad_pool')([x_fc, x_k_center])

    elif aggregation == 'gvlad':
        x_k_center = keras.layers.Conv2D(vlad_clusters + ghost_clusters, (7, 1),
                                         strides=(1, 1),
                                         kernel_initializer='orthogonal',
                                         use_bias=True, trainable=True,
                                         kernel_regularizer=keras.regularizers.l2(weight_decay),
                                         bias_regularizer=keras.regularizers.l2(weight_decay),
                                         name='gvlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, g_centers=ghost_clusters, mode='gvlad',
                        name='gvlad_pool')([x_fc, x_k_center])

    else:
        raise IOError('==> unknown aggregation mode')

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    x = keras.layers.Dense(bottleneck_dim, activation='relu',
                           kernel_initializer='orthogonal',
                           use_bias=True, trainable=True,
                           kernel_regularizer=keras.regularizers.l2(weight_decay),
                           bias_regularizer=keras.regularizers.l2(weight_decay),
                           name='fc6')(x)

    # ===============================================
    #            Softmax Vs AMSoftmax
    # ===============================================
    if loss == 'softmax':
        y = keras.layers.Dense(num_class, activation='softmax',
                               kernel_initializer='orthogonal',
                               use_bias=False, trainable=True,
                               kernel_regularizer=keras.regularizers.l2(weight_decay),
                               bias_regularizer=keras.regularizers.l2(weight_decay),
                               name='prediction')(x)
        trnloss = 'categorical_crossentropy'

    elif loss == 'amsoftmax':
        x_l2 = keras.layers.Lambda(lambda x: K.l2_normalize(x, 1))(x)
        y = keras.layers.Dense(num_class,
                               kernel_initializer='orthogonal',
                               use_bias=False, trainable=True,
                               kernel_constraint=keras.constraints.unit_norm(),
                               kernel_regularizer=keras.regularizers.l2(weight_decay),
                               bias_regularizer=keras.regularizers.l2(weight_decay),
                               name='prediction')(x_l2)
        trnloss = amsoftmax_loss

    else:
        raise IOError('==> unknown loss.')

    if mode == 'eval':
        y = keras.layers.Lambda(lambda x: keras.backend.l2_normalize(x, 1))(x)

    model = keras.models.Model(inputs, y, name='vggvox_resnet2D_{}_{}'.format(loss, aggregation))

    if mode == 'train':
        if mgpu > 1:
            model = ModelMGPU(model, gpus=mgpu)
        # Set up the optimizer
        if args.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=1e-3)
        elif args.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=True)
        else:
            raise IOError('==> unknown optimizer type')
        model.compile(optimizer=opt, loss=trnloss, metrics=['acc'])

    return model
def cosine_proximity(y_true, y_pred):
    assert K.ndim(y_true) == 2
    assert K.ndim(y_pred) == 2
    y_true = K.l2_normalize(y_true, axis=1)
    y_pred = K.l2_normalize(y_pred, axis=1)
    return K.sum(y_true * y_pred, axis=1)
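# Usage sketch: cosine_proximity as a compile-time loss/metric. Negating it
# for the loss makes higher cosine similarity lower the loss value; the
# Sequential model and dimensions below are assumptions for illustration.
regressor = Sequential([Dense(300, input_shape=(512,))])
regressor.compile(optimizer='adam',
                  loss=lambda yt, yp: -cosine_proximity(yt, yp),
                  metrics=[cosine_proximity])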