def mildnet_without_skip_big():
    vgg_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
    convnet_output = Dense(2048, activation='relu')(vgg_model.output)
    convnet_output = Dropout(0.6)(convnet_output)
    convnet_output = Dense(2048, activation='relu')(convnet_output)
    convnet_output = Lambda(lambda x: K.l2_normalize(x, axis=1))(convnet_output)

    # NOTE: first_input and second_input are declared as model inputs but are
    # never connected to convnet_output; only the shared VGG branch feeds the
    # output. They exist to match the multi-input (triplet-style) interface.
    first_input = Input(shape=(224, 224, 3))
    second_input = Input(shape=(224, 224, 3))

    final_model = tf.keras.models.Model(
        inputs=[first_input, second_input, vgg_model.input],
        outputs=convnet_output)

    return final_model
def create_conv2(Nr, Nt, input_shape, output_dim):
    H_normalization_factor = np.sqrt(Nr * Nt)
    # lstm = tf.keras.layers.CuDNNLSTM
    model = Sequential()
    # Keras is requiring an extra dimension: I will add it with a reshape layer because I am using a generator
    model.add(Conv1D(64, 3, activation="tanh", padding="same", input_shape=input_shape))
    model.add(Conv1D(64, 3, activation="tanh", padding="same"))
    model.add(Dropout(0.3))
    model.add(Flatten())
    # model.add(Dense(150, activation="tanh"))
    model.add(Dense(np.prod(output_dim), activation="linear"))
    model.add(Lambda(lambda x: H_normalization_factor * K.l2_normalize(x, axis=-1)))
    model.add(Reshape(output_dim))
    return model
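# A minimal numpy sanity check (self-contained sketch) of the final Lambda
# above: after K.l2_normalize the output has unit norm, so scaling by
# sqrt(Nr * Nt) pins the predicted channel matrix to a fixed Frobenius norm.
import numpy as np

Nr, Nt = 4, 2
x = np.random.randn(Nr * Nt)
x_scaled = np.sqrt(Nr * Nt) * x / np.linalg.norm(x)
assert np.isclose(np.linalg.norm(x_scaled), np.sqrt(Nr * Nt))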
def call(self, inputs):
    features = inputs[0]
    fltr = inputs[1]

    if not K.is_sparse(fltr):
        fltr = ops.dense_to_sparse(fltr)

    features_neigh = self.aggregate_op(
        tf.gather(features, fltr.indices[:, -1]), fltr.indices[:, -2])
    output = K.concatenate([features, features_neigh])
    output = K.dot(output, self.kernel)

    if self.use_bias:
        output = K.bias_add(output, self.bias)
    if self.activation is not None:
        output = self.activation(output)
    output = K.l2_normalize(output, axis=-1)
    return output
def __init__(
    self,
    batch_input_shape=(None, NUM_FRAMES, NUM_FBANKS, 1),
    include_softmax=False,
    num_speakers_softmax=None,
):
    self.include_softmax = include_softmax
    if self.include_softmax:
        assert num_speakers_softmax > 0
    self.clipped_relu_count = 0

    # http://cs231n.github.io/convolutional-networks/
    # conv weights
    # #params = ks * ks * nb_filters * num_channels_input

    # Conv128-s
    # 5*5*128*128/2+128
    # ks*ks*nb_filters*channels/strides+bias(=nb_filters)

    # take 100 ms -> 4 frames.
    # if signal is 3 seconds, then take 100ms per 100ms and average out this network.
    # 8*8 = 64 features.

    # used to share all the layers across the inputs

    # num_frames = K.shape() - do it dynamically after.
    inputs = Input(batch_shape=batch_input_shape, name="input")
    x = self.cnn_component(inputs)

    x = Reshape((-1, 2048))(x)
    # Temporal average layer. axis=1 is time.
    x = Lambda(lambda y: K.mean(y, axis=1), name="average")(x)
    if include_softmax:
        logger.info("Including a Dropout layer to reduce overfitting.")
        # used for softmax because the dataset we pre-train on might be too small. easy to overfit.
        x = Dropout(0.5)(x)
    x = Dense(512, name="affine")(x)
    if include_softmax:
        # Those weights are just when we train on softmax.
        x = Dense(num_speakers_softmax, activation="softmax")(x)
    else:
        # Does not contain any weights.
        x = Lambda(lambda y: K.l2_normalize(y, axis=1), name="ln")(x)
    self.m = Model(inputs, x, name="ResCNN")
def _create_model(self):
    """Build and compile model."""
    # Encoding
    self.x = Input(shape=(self.original_dim,))
    self.x_ = Lambda(lambda x: K.l2_normalize(x, axis=1))(self.x)
    self.dropout_encoder = Dropout(self.drop_encoder)(self.x_)
    self.h = Dense(
        self.intermediate_dim,
        activation="tanh",
        kernel_initializer=tf.keras.initializers.glorot_uniform(seed=self.seed),
        bias_initializer=tf.keras.initializers.truncated_normal(
            stddev=0.001, seed=self.seed
        ),
    )(self.dropout_encoder)
    self.z_mean = Dense(self.latent_dim)(self.h)
    self.z_log_var = Dense(self.latent_dim)(self.h)

    # Sampling
    self.z = Lambda(self._take_sample, output_shape=(self.latent_dim,))(
        [self.z_mean, self.z_log_var]
    )

    # Decoding
    self.h_decoder = Dense(
        self.intermediate_dim,
        activation="tanh",
        kernel_initializer=tf.keras.initializers.glorot_uniform(seed=self.seed),
        bias_initializer=tf.keras.initializers.truncated_normal(
            stddev=0.001, seed=self.seed
        ),
    )
    self.dropout_decoder = Dropout(self.drop_decoder)
    self.x_bar = Dense(self.original_dim)
    self.h_decoded = self.h_decoder(self.z)
    self.h_decoded_ = self.dropout_decoder(self.h_decoded)
    self.x_decoded = self.x_bar(self.h_decoded_)

    # Training
    self.model = Model(self.x, self.x_decoded)
    self.model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=self._get_vae_loss,
    )
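# `_take_sample` is referenced above but not shown in this snippet. A standard
# reparameterization-trick implementation would look like the hypothetical
# sketch below (z = mu + sigma * eps with eps ~ N(0, I), and K being
# tf.keras.backend as in the surrounding code); the actual method may differ.
def _take_sample(self, args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon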
def get_embeddingNetwork(input_shape=(96, 96, 3), embedding_size=4096):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL
    logging.getLogger('tensorflow').setLevel(logging.FATAL)

    base_model = keras.applications.VGG16(include_top=False, weights=None,
                                          input_shape=input_shape)
    inputs = Input(shape=input_shape)
    x = base_model(inputs)
    x = Flatten()(x)
    x = Dense(embedding_size, activation='sigmoid', kernel_regularizer=l2(2e-4))(x)
    outputs = Lambda(lambda x: K.l2_normalize(x, axis=-1))(x)
    embeddingNetwork = Model(inputs, outputs)
    return embeddingNetwork
def vggvox_model():
    inp = Input(c.INPUT_SHAPE, name='input')
    x = conv_bn_pool(inp, layer_idx=1, conv_filters=96, conv_kernel_size=(7, 7),
                     conv_strides=(2, 2), conv_pad=(1, 1),
                     pool='max', pool_size=(3, 3), pool_strides=(2, 2))
    x = conv_bn_pool(x, layer_idx=2, conv_filters=256, conv_kernel_size=(5, 5),
                     conv_strides=(2, 2), conv_pad=(1, 1),
                     pool='max', pool_size=(3, 3), pool_strides=(2, 2))
    x = conv_bn_pool(x, layer_idx=3, conv_filters=384, conv_kernel_size=(3, 3),
                     conv_strides=(1, 1), conv_pad=(1, 1))
    x = conv_bn_pool(x, layer_idx=4, conv_filters=256, conv_kernel_size=(3, 3),
                     conv_strides=(1, 1), conv_pad=(1, 1))
    x = conv_bn_pool(x, layer_idx=5, conv_filters=256, conv_kernel_size=(3, 3),
                     conv_strides=(1, 1), conv_pad=(1, 1),
                     pool='max', pool_size=(5, 3), pool_strides=(3, 2))
    x = conv_bn_dynamic_apool(x, layer_idx=6, conv_filters=4096,
                              conv_kernel_size=(9, 1), conv_strides=(1, 1),
                              conv_pad=(0, 0), conv_layer_prefix='fc')
    x = conv_bn_pool(x, layer_idx=7, conv_filters=1024, conv_kernel_size=(1, 1),
                     conv_strides=(1, 1), conv_pad=(0, 0), conv_layer_prefix='fc')
    x = Lambda(lambda y: K.l2_normalize(y, axis=3), name='norm')(x)
    x = Conv2D(filters=1024, kernel_size=(1, 1), strides=(1, 1),
               padding='valid', name='fc8')(x)
    m = Model(inp, x, name='VGGVox')
    return m
def affinitykmeans(Y, V):
    def norm(tensor):
        square_tensor = K.square(tensor)
        frobenius_norm2 = K.sum(square_tensor, axis=(1, 2))
        return frobenius_norm2

    def dot(x, y):
        return K.batch_dot(x, y, axes=(2, 1))

    def T(x):
        return K.permute_dimensions(x, [0, 2, 1])

    V = K.l2_normalize(K.reshape(V, [BATCH_SIZE, -1, EMBEDDINGS_DIMENSION]), axis=-1)
    Y = K.reshape(Y, [BATCH_SIZE, -1, MAX_MIX])

    silence_mask = K.sum(Y, axis=2, keepdims=True)
    V = silence_mask * V

    return norm(dot(T(V), V)) - norm(dot(T(V), Y)) * 2 + norm(dot(T(Y), Y))
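# The three-term return value above is the deep-clustering affinity loss: it
# equals ||V Vᵀ - Y Yᵀ||_F² expanded so that the T x T affinity matrices are
# never materialized. A quick numpy check of that identity (self-contained
# sketch with toy shapes):
import numpy as np

T_, D, C = 50, 10, 3
V = np.random.randn(T_, D)
Y = np.eye(C)[np.random.randint(0, C, T_)]  # one-hot cluster assignments
direct = np.linalg.norm(V @ V.T - Y @ Y.T, 'fro') ** 2
expanded = (np.linalg.norm(V.T @ V, 'fro') ** 2
            - 2 * np.linalg.norm(V.T @ Y, 'fro') ** 2
            + np.linalg.norm(Y.T @ Y, 'fro') ** 2)
assert np.isclose(direct, expanded)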
def model_fn(inputs):
    """MLP with two output heads for mu and kappa."""
    x = Dense(2048, activation="relu")(inputs)
    x = Dense(2048, activation="relu")(x)
    x = Dense(2048, activation="relu")(x)
    x = Dense(2048, activation="relu")(x)

    mu = Dense(1024, activation="relu")(x)
    mu = Dense(1024, activation="relu")(mu)
    mu = Dense(3, activation="linear")(mu)
    mu = Lambda(lambda t: K.l2_normalize(t, axis=-1), name="mu")(mu)

    kappa = Dense(1024, activation="relu")(x)
    kappa = Dense(1024, activation="relu")(kappa)
    kappa = Dense(1, activation="relu")(kappa)
    kappa = Lambda(lambda t: K.squeeze(t, 1) + 0.001, name="kappa")(kappa)

    return tfp.layers.DistributionLambda(
        make_distribution_fn=lambda params: FisherVonMises(
            mean_direction=params[0], concentration=params[1]),
        convert_to_tensor_fn=tfd.Distribution.mean,
        name="fvm")([mu, kappa])
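# Why the two Lambdas: a von Mises-Fisher distribution needs a unit-norm mean
# direction (hence l2_normalize on mu) and a strictly positive concentration
# (hence the +0.001 floor, since a ReLU head can output exactly 0).
# `FisherVonMises` is defined elsewhere in this codebase; TFP's built-in
# distribution illustrates the same constraints (sketch, assumes
# tensorflow_probability is installed):
import tensorflow as tf
import tensorflow_probability as tfp

mu = tf.math.l2_normalize(tf.constant([[1.0, 2.0, 2.0]]), axis=-1)
kappa = tf.constant([5.0])
fvm = tfp.distributions.VonMisesFisher(mean_direction=mu, concentration=kappa)
print(fvm.mean())  # points along mu; shrinks toward 0 as kappa -> 0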
def build_network(input_shape, embedding_size):
    '''Build a ShuffleNetV2-based embedding network.

    Args:
        input_shape (tuple of int): Input shape of images.
        embedding_size (int): Size of the final embedding layer.

    Returns:
        model (tensorflow.keras.engine.training.Model): Keras model.
    '''
    inputs, outputs = ShuffleNetV2(include_top=False,
                                   input_shape=input_shape,
                                   bottleneck_ratio=0.35,
                                   num_shuffle_units=[2, 2, 2])
    outputs = Dropout(0.0)(outputs)
    outputs = Dense(embedding_size, activation=None,
                    kernel_initializer='he_uniform')(outputs)
    # force the encoding to live on the d-dimensional hypersphere
    outputs = Lambda(lambda x: K.l2_normalize(x, axis=-1))(outputs)
    return Model(inputs, outputs)
def call(self, inputs):
    input_shape = K.shape(inputs)
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')

    input_dim = input_shape[channel_axis]
    ker_shape = self.kernel_size + (input_dim, self.filters)
    nb_kernels = ker_shape[-2] * ker_shape[-1]
    kernel_shape_4_norm = (np.prod(self.kernel_size), nb_kernels)

    reshaped_kernel = K.reshape(self.kernel, kernel_shape_4_norm)
    normalized_weight = K.l2_normalize(reshaped_kernel, axis=0, epsilon=self.epsilon)
    normalized_weight = K.reshape(self.gamma, (1, ker_shape[-2] * ker_shape[-1])) * normalized_weight
    shaped_kernel = K.reshape(normalized_weight, ker_shape)
    shaped_kernel._keras_shape = ker_shape

    convArgs = {"strides": self.strides[0] if self.rank == 1 else self.strides,
                "padding": self.padding,
                "data_format": self.data_format,
                "dilation_rate": self.dilation_rate[0] if self.rank == 1 else self.dilation_rate}
    convFunc = {1: K.conv1d, 2: K.conv2d, 3: K.conv3d}[self.rank]
    output = convFunc(inputs, shaped_kernel, **convArgs)

    if self.use_bias:
        output = K.bias_add(
            output,
            self.bias,
            data_format=self.data_format
        )

    if self.activation is not None:
        output = self.activation(output)

    return output
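# What the kernel reshaping above achieves: each of the input_dim * filters
# spatial "columns" of the kernel is rescaled to unit l2 norm, then multiplied
# by a learned per-column gain gamma (weight normalization). A numpy sketch of
# the same computation on a toy 2D kernel (all names here are illustrative):
import numpy as np

ks, in_ch, filters = 3, 2, 4
kernel = np.random.randn(ks, ks, in_ch, filters)
gamma = np.random.rand(in_ch * filters)

flat = kernel.reshape(ks * ks, in_ch * filters)            # one column per (in, out) pair
unit = flat / np.linalg.norm(flat, axis=0, keepdims=True)  # l2_normalize over axis 0
scaled = gamma[None, :] * unit
assert np.allclose(np.linalg.norm(scaled, axis=0), gamma)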
def __init__(self, layer_sizes, generator=None, bias=False,
             activation="sigmoid", normalize=None, **kwargs):
    if activation == "linear" or activation == "relu" or activation == "sigmoid":
        self.activation = activation
    else:
        raise ValueError(
            "Activation should be either 'linear', 'relu' or 'sigmoid'; received '{}'"
            .format(activation))

    if normalize == "l2":
        self._normalization = Lambda(lambda x: K.l2_normalize(x, axis=-1))
    elif normalize is None:
        self._normalization = Lambda(lambda x: x)
    else:
        raise ValueError(
            "Normalization should be either 'l2' or None; received '{}'".format(normalize))

    # Get the model parameters from the generator or the keyword arguments
    if generator is not None:
        self._get_sizes_from_generator(generator)
    else:
        self._get_sizes_from_keywords(kwargs)

    # Model parameters
    self.n_layers = len(layer_sizes)
    self.bias = bias

    # Feature dimensions for each layer
    self.dims = [self.input_feature_size] + layer_sizes
def Mildnet_all_trainable():
    vgg_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
    intermediate_layer_outputs = get_layers_output_by_name(
        vgg_model, ["block1_pool", "block2_pool", "block3_pool", "block4_pool"])
    convnet_output = GlobalAveragePooling2D()(vgg_model.output)
    for layer_name, output in intermediate_layer_outputs.items():
        output = GlobalAveragePooling2D()(output)
        convnet_output = concatenate([convnet_output, output])

    convnet_output = Dense(2048, activation='relu')(convnet_output)
    convnet_output = Dropout(0.6)(convnet_output)
    convnet_output = Dense(2048, activation='relu')(convnet_output)
    convnet_output = Lambda(lambda x: K.l2_normalize(x, axis=1))(convnet_output)

    final_model = tf.keras.models.Model(inputs=vgg_model.input, outputs=convnet_output)

    return final_model
def text_model(vocab_size, lr=0.0001):
    input_2 = Input(shape=(None,))
    embed = Embedding(vocab_size, 50, name="embed")
    gru = Bidirectional(GRU(256, return_sequences=True), name="gru_1")
    dense_2 = Dense(vec_dim, activation="linear", name="dense_text_1")
    x2 = embed(input_2)
    x2 = gru(x2)
    x2 = GlobalMaxPool1D()(x2)
    x2 = dense_2(x2)
    _norm = Lambda(lambda x: K.l2_normalize(x, axis=-1))
    x2 = _norm(x2)
    model = Model([input_2], x2)
    model.compile(loss="mae", optimizer=Adam(lr))
    model.summary()
    return model
def call(self, inputs, **kwargs):
    n2n, message = inputs
    states = [message]
    for rep in range(self.repetitions):
        n2n_pool = tf.sparse.sparse_dense_matmul(n2n, states[rep])
        # print(n2n_pool)
        node_representations = self.node_linear(n2n_pool)
        if self.combine_method == 'graphsage':
            combined = self.combine(node_representations)
        elif self.combine_method == 'structure2vec':
            combined = self.combine([node_representations, message])
        elif self.combine_method == 'gru':
            combined = self.combine(tf.expand_dims(node_representations, 1))
        else:
            raise ValueError(f'Combine method `{self.combine_method}` is not implemented yet!')
        res = K.l2_normalize(combined, axis=1)
        states.append(res)

    if not self.return_sequences:
        return states[-1]

    # B x embedding_dim x repetitions
    target_shape = [dim if dim is not None else -1 for dim in K.int_shape(message)]
    target_shape.append(self.repetitions)
    out = tf.concat(states[1:], axis=-1)
    out = tf.reshape(out, shape=target_shape)
    return out
def image_model(lr=0.0001):
    input_1 = Input(shape=(None, None, 3))
    base_model = ResNet50(weights='imagenet', include_top=False)
    x1 = base_model(input_1)
    x1 = GlobalMaxPool2D()(x1)
    dense_1 = Dense(vec_dim, activation="linear", name="dense_image_1")
    x1 = dense_1(x1)
    _norm = Lambda(lambda x: K.l2_normalize(x, axis=-1))
    x1 = _norm(x1)
    model = Model([input_1], x1)
    model.compile(loss="mae", optimizer=Adam(lr))
    model.summary()
    return model
def call(self, inputs): """Following the routing algorithm from Hinton's paper, but replace b = b + <u,v> with b = <u,v>. This change can improve the feature representation of Capsule. However, you can replace b = K.batch_dot(outputs, hat_inputs, [2, 3]) with b += K.batch_dot(outputs, hat_inputs, [2, 3]) to realize a standard routing. """ if self.share_weights: hat_inputs = K.conv1d(inputs, self.kernel) else: hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1]) batch_size = K.shape(inputs)[0] input_num_capsule = K.shape(inputs)[1] hat_inputs = K.reshape(hat_inputs, (batch_size, input_num_capsule, self.num_capsule, self.dim_capsule)) hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3)) b = K.zeros_like(hat_inputs[:, :, :, 0]) for i in range(self.routings): c = softmax(b, 1) # o = self.activation(K.batch_dot(c, hat_inputs, [2, 2])) # [2, 2] o = tf.einsum('bin,binj->bij', c, hat_inputs) # print(2, o.shape) if i < self.routings - 1: o = K.l2_normalize(o, -1) # b = K.batch_dot(o, hat_inputs, [2, 3]) b = tf.einsum('bij,binj->bin', o, hat_inputs) return o
def call(self, x):
    # feat : bz x W x H x D, cluster_score: bz x W x H x clusters.
    feat, cluster_score = x
    num_features = feat.shape[-1]

    # softmax normalization to get soft-assignment.
    # stable variant: Deep Learning, MIT Press, pg 180-181
    # softmax(z) = softmax(z - max(z))
    # A : bz x W x H x clusters
    max_cluster_score = K.max(cluster_score, -1, keepdims=True)
    exp_cluster_score = K.exp(cluster_score - max_cluster_score)
    A = exp_cluster_score / K.sum(exp_cluster_score, axis=-1, keepdims=True)
    # A = Softmax(axis=-1, name="softmax_vlad")(cluster_score - max_cluster_score)
    # A = tf.nn.softmax(cluster_score - max_cluster_score, axis=-1)

    # Now, need to compute the residual, self.cluster: clusters x D
    A = K.expand_dims(A, -1)                   # A : bz x W x H x clusters x 1
    feat_broadcast = K.expand_dims(feat, -2)   # feat_broadcast : bz x W x H x 1 x D
    feat_res = feat_broadcast - self.cluster   # feat_res : bz x W x H x clusters x D
    weighted_res = tf.multiply(A, feat_res)    # weighted_res : bz x W x H x clusters x D
    cluster_res = K.sum(weighted_res, [1, 2])  # <-- aggregation over time --> (bz x clusters x D)

    if self.mode == 'gvlad':
        cluster_res = cluster_res[:, :self.k_centers, :]

    cluster_l2 = K.l2_normalize(cluster_res, -1)  # normalize D dimension
    outputs = K.reshape(cluster_l2, [-1, int(self.k_centers) * int(num_features)])
    return outputs
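# A self-contained numpy sketch of the NetVLAD aggregation above (toy shapes,
# no ghost clusters): soft-assign each local descriptor to K_ cluster centers,
# sum the assignment-weighted residuals over all spatial positions, then
# l2-normalize each cluster's residual vector before flattening.
import numpy as np

bz, W, H, D, K_ = 2, 4, 4, 8, 3
feat = np.random.randn(bz, W, H, D)
score = np.random.randn(bz, W, H, K_)
cluster = np.random.randn(K_, D)

e = np.exp(score - score.max(-1, keepdims=True))
A = e / e.sum(-1, keepdims=True)                      # bz x W x H x K_
res = feat[..., None, :] - cluster                    # bz x W x H x K_ x D
vlad = (A[..., None] * res).sum(axis=(1, 2))          # bz x K_ x D
vlad /= np.linalg.norm(vlad, axis=-1, keepdims=True)  # intra-normalization
out = vlad.reshape(bz, K_ * D)
print(out.shape)  # (2, 24)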
def call(self, x):
    # feat : bz x W x H x D, cluster_score: bz x W x H x clusters.
    feat, cluster_score = x
    num_features = feat.shape[-1]

    # softmax normalization to get soft-assignment.
    # A : bz x W x H x clusters
    max_cluster_score = K.max(cluster_score, -1, keepdims=True)
    exp_cluster_score = K.exp(cluster_score - max_cluster_score)
    A = exp_cluster_score / K.sum(exp_cluster_score, axis=-1, keepdims=True)

    # Now, need to compute the residual, self.cluster: clusters x D
    A = K.expand_dims(A, -1)                  # A : bz x W x H x clusters x 1
    feat_broadcast = K.expand_dims(feat, -2)  # feat_broadcast : bz x W x H x 1 x D
    feat_res = feat_broadcast - self.cluster  # feat_res : bz x W x H x clusters x D
    weighted_res = tf.multiply(A, feat_res)   # weighted_res : bz x W x H x clusters x D
    cluster_res = K.sum(weighted_res, [1, 2])

    if self.mode == 'gvlad':
        cluster_res = cluster_res[:, :self.k_centers, :]

    cluster_l2 = K.l2_normalize(cluster_res, -1)
    outputs = K.reshape(cluster_l2, [-1, int(self.k_centers) * int(num_features)])
    return outputs
def call(self, inputs):
    features = inputs[0]
    fltr = inputs[1]

    # Enforce sparse representation
    if not K.is_sparse(fltr):
        fltr = ops.dense_to_sparse(fltr)

    # Propagation
    indices = fltr.indices
    N = tf.shape(features, out_type=indices.dtype)[0]
    indices = ops.sparse_add_self_loops(indices, N)
    targets, sources = indices[:, -2], indices[:, -1]
    messages = tf.gather(features, sources)
    aggregated = self.aggregate_op(messages, targets, N)
    output = K.concatenate([features, aggregated])
    output = ops.dot(output, self.kernel)

    if self.use_bias:
        output = K.bias_add(output, self.bias)
    output = K.l2_normalize(output, axis=-1)
    if self.activation is not None:
        output = self.activation(output)
    return output
def progress():
    audio1 = request.form["audio_inp1"]
    audio2 = request.form["audio_inp2"]

    inp = Input(batch_shape=(None, 160, 64, 1), name='input')
    x = cnn_component(inp)
    x = Reshape((-1, 2048))(x)
    x = Lambda(lambda y: K.mean(y, axis=1), name='average')(x)
    x = Dense(512, name='affine')(x)
    x = Lambda(lambda y: K.l2_normalize(y, axis=1), name='ln')(x)
    model = Model(inp, x, name='ResCNN')

    print('Loading pretrained model')
    model.load_weights('ResCNN_triplet_training_checkpoint_265.h5', by_name=True)

    np.random.seed(123)
    random.seed(123)

    mfcc_001 = sample_from_mfcc(read_mfcc(audio1, SAMPLE_RATE), NUM_FRAMES)
    mfcc_002 = sample_from_mfcc(read_mfcc(audio2, SAMPLE_RATE), NUM_FRAMES)

    predict_001 = model.predict(np.expand_dims(mfcc_001, axis=0))
    predict_002 = model.predict(np.expand_dims(mfcc_002, axis=0))

    # Both embeddings are l2-normalized, so this dot product is their cosine similarity.
    mul = np.multiply(predict_001, predict_002)
    s = np.sum(mul, axis=1)

    score = np.round(s * 100, 1)
    if s > 0.65:
        value = 'Profile Matched' + ' ' + str(score)
    else:
        value = 'Profile Not Matched' + ' ' + str(score)
    return render_template("result.html", value=value)
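# Sanity check (numpy only) for the matching score above: for any two vectors,
# elementwise product then sum of their l2-normalized versions equals the
# standard cosine-similarity formula.
import numpy as np

a, b = np.random.randn(512), np.random.randn(512)
cos = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
a_n, b_n = a / np.linalg.norm(a), b / np.linalg.norm(b)
assert np.isclose(np.sum(a_n * b_n), cos)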
def create_lstm():
    global Nt
    global Nr
    H_normalization_factor = np.sqrt(Nr * Nt)
    lstm = keras.layers.CuDNNLSTM
    model = Sequential()
    model.add(lstm(128, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(lstm(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(lstm(256, return_sequences=False))
    # model.add(Flatten())
    model.add(Dropout(0.2))
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation="relu"))
    model.add(Dense(numOutputs, activation="linear"))
    model.add(Lambda(lambda x: H_normalization_factor * K.l2_normalize(x, axis=-1)))
    model.add(Reshape(output_dim))
    return model
def vggvox_resnet2d_icassp(input_dim=(257, 250, 1), num_class=8631, mode='train', args=None):
    net = args.net
    loss = args.loss
    vlad_clusters = args.vlad_cluster
    ghost_clusters = args.ghost_cluster
    bottleneck_dim = args.bottleneck_dim
    aggregation = args.aggregation_mode
    mgpu = len(tf.config.experimental.list_physical_devices('GPU'))

    if net == 'resnet34s':
        inputs, x = backbone.resnet_2D_v1(input_dim=input_dim, mode=mode)
    else:
        inputs, x = backbone.resnet_2D_v2(input_dim=input_dim, mode=mode)

    # ===============================================
    #            Fully Connected Block 1
    # ===============================================
    x_fc = keras.layers.Conv2D(bottleneck_dim, (7, 1),
                               strides=(1, 1),
                               activation='relu',
                               kernel_initializer='orthogonal',
                               use_bias=True, trainable=True,
                               kernel_regularizer=keras.regularizers.l2(weight_decay),
                               bias_regularizer=keras.regularizers.l2(weight_decay),
                               name='x_fc')(x)

    # ===============================================
    #            Feature Aggregation
    # ===============================================
    if aggregation == 'avg':
        if mode == 'train':
            x = keras.layers.AveragePooling2D((1, 5), strides=(1, 1), name='avg_pool')(x)
            x = keras.layers.Reshape((-1, bottleneck_dim))(x)
        else:
            x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(x)
            x = keras.layers.Reshape((1, bottleneck_dim))(x)
    elif aggregation == 'vlad':
        x_k_center = keras.layers.Conv2D(vlad_clusters, (7, 1),
                                         strides=(1, 1),
                                         kernel_initializer='orthogonal',
                                         use_bias=True, trainable=True,
                                         kernel_regularizer=keras.regularizers.l2(weight_decay),
                                         bias_regularizer=keras.regularizers.l2(weight_decay),
                                         name='vlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, mode='vlad', name='vlad_pool')([x_fc, x_k_center])
    elif aggregation == 'gvlad':
        x_k_center = keras.layers.Conv2D(vlad_clusters + ghost_clusters, (7, 1),
                                         strides=(1, 1),
                                         kernel_initializer='orthogonal',
                                         use_bias=True, trainable=True,
                                         kernel_regularizer=keras.regularizers.l2(weight_decay),
                                         bias_regularizer=keras.regularizers.l2(weight_decay),
                                         name='gvlad_center_assignment')(x)
        x = VladPooling(k_centers=vlad_clusters, g_centers=ghost_clusters, mode='gvlad',
                        name='gvlad_pool')([x_fc, x_k_center])
    else:
        raise IOError('==> unknown aggregation mode')

    # ===============================================
    #            Fully Connected Block 2
    # ===============================================
    x = keras.layers.Dense(bottleneck_dim, activation='relu',
                           kernel_initializer='orthogonal',
                           use_bias=True, trainable=True,
                           kernel_regularizer=keras.regularizers.l2(weight_decay),
                           bias_regularizer=keras.regularizers.l2(weight_decay),
                           name='fc6')(x)

    # ===============================================
    #            Softmax Vs AMSoftmax
    # ===============================================
    if loss == 'softmax':
        y = keras.layers.Dense(num_class, activation='softmax',
                               kernel_initializer='orthogonal',
                               use_bias=False, trainable=True,
                               kernel_regularizer=keras.regularizers.l2(weight_decay),
                               bias_regularizer=keras.regularizers.l2(weight_decay),
                               name='prediction')(x)
        trnloss = 'categorical_crossentropy'
    elif loss == 'amsoftmax':
        x_l2 = keras.layers.Lambda(lambda x: K.l2_normalize(x, 1))(x)
        y = keras.layers.Dense(num_class,
                               kernel_initializer='orthogonal',
                               use_bias=False, trainable=True,
                               kernel_constraint=keras.constraints.unit_norm(),
                               kernel_regularizer=keras.regularizers.l2(weight_decay),
                               bias_regularizer=keras.regularizers.l2(weight_decay),
                               name='prediction')(x_l2)
        trnloss = amsoftmax_loss
    else:
        raise IOError('==> unknown loss.')

    if mode == 'eval':
        y = keras.layers.Lambda(lambda x: keras.backend.l2_normalize(x, 1))(x)

    model = keras.models.Model(inputs, y,
                               name='vggvox_resnet2D_{}_{}'.format(loss, aggregation))

    if mode == 'train':
        if mgpu > 1:
            model = ModelMGPU(model, gpus=mgpu)
        # set up optimizer.
        if args.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=1e-3)
        elif args.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.0, nesterov=True)
        else:
            raise IOError('==> unknown optimizer type')
        model.compile(optimizer=opt, loss=trnloss, metrics=['acc'])

    return model
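# `amsoftmax_loss` is defined elsewhere in this codebase. With the feature
# l2-normalized (Lambda above) and the Dense kernel constrained to unit norm,
# the 'prediction' outputs are cosine similarities, so a typical AM-softmax
# loss (hypothetical sketch; scale s and margin m are assumed values, not
# taken from the source) subtracts a margin from the target-class cosine and
# rescales before cross-entropy:
import tensorflow as tf

def amsoftmax_loss(y_true, y_pred, scale=30.0, margin=0.35):
    # y_pred holds cosine logits in [-1, 1]; y_true is one-hot.
    logits = scale * (y_pred - margin * y_true)
    return tf.keras.losses.categorical_crossentropy(y_true, logits, from_logits=True)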
def call(self, input_tensor, training=None):
    z_app = np.prod(self.app_dim)
    z_pos = np.prod(self.pos_dim)

    # For each child (input) capsule (t_0) project into all parent (output) capsule domain (t_1)
    idx_all = 0
    u_hat_t_list = []

    # Split input_tensor by capsule types
    u_t_list = [tf.squeeze(u_t, axis=1) for u_t in tf.split(input_tensor, self.input_num_capsule, axis=1)]
    for u_t in u_t_list:
        u_t = tf.reshape(u_t, [self.batch * self.channels, self.input_height, self.input_width, 1])
        u_t.set_shape((None, self.input_height, self.input_width, 1))

        # Apply spatial kernel
        # Incorporating local neighborhood information by learning a convolution kernel of size
        # k x k for the pose and appearance matrices Pi and Ai
        if self.op == "conv":
            u_spat_t = K.conv2d(u_t, self.W[idx_all], (self.strides, self.strides),
                                padding=self.padding, data_format='channels_last')
        elif self.op == "deconv":
            out_height = deconv_length(self.input_height, self.strides, self.kernel_size,
                                       self.padding, output_padding=None)
            out_width = deconv_length(self.input_width, self.strides, self.kernel_size,
                                      self.padding, output_padding=None)
            output_shape = (self.batch * self.channels, out_height, out_width, self.num_capsule)
            u_spat_t = K.conv2d_transpose(u_t, self.W[idx_all], output_shape,
                                          (self.strides, self.strides),
                                          padding=self.padding, data_format='channels_last')
        else:
            raise ValueError("Wrong type of operation for capsule")

        # Some shape operation
        H_1 = u_spat_t.get_shape()[1]
        W_1 = u_spat_t.get_shape()[2]
        # H_1 = tf.shape(u_spat_t)[1]
        # W_1 = tf.shape(u_spat_t)[2]

        u_spat_t = tf.reshape(u_spat_t, [self.batch, self.channels, H_1, W_1, self.num_capsule])
        u_spat_t = tf.transpose(u_spat_t, (0, 2, 3, 4, 1))
        u_spat_t = tf.reshape(u_spat_t, [self.batch, H_1, W_1, self.num_capsule * self.channels])

        # Split convolution output of input_tensor to Pose and Appearance matrices
        u_t_pos, u_t_app = tf.split(u_spat_t, [self.num_capsule * z_pos, self.num_capsule * z_app], axis=-1)
        u_t_pos = tf.reshape(u_t_pos, [self.batch, H_1, W_1, self.num_capsule, self.pos_dim[0], self.pos_dim[1]])
        u_t_app = tf.reshape(u_t_app, [self.batch, H_1, W_1, self.num_capsule, self.app_dim[0], self.app_dim[1]])

        # Gather projection matrices and bias
        # Take appropriate capsule type
        mult_pos = tf.gather(self.W_pos, idx_all, axis=0)
        mult_pos = tf.reshape(mult_pos, [self.num_capsule, self.pos_dim[1], self.pos_dim[1]])
        mult_app = tf.gather(self.W_app, idx_all, axis=0)
        mult_app = tf.reshape(mult_app, [self.num_capsule, self.app_dim[1], self.app_dim[1]])
        bias = tf.reshape(tf.gather(self.b_app, idx_all, axis=0), (1, 1, 1, self.num_capsule, 1, 1))
        u_t_app += bias

        # Prepare the pose projection matrix
        mult_pos = K.l2_normalize(mult_pos, axis=-2)
        if self.coord_add:
            mult_pos = coordinate_addition(mult_pos,
                                           [1, H_1, W_1, self.num_capsule, self.pos_dim[1], self.pos_dim[1]])

        u_t_pos = mat_mult_2d(u_t_pos, mult_pos)
        u_t_app = mat_mult_2d(u_t_app, mult_app)

        # Store the result
        u_hat_t_pos = tf.reshape(u_t_pos, [self.batch, H_1, W_1, self.num_capsule, z_pos])
        u_hat_t_app = tf.reshape(u_t_app, [self.batch, H_1, W_1, self.num_capsule, z_app])
        u_hat_t = tf.concat([u_hat_t_pos, u_hat_t_app], axis=-1)
        u_hat_t_list.append(u_hat_t)
        idx_all += 1

    u_hat_t_list = tf.stack(u_hat_t_list, axis=-2)
    # [N, H, W, t_1, t_0, z] => [N, z, H_1, W_1, t_0, t_1]
    u_hat_t_list = tf.transpose(u_hat_t_list, [0, 5, 1, 2, 4, 3])

    # Routing operation
    if self.routings > 0:
        if self.routing_type == 'dynamic':
            if type(self.routings) is list:
                self.routings = self.routings[-1]
            c_t_list = routing2d(routing=self.routings, t_0=self.input_num_capsule,
                                 u_hat_t_list=u_hat_t_list)  # [T1][N,H,W,to]
        elif self.routing_type == 'dual':
            if type(self.routings) is list:
                self.routings = self.routings[-1]
            c_t_list = dual_routing(routing=self.routings, t_0=self.input_num_capsule,
                                    u_hat_t_list=u_hat_t_list, z_app=z_app,
                                    z_pos=z_pos)  # [T1][N,H,W,to]
        else:
            raise ValueError(self.routing_type + ' is an invalid routing; try dynamic or dual')
    else:
        self.routing_type = 'NONE'
        c = tf.ones([self.batch, H_1, W_1, self.input_num_capsule, self.num_capsule])
        c_t_list = [tf.squeeze(c_t, axis=-1) for c_t in tf.split(c, self.num_capsule, axis=-1)]

    # Form each parent capsule through the weighted sum of all child capsules
    r_t_mul_u_hat_t_list = []
    u_hat_t_list_ = [tf.squeeze(u_hat_t, axis=-1)
                     for u_hat_t in tf.split(u_hat_t_list, self.num_capsule, axis=-1)]
    for c_t, u_hat_t in zip(c_t_list, u_hat_t_list_):
        r_t = tf.expand_dims(c_t, axis=1)
        r_t_mul_u_hat_t_list.append(tf.reduce_sum(r_t * u_hat_t, axis=-1))

    p = r_t_mul_u_hat_t_list
    p = tf.stack(p, axis=1)
    p_pos, p_app = tf.split(p, [z_pos, z_app], axis=2)

    # Squash the weighted sum to form the final parent capsule
    v_pos = Psquash(p_pos, axis=2)
    v_app = matwo_squash(p_app, axis=2)

    outputs = tf.concat([v_pos, v_app], axis=2)
    return outputs
def call(self, x, mask=None):
    return self.gamma * K.l2_normalize(x, self.axis)
def __init__(
    self,
    layer_sizes,
    generator=None,
    aggregator=None,
    bias=True,
    dropout=0.0,
    normalize="l2",
    activations=None,
    kernel_initializer="glorot_uniform",
    kernel_regularizer=None,
    kernel_constraint=None,
    bias_initializer="zeros",
    bias_regularizer=None,
    bias_constraint=None,
    n_samples=None,
    input_neighbor_tree=None,
    input_dim=None,
    multiplicity=None,
):
    # Set the aggregator layer used in the model
    if aggregator is None:
        self._aggregator = MeanHinAggregator
    elif issubclass(aggregator, Layer):
        self._aggregator = aggregator
    else:
        raise TypeError("Aggregator should be a subclass of Keras Layer")

    # Set the normalization layer used in the model
    if normalize == "l2":
        self._normalization = Lambda(lambda x: K.l2_normalize(x, axis=-1))
    elif normalize is None or normalize == "none" or normalize == "None":
        self._normalization = Lambda(lambda x: x)
    else:
        raise ValueError(
            "Normalization should be either 'l2' or 'none'; received '{}'".format(normalize))

    # Get the sampling tree, input_dim, and num_samples from the generator
    # if no generator these must be supplied in kwargs
    if generator is not None:
        self._get_sizes_from_generator(generator)
    else:
        self.subtree_schema = _require_without_generator(
            input_neighbor_tree, "input_neighbor_tree")
        self.n_samples = _require_without_generator(n_samples, "n_samples")
        self.input_dims = _require_without_generator(input_dim, "input_dim")
        self.multiplicity = _require_without_generator(multiplicity, "multiplicity")

    # Set parameters for the model
    self.n_layers = len(self.n_samples)
    self.bias = bias
    self.dropout = dropout

    # Neighbourhood info per layer
    self.neigh_trees = self._eval_neigh_tree_per_layer(
        [li for li in self.subtree_schema if len(li[1]) > 0])

    # Depth of each input tensor i.e. number of hops from root nodes
    self._depths = [
        self.n_layers
        + 1
        - sum([1 for li in [self.subtree_schema] + self.neigh_trees if i < len(li)])
        for i in range(len(self.subtree_schema))
    ]

    # Dict of {node type: dimension} per layer
    self.dims = [
        dim
        if isinstance(dim, dict)
        else {k: dim for k, _ in ([self.subtree_schema] + self.neigh_trees)[layer]}
        for layer, dim in enumerate([self.input_dims] + layer_sizes)
    ]

    # Activation function for each layer
    if activations is None:
        activations = ["relu"] * (self.n_layers - 1) + ["linear"]
    elif len(activations) != self.n_layers:
        raise ValueError("Invalid number of activations; require one function per layer")
    self.activations = activations

    # Aggregator functions for each layer
    self._aggs = [
        {
            node_type: self._aggregator(
                output_dim,
                bias=self.bias,
                act=self.activations[layer],
                kernel_initializer=kernel_initializer,
                kernel_regularizer=kernel_regularizer,
                kernel_constraint=kernel_constraint,
                bias_initializer=bias_initializer,
                bias_regularizer=bias_regularizer,
                bias_constraint=bias_constraint,
            )
            for node_type, output_dim in self.dims[layer + 1].items()
        }
        for layer in range(self.n_layers)
    ]
def call(self, x, mask=None):
    output = K.l2_normalize(x, self.axis)
    return output * self.gamma
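# The two short call() methods above appear to belong to ParseNet-style
# "scaled L2 normalization" layers: normalize along an axis, then multiply by
# a trainable per-channel gain gamma. A minimal self-contained sketch of such
# a layer (the initial gamma of 20 is an assumed value, common in
# SSD/ParseNet implementations, not taken from this source):
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class L2Normalization(Layer):
    def __init__(self, gamma_init=20.0, axis=-1, **kwargs):
        self.gamma_init = gamma_init
        self.axis = axis
        super().__init__(**kwargs)

    def build(self, input_shape):
        # One trainable gain per channel along the normalized axis.
        self.gamma = self.add_weight(
            name='gamma', shape=(input_shape[self.axis],),
            initializer=tf.keras.initializers.Constant(self.gamma_init),
            trainable=True)
        super().build(input_shape)

    def call(self, x, mask=None):
        return self.gamma * K.l2_normalize(x, self.axis)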
def build_model(self):
    flags = self.flags
    self.l2_regul = tf.keras.regularizers.l2(l=flags.l2_regul)

    input_tensor = tf.keras.Input(shape=self.input_shape)
    x = Conv2D(filters=self.filters1, kernel_size=flags.first_kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv1')(input_tensor)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters2, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv2')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(x)

    x = Conv2D(filters=self.filters3, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv3')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters4, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv4')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(x)

    x = Conv2D(filters=self.filters5, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv5')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters6, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv6')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(x)

    x = Conv2D(filters=self.filters7, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv7')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters8, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv8')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(x)

    x = Conv2D(filters=self.filters9, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv9')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters10, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv10')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters11, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv11')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2D(filters=self.filters12, kernel_size=flags.kernel_size,
               activation=flags.act_fn1, dtype=tf.float32,
               kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
               name='e_conv12')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    last_pool = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(x)

    e_flatten = Flatten()(last_pool)
    embedding = Dense(units=flags.embedding_size, activation=None, name='e_fc1')(e_flatten)
    embedding = l2_normalize(embedding)
    self.embedding = embedding

    d_flatten = Dense(units=e_flatten.shape[1], activation=None, name='d_fc1')(embedding)
    reshaped = Reshape(target_shape=last_pool.shape[1:])(d_flatten)
    first_uppool = UpSampling2D(size=flags.pool_size)(reshaped)
    x = Conv2DTranspose(filters=self.filters12, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv12')(first_uppool)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2DTranspose(filters=self.filters11, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv11')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = UpSampling2D(size=flags.pool_size)(x)
    x = Conv2DTranspose(filters=self.filters10, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv10')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2DTranspose(filters=self.filters9, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv9')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2DTranspose(filters=self.filters8, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv8')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2DTranspose(filters=self.filters7, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv7')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = UpSampling2D(size=flags.pool_size)(x)
    x = Conv2DTranspose(filters=self.filters6, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv6')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2DTranspose(filters=self.filters5, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv5')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = UpSampling2D(size=flags.pool_size)(x)
    x = Conv2DTranspose(filters=self.filters4, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv4')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = Conv2DTranspose(filters=self.filters3, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv3')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    x = UpSampling2D(size=flags.pool_size)(x)
    x = Conv2DTranspose(filters=self.filters2, kernel_size=flags.kernel_size,
                        activation=flags.act_fn1, dtype=tf.float32,
                        kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                        name='d_conv2')(x)
    if flags.use_batchnorm:
        x = BatchNormalization()(x)
    output_tensor = Conv2DTranspose(filters=1, kernel_size=flags.first_kernel_size,
                                    activation=flags.act_fn1, dtype=tf.float32,
                                    kernel_regularizer=self.l2_regul,
                                    padding=flags.conv_padding,
                                    name='output_tensor')(x)

    self.model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
    self.optimizer = tf.keras.optimizers.Adam(flags.learning_rate1)
    self.loss_object = tf.keras.losses.MeanSquaredError()
def build_model(self):
    flags = self.flags
    self.l2_regul = tf.keras.regularizers.l2(l=flags.l2_regul)

    input_tensor = tf.keras.Input(shape=self.input_shape)
    e_conv1 = Conv2D(filters=flags.filters1, kernel_size=(self.num_rows, 3),
                     activation=flags.act_fn1, dtype=tf.float32,
                     kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                     name='e_conv1')(input_tensor)
    e_conv2 = Conv2D(filters=flags.filters2, kernel_size=flags.kernel_size,
                     activation=flags.act_fn1, dtype=tf.float32,
                     kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                     name='e_conv2')(e_conv1)
    e_pool1 = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(e_conv2)
    e_conv3 = Conv2D(filters=flags.filters3, kernel_size=flags.kernel_size,
                     activation=flags.act_fn1, dtype=tf.float32,
                     kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                     name='e_conv3')(e_pool1)
    e_conv4 = Conv2D(filters=flags.filters4, kernel_size=flags.kernel_size,
                     activation=flags.act_fn1, dtype=tf.float32,
                     kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                     name='e_conv4')(e_conv3)
    e_pool2 = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(e_conv4)
    e_conv5 = Conv2D(filters=flags.filters5, kernel_size=flags.kernel_size,
                     activation=flags.act_fn1, dtype=tf.float32,
                     kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                     name='e_conv5')(e_pool2)
    e_conv6 = Conv2D(filters=flags.filters6, kernel_size=flags.kernel_size,
                     activation=flags.act_fn1, dtype=tf.float32,
                     kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                     name='e_conv6')(e_conv5)
    e_pool3 = MaxPool2D(pool_size=flags.pool_size, padding=flags.pool_padding)(e_conv6)

    e_flatten = Flatten()(e_pool3)
    e_fc1 = Dense(units=flags.units1, activation=flags.act_fn1, name='e_fc1')(e_flatten)
    embedding = Dense(units=flags.embedding_size, activation=None, name='e_fc2')(e_fc1)
    embedding = l2_normalize(embedding)
    self.embedding = embedding

    d_fc2 = Dense(units=flags.units1, activation=flags.act_fn1, name='d_fc2')(embedding)
    d_flatten = Dense(units=e_flatten.shape[1], activation=None, name='d_fc1')(d_fc2)
    reshaped = Reshape(target_shape=e_pool3.shape[1:])(d_flatten)
    d_uppool3 = UpSampling2D(size=flags.pool_size)(reshaped)
    d_conv6 = Conv2DTranspose(filters=flags.filters5, kernel_size=flags.kernel_size,
                              activation=flags.act_fn1, dtype=tf.float32,
                              kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                              name='d_conv6')(d_uppool3)
    d_conv5 = Conv2DTranspose(filters=flags.filters4, kernel_size=flags.kernel_size,
                              activation=flags.act_fn1, dtype=tf.float32,
                              kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                              name='d_conv5')(d_conv6)
    d_uppool2 = UpSampling2D(size=flags.pool_size)(d_conv5)
    d_conv4 = Conv2DTranspose(filters=flags.filters3, kernel_size=flags.kernel_size,
                              activation=flags.act_fn1, dtype=tf.float32,
                              kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                              name='d_conv4')(d_uppool2)
    d_conv3 = Conv2DTranspose(filters=flags.filters2, kernel_size=flags.kernel_size,
                              activation=flags.act_fn1, dtype=tf.float32,
                              kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                              name='d_conv3')(d_conv4)
    d_uppool1 = UpSampling2D(size=flags.pool_size)(d_conv3)
    d_conv2 = Conv2DTranspose(filters=flags.filters2, kernel_size=flags.kernel_size,
                              activation=flags.act_fn1, dtype=tf.float32,
                              kernel_regularizer=self.l2_regul, padding=flags.conv_padding,
                              name='d_conv2')(d_uppool1)
    output_tensor = Conv2DTranspose(filters=1, kernel_size=(21, 3),
                                    activation=flags.act_fn1, dtype=tf.float32,
                                    kernel_regularizer=self.l2_regul,
                                    padding=flags.conv_padding,
                                    name='output_tensor')(d_conv2)

    self.model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
    self.optimizer = tf.keras.optimizers.Adam(flags.learning_rate1)
    self.loss_object = tf.keras.losses.MeanSquaredError()
def cosine_distance(tensor_a, tensor_b):
    l2_norm_a = K.l2_normalize(tensor_a, axis=-1)
    l2_norm_b = K.l2_normalize(tensor_b, axis=-1)
    return 1 - K.sum(l2_norm_a * l2_norm_b, axis=-1, keepdims=True)
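# Quick usage check of cosine_distance (assumes TensorFlow 2.x eager mode and
# K = tf.keras.backend, consistent with the snippets above): identical
# directions give 0, and distance grows as vectors diverge.
import tensorflow as tf
from tensorflow.keras import backend as K

a = tf.constant([[1.0, 0.0], [3.0, 4.0]])
b = tf.constant([[2.0, 0.0], [0.0, 1.0]])
print(cosine_distance(a, b).numpy())  # ~[[0.0], [0.2]]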