def _build_newsencoder(self, embedding_layer):
    """The main function to create news encoder of LSTUR.

    Args:
        embedding_layer (object): a word embedding layer.

    Return:
        object: the news encoder of LSTUR.
    """
    hparams = self.hparams
    sequences_input_title = keras.Input(shape=(hparams.title_size,), dtype="int32")
    embedded_sequences_title = embedding_layer(sequences_input_title)

    y = layers.Dropout(hparams.dropout)(embedded_sequences_title)
    y = layers.Conv1D(
        hparams.filter_num,
        hparams.window_size,
        activation=hparams.cnn_activation,
        padding="same",
        bias_initializer=keras.initializers.Zeros(),
        kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
    )(y)
    y = layers.Dropout(hparams.dropout)(y)
    # Recompute the padding mask from the raw token ids and overwrite the
    # conv output's mask so attention ignores padded title positions.
    y = layers.Masking()(
        OverwriteMasking()([y, ComputeMasking()(sequences_input_title)])
    )
    pred_title = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y)

    model = keras.Model(sequences_input_title, pred_title, name="news_encoder")
    return model
def rnn_model(params, training_dr_lstm=True, training_dr_ll=True):
    """RNN model for text.

    Builds an embedding -> LSTM -> dropout -> classifier network from the
    hyperparameters in `params`. The last layer is either a variational
    (DenseReparameterization) layer or a plain l2-regularized Dense layer.

    Args:
        params: dict of hyperparameters.
        training_dr_lstm: if True, LSTM dropout stays active at inference.
        training_dr_ll: if True, last-layer dropout stays active at inference.

    Returns:
        A `keras.Model` mapping token-id sequences to class probabilities.
    """
    seq_input = layers.Input(shape=(params['fix_len']))
    # vocab+1 because of padding.
    embedded = layers.Embedding(
        params['vocab_size'] + 1,
        params['emb_size'],
        input_length=params['fix_len'])(seq_input)
    lstm_out = layers.LSTM(
        params['hidden_lstm_size'],
        dropout=params['dropout_rate_lstm'])(
            embedded, training=training_dr_lstm)
    dropped = layers.Dropout(
        rate=params['dropout_rate'],
        seed=params['random_seed'])(lstm_out, training=training_dr_ll)

    if params['variational']:
        # Scale the KL loss by the number of training examples: a larger
        # training dataset depends less on the prior.
        def scaled_kl_fn(p, q, _):
            return tfp.distributions.kl_divergence(q, p) / params['n_train']

        logits = tfpl.DenseReparameterization(
            params['n_class_in'],
            activation=None,
            kernel_divergence_fn=scaled_kl_fn,
            bias_posterior_fn=tfpl.util.default_mean_field_normal_fn(),
            name='last_layer')(dropped)
    else:
        logits = layers.Dense(
            params['n_class_in'],
            activation=None,
            kernel_regularizer=regularizers.l2(params['reg_weight']),
            bias_regularizer=regularizers.l2(params['reg_weight']),
            name='last_layer')(dropped)

    probs = layers.Softmax(axis=1)(logits)
    return models.Model(seq_input, probs, name='rnn')
def build_cnn_model(n_features: int, n_classes: int):
    """Build the P1FP(C) model using Keras."""
    def conv_pool_lrn_stack():
        # One Conv -> MaxPool -> local-response-normalization stage.
        return [
            layers.Conv2D(128, 12, activation="relu",
                          kernel_regularizer="l2", padding="same"),
            layers.MaxPool2D(10, padding="same"),
            layers.Lambda(nn.local_response_normalization),
        ]

    model = keras.Sequential()
    model.add(layers.Reshape((1, n_features, 1), input_shape=(n_features, ),
                             name="input"))
    for stage_layer in conv_pool_lrn_stack() + conv_pool_lrn_stack():
        model.add(stage_layer)

    # It is flattened for the computation regardless, however tflearn retained
    # the flattened result whereas keras does not
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation="tanh"))
    model.add(layers.Dropout(rate=0.2))
    model.add(layers.Dense(n_classes, activation="softmax", name="target"))

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        0.05, decay_steps=1000, decay_rate=0.96)
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=lr_schedule),
        loss="categorical_crossentropy",
        metrics=[keras.metrics.TopKCategoricalAccuracy(3), "accuracy"])
    return model
def PersonalizedAttentivePooling(dim1, dim2, dim3, seed=0):
    """Soft alignment attention implementation.

    Attributes:
        dim1 (int): first dimension of value shape.
        dim2 (int): second dimension of value shape.
        dim3 (int): shape of query

    Returns:
        object: weighted summary of inputs value.
    """
    values = keras.Input(shape=(dim1, dim2), dtype="float32")
    query = keras.Input(shape=(dim3, ), dtype="float32")

    dropped_values = layers.Dropout(0.2)(values)
    # Project each value vector into the query space.
    projected = layers.Dense(
        dim3,
        activation="tanh",
        kernel_initializer=keras.initializers.glorot_uniform(seed=seed),
        bias_initializer=keras.initializers.Zeros(),
    )(dropped_values)

    # Attention scores = softmax of query . projected values.
    scores = layers.Dot(axes=-1)([query, projected])
    weights = layers.Activation("softmax")(scores)
    # Weighted sum of the (dropped-out) value vectors.
    pooled = layers.Dot((1, 1))([dropped_values, weights])

    return keras.Model([values, query], pooled)
def get_keras_layers_for_mnist_experiment(num_components):
    """Get Keras layers for the MNIST experiment.

    Args:
        num_components: (int) number of components to use for every layer.

    Returns:
        A list of lists of `keras.layer.Layer`s, where the outer index
        corresponds to layer id, and inner index to component id within a
        layer.
    """
    def conv_components(kernel_size):
        # One routed layer: `num_components` parallel conv candidates.
        return [
            layers.Conv2D(filters=4, kernel_size=kernel_size,
                          activation="relu")
            for _ in range(num_components)
        ]

    return [
        conv_components(5),
        [layers.AveragePooling2D(pool_size=2)],
        conv_components(3),
        [layers.AveragePooling2D(pool_size=2)],
        conv_components(3),
        [layers.Flatten()],
        [layers.Dropout(0.5)],
    ]
def identity_block_base(input_tensor, kernel_size, filters, stage, block,
                        num_updates, dropout_rate=0.,
                        use_variational_layers=False):
    """The identity block is the block that has no conv layer at shortcut.

    Arguments:
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at
            main path
        filters: list of integers, the filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        num_updates: integer, total steps in an epoch (for weighting the loss)
        dropout_rate: float, always-on dropout rate.
        use_variational_layers: boolean, if true train a variational model

    Returns:
        x: Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    # Scale the KL term by the number of updates per epoch so the total KL
    # weight accumulated over one epoch is 1.
    divergence_fn = lambda q, p, ignore: (tfd.kl_divergence(q, p) /
                                          num_updates)
    if backend.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    if not use_variational_layers:
        # Deterministic path: 1x1 -> kxk -> 1x1 convolutions, optionally with
        # always-on dropout before each convolution.
        first_conv_2d = layers.Conv2D(
            filters1, (1, 1),
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name=conv_name_base + '2a')
        if dropout_rate > 0.:
            # training=True keeps dropout active at inference (MC dropout).
            x = layers.Dropout(dropout_rate)(input_tensor, training=True)
            x = first_conv_2d(x)
        else:
            x = first_conv_2d(input_tensor)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=BATCH_NORM_DECAY,
                                      epsilon=BATCH_NORM_EPSILON,
                                      name=bn_name_base + '2a')(x)
        x = layers.Activation('relu')(x)
        if dropout_rate > 0.:
            x = layers.Dropout(dropout_rate)(x, training=True)
        x = layers.Conv2D(filters2, kernel_size,
                          use_bias=False,
                          padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                          name=conv_name_base + '2b')(x)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=BATCH_NORM_DECAY,
                                      epsilon=BATCH_NORM_EPSILON,
                                      name=bn_name_base + '2b')(x)
        x = layers.Activation('relu')(x)
        if dropout_rate > 0.:
            x = layers.Dropout(dropout_rate)(x, training=True)
        x = layers.Conv2D(filters3, (1, 1),
                          use_bias=False,
                          kernel_initializer='he_normal',
                          kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                          name=conv_name_base + '2c')(x)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=BATCH_NORM_DECAY,
                                      epsilon=BATCH_NORM_EPSILON,
                                      name=bn_name_base + '2c')(x)
    else:
        # Variational path: Flipout convolutions learn a posterior over the
        # kernels; the (scaled) KL divergence is added via divergence_fn.
        x = tfpl.Convolution2DFlipout(
            filters1,
            kernel_size=(1, 1),
            padding='SAME',
            name=conv_name_base + '2a',
            kernel_divergence_fn=divergence_fn,
        )(input_tensor)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=BATCH_NORM_DECAY,
                                      epsilon=BATCH_NORM_EPSILON,
                                      name=bn_name_base + '2a')(x)
        x = layers.Activation('relu')(x)
        x = tfpl.Convolution2DFlipout(
            filters2,
            kernel_size=kernel_size,
            padding='SAME',
            activation=None,
            name=conv_name_base + '2b',
            kernel_divergence_fn=divergence_fn,
        )(x)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=BATCH_NORM_DECAY,
                                      epsilon=BATCH_NORM_EPSILON,
                                      name=bn_name_base + '2b')(x)
        x = layers.Activation('relu')(x)
        x = tfpl.Convolution2DFlipout(
            filters3,
            kernel_size=(1, 1),
            padding='SAME',
            activation=None,
            name=conv_name_base + '2c',
            kernel_divergence_fn=divergence_fn,
        )(x)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=BATCH_NORM_DECAY,
                                      epsilon=BATCH_NORM_EPSILON,
                                      name=bn_name_base + '2c')(x)
    # Identity shortcut: add the block input back, then the final ReLU.
    x = layers.add([x, input_tensor])
    x = layers.Activation('relu')(x)
    return x
def ResNet50(method, num_classes, num_updates, dropout_rate):
    """Instantiates the ResNet50 architecture.

    Args:
        method: `str`, method for accounting for uncertainty. Must be one of
            ['vanilla', 'll_dropout', 'll_svi', 'dropout', 'svi',
            'dropout_nofirst']
        num_classes: `int` number of classes for image classification.
        num_updates: integer, total steps in an epoch (for weighting the loss)
        dropout_rate: Dropout rate for ll_dropout, dropout methods.

    Returns:
        A Keras model instance.

    pylint: disable=invalid-name
    """
    # Determine proper input shape
    if backend.image_data_format() == 'channels_first':
        input_shape = (3, 224, 224)
        bn_axis = 1
    else:
        input_shape = (224, 224, 3)
        bn_axis = 3
    # A dropout-based method must come with a nonzero rate, and vice versa.
    if (method in ['dropout', 'll_dropout', 'dropout_nofirst'
                  ]) != (dropout_rate > 0.):
        raise ValueError(
            'Dropout rate should be nonzero iff a dropout method is used.'
            'Method is {}, dropout is {}.'.format(method, dropout_rate))
    use_variational_layers = method == 'svi'
    # Hidden-layer dropout applies only to the all-layer dropout methods;
    # 'll_dropout' adds dropout before the last layer only (see below).
    hidden_layer_dropout = dropout_rate if method in [
        'dropout', 'dropout_nofirst'
    ] else 0.
    img_input = layers.Input(shape=input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    # 'dropout_nofirst' deliberately skips dropout before the first conv.
    if (dropout_rate > 0.) and (method != 'dropout_nofirst'):
        # training=True keeps dropout active at inference (MC dropout).
        x = layers.Dropout(hidden_layer_dropout)(x, training=True)
    x = layers.Conv2D(64, (7, 7),
                      use_bias=False,
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    # Pre-bind the options shared by every residual block.
    conv_block = functools.partial(
        conv_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    identity_block = functools.partial(
        identity_block_base,
        num_updates=num_updates,
        dropout_rate=hidden_layer_dropout,
        use_variational_layers=use_variational_layers)
    # Standard ResNet50 stages 2-5.
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    # Last-layer dropout (covers both 'dropout*' and 'll_dropout' methods).
    if dropout_rate > 0.:
        x = layers.Dropout(dropout_rate)(x, training=True)
    if method in ['ll_svi', 'svi']:
        # Variational final layer with a trainable prior and exact KL,
        # weighted so one epoch accumulates a total KL weight of 1.
        x = tfpl.dense_variational_v2.DenseVariational(
            units=num_classes,
            make_posterior_fn=posterior_mean_field,
            make_prior_fn=functools.partial(
                prior_trainable, num_updates=num_updates),
            use_bias=True,
            kl_weight=1. / num_updates,
            kl_use_exact=True,
            name='fc1000')(x)
    else:
        x = layers.Dense(
            num_classes,
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name='fc1000')(x)
    # Create model.
    return models.Model(img_input, x, name='resnet50')
def get_keras_layers_for_general_diversity_and_depth_model(
    layer_description, num_filters, num_layers, num_downsamples,
    group_norm_num_groups):
    """Gets Keras layers for the Omniglot and CIFAR-100 experiments.

    This model is a generalized version of the one proposed by the authors of
    "Diversity and Depth in Per-Example Routing Models"
    (https://openreview.net/pdf?id=BkxWJnC9tX).

    Args:
        layer_description: (list of string) description of a single layer, see
            `get_components_layer_for_general_diversity_and_depth_model`.
        num_filters: (int) number of filters for each convolution.
        num_layers: (int) number of layers.
        num_downsamples: (int) number of times the input should be downsampled
            by a factor of 2 before reaching the linear task-specific heads.
            May be 0, in which case no downsampling is performed.
        group_norm_num_groups: (int) number of groups to use for group
            normalization.

    Returns:
        A list of lists of `keras.layer.Layer`s, where the outer index
        corresponds to layer id, and inner index to component id within a
        layer.
    """
    keras_layers = []
    # Initial shared 1x1 convolution, which increases the number of channels
    # from 1 to `num_filters`.
    keras_layers.append(
        [layers.Conv2D(filters=num_filters, kernel_size=1, padding="same")])
    keras_layers.append([GroupNorm(num_groups=group_norm_num_groups)])
    keras_layers.append([layers.ReLU()])
    if num_downsamples > 0:
        downsampling_interval = num_layers / num_downsamples
        # Subset of `range(0, num_layers)` - subset of layers for downsampling.
        downsampling_layers = [
            int(downsampling_interval * i) for i in range(num_downsamples)
        ]
    else:
        # No downsampling requested; previously this divided by zero.
        downsampling_layers = []
    for layer_id in range(num_layers):
        # Downsampling layers use stride 2; all others stride 1.
        if layer_id in downsampling_layers:
            layer_strides = 2
        else:
            layer_strides = 1
        keras_layers.append(
            get_components_layer_for_general_diversity_and_depth_model(
                layer_description, num_filters, group_norm_num_groups,
                layer_strides))
        keras_layers.append([GroupNorm(num_groups=group_norm_num_groups)])
        keras_layers.append([layers.ReLU()])
    # At this point, the feature map is `2^num_downsamples` times smaller.
    keras_layers.append([layers.Flatten()])
    keras_layers.append([layers.Dropout(0.5)])
    return keras_layers
def stack_layers(inputs, net_layers, kernel_initializer='glorot_uniform'):
    """Applies the layers described in `net_layers` to every input tensor.

    Args:
        inputs: a dict containing input_types and input_placeholders for each
            key and value pair, respectively.
        net_layers: a list of dicts, one per layer to be used in the network.
            Each dict requires the key 'type'; all other keys depend on the
            layer type.
        kernel_initializer: initialization configuration passed to keras (see
            keras initializers).

    Returns:
        A dict with the same keys as `inputs`, mapping each input type to the
        tensor obtained by applying every layer in `net_layers` (the same
        layer instances, hence shared weights) to the corresponding
        placeholder.
    """
    outputs = {key: tensor for key, tensor in inputs.items()}

    for spec in net_layers:
        # Optional l2 regularization, honored by Dense and Conv2D layers.
        l2_reg = spec.get('l2_reg')
        if l2_reg:
            l2_reg = l2(spec['l2_reg'])

        kind = spec['type']
        layer_name = spec.get('name')

        # Instantiate one layer object so all inputs share its weights.
        if kind in [
            'softplus', 'softsign', 'softmax', 'tanh', 'sigmoid', 'relu',
            'selu'
        ]:
            built = layers.Dense(
                spec['size'],
                activation=kind,
                kernel_initializer=kernel_initializer,
                kernel_regularizer=l2_reg,
                name=layer_name)
        elif kind == 'None':
            built = layers.Dense(
                spec['size'],
                kernel_initializer=kernel_initializer,
                kernel_regularizer=l2_reg,
                name=layer_name)
        elif kind == 'Conv2D':
            built = layers.Conv2D(
                spec['channels'],
                kernel_size=spec['kernel'],
                activation='relu',
                data_format='channels_last',
                kernel_regularizer=l2_reg,
                name=layer_name)
        elif kind == 'BatchNormalization':
            built = layers.BatchNormalization(name=layer_name)
        elif kind == 'MaxPooling2D':
            built = layers.MaxPooling2D(
                pool_size=spec['pool_size'],
                data_format='channels_first',
                name=layer_name)
        elif kind == 'Dropout':
            built = layers.Dropout(spec['rate'], name=layer_name)
        elif kind == 'Flatten':
            built = layers.Flatten(name=layer_name)
        else:
            raise ValueError("Invalid layer type '{}'".format(kind))

        # Apply the layer to each tensor accumulated so far.
        for key in outputs:
            outputs[key] = built(outputs[key])

    return outputs
def build_model(
    n_classes: int,
    n_packet_features: int,
    n_meta_features: int = 7,
    dilations: bool = True,
    tag: str = "varcnn",
):
    """Build the Var-CNN model.

    The resulting model takes a single input of shape
    (n_samples, n_packet_features + n_meta_features). The meta features must
    be the rightmost (last) features in the matrix. The model handles
    separating the two types of features and reshaping them as necessary.

    Parameters:
    -----------
    n_classes :
        The number of classes to be predicted.
    n_packet_features :
        The number of packet features such as the number of interarrival
        times or the number of packet directions or sizes.
    n_meta_features:
        The number of meta features such as total packet counts, total
        transmission duration, etc.
    """
    has_metadata = n_meta_features > 0

    input_layer = keras.Input(
        shape=(n_packet_features + n_meta_features, ), name="input")

    # Packet-feature branch: slice off the packet features (when metadata is
    # present) and run them through the dir/time ResNet.
    if has_metadata:
        packet_features = Crop(end=n_packet_features)(input_layer)
    else:
        packet_features = input_layer
    packet_features = layers.Reshape((n_packet_features, 1))(packet_features)
    block_fn = dilated_basic_1d if dilations else basic_1d
    resnet_out = ResNet18(packet_features, tag, block=block_fn)

    if has_metadata:
        # Metadata branch: a small MLP acting as an embedding of the metadata.
        meta = Crop(start=-n_meta_features)(input_layer)
        meta = layers.Dense(32)(meta)
        meta = layers.BatchNormalization()(meta)
        meta = layers.Activation('relu')(meta)

        combined = layers.Concatenate()([resnet_out, meta])
        # Better to have a final fc layer when combining multiple models.
        combined = layers.Dense(1024)(combined)
        combined = layers.BatchNormalization()(combined)
        combined = layers.Activation('relu')(combined)
        combined = layers.Dropout(0.5)(combined)
    else:
        combined = resnet_out

    model_output = layers.Dense(
        units=n_classes, activation='softmax', name='model_output')(combined)

    model = keras.Model(inputs=input_layer, outputs=model_output)
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=keras.optimizers.Adam(0.001))
    return model
def _add_df_conv_block(model, block_no, filters, kernel_size, conv_stride,
                       pool_size, pool_stride):
    """Append one Conv-Conv-Pool-Dropout block to `model`.

    Block 1 uses ELU activations (named 'blockN_adv_actM'); later blocks use
    ReLU (named 'blockN_actM'), matching the original DF architecture.
    """
    use_elu = block_no == 1
    for conv_no in (1, 2):
        model.add(
            layers.Conv1D(filters=filters,
                          kernel_size=kernel_size,
                          strides=conv_stride,
                          padding='same',
                          name='block{}_conv{}'.format(block_no, conv_no)))
        model.add(layers.BatchNormalization(axis=-1))
        if use_elu:
            model.add(
                layers.ELU(alpha=1.0,
                           name='block{}_adv_act{}'.format(block_no, conv_no)))
        else:
            model.add(
                layers.Activation(
                    'relu', name='block{}_act{}'.format(block_no, conv_no)))
    model.add(
        layers.MaxPooling1D(pool_size=pool_size,
                            strides=pool_stride,
                            padding='same',
                            name='block{}_pool'.format(block_no)))
    model.add(layers.Dropout(0.1, name='block{}_dropout'.format(block_no)))


def build_model(n_features: int, n_classes: int):
    """Create and return the DeepFingerprinting Model.

    Args:
        n_features: number of input features (trace length).
        n_classes: number of output classes.

    Returns:
        A compiled `keras.Sequential` model.
    """
    model = keras.Sequential()
    model.add(layers.Reshape((n_features, 1), input_shape=(n_features, )))

    # (block number, filters) per block; kernel size 8, conv stride 1,
    # pool size 8, and pool stride 4 are shared by all four blocks.  This
    # replaces the original 'None'-padded parameter lists, which contained an
    # index typo (block2's pool used stride index 3 instead of 2 — same value
    # by coincidence only).
    for block_no, filters in ((1, 32), (2, 64), (3, 128), (4, 256)):
        _add_df_conv_block(model, block_no, filters, kernel_size=8,
                           conv_stride=1, pool_size=8, pool_stride=4)

    model.add(layers.Flatten(name='flatten'))

    # Two fully-connected blocks with heavy dropout (0.7 then 0.5).
    for fc_no, dropout_rate in ((1, 0.7), (2, 0.5)):
        model.add(
            layers.Dense(512,
                         kernel_initializer=initializers.glorot_uniform(seed=0),
                         name='fc{}'.format(fc_no)))
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu', name='fc{}_act'.format(fc_no)))
        model.add(layers.Dropout(dropout_rate,
                                 name='fc{}_dropout'.format(fc_no)))

    model.add(
        layers.Dense(n_classes,
                     kernel_initializer=initializers.glorot_uniform(seed=0),
                     name='fc3'))
    model.add(layers.Activation('softmax', name="softmax"))

    model.compile(loss="categorical_crossentropy",
                  # `learning_rate` replaces the deprecated `lr` alias, for
                  # consistency with the other optimizers in this file.
                  optimizer=keras.optimizers.Adamax(learning_rate=0.002,
                                                    beta_1=0.9,
                                                    beta_2=0.999,
                                                    epsilon=1e-08,
                                                    decay=0.0),
                  metrics=["accuracy"])
    return model