def _build_lstur(self):
    """The main function to create LSTUR's logic. The core of LSTUR
    is a user encoder and a news encoder.

    Returns:
        object: a model used for training.
        object: a model used for evaluation and inference.
    """
    hparams = self.hparams

    # Clicked-news history, training candidates (npratio negatives + 1
    # positive), and a single candidate for inference-time scoring.
    his_input_title = keras.Input(
        shape=(hparams.his_size, hparams.title_size), dtype="int32"
    )
    pred_input_title = keras.Input(
        shape=(hparams.npratio + 1, hparams.title_size), dtype="int32"
    )
    pred_input_title_one = keras.Input(
        shape=(1, hparams.title_size), dtype="int32"
    )
    pred_title_reshape = layers.Reshape((hparams.title_size,))(pred_input_title_one)
    user_indexes = keras.Input(shape=(1,), dtype="int32")

    embedding_layer = layers.Embedding(
        self.word2vec_embedding.shape[0],
        hparams.word_emb_dim,
        weights=[self.word2vec_embedding],
        trainable=True,
    )

    titleencoder = self._build_newsencoder(embedding_layer)
    self.userencoder = self._build_userencoder(titleencoder, type=hparams.type)
    self.newsencoder = titleencoder

    user_present = self.userencoder([his_input_title, user_indexes])
    news_present = layers.TimeDistributed(self.newsencoder)(pred_input_title)
    news_present_one = self.newsencoder(pred_title_reshape)

    # Training head: softmax over the npratio + 1 candidates.
    preds = layers.Dot(axes=-1)([news_present, user_present])
    preds = layers.Activation(activation="softmax")(preds)

    # Scoring head: sigmoid click probability for a single candidate.
    pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
    pred_one = layers.Activation(activation="sigmoid")(pred_one)

    model = keras.Model([user_indexes, his_input_title, pred_input_title], preds)
    scorer = keras.Model(
        [user_indexes, his_input_title, pred_input_title_one], pred_one
    )

    return model, scorer
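The training graph and the scoring graph share all encoder weights and differ only in their heads. A self-contained sketch of that asymmetry, with hypothetical sizes (`dim` and `npratio` below are placeholders, not the LSTUR hyperparameters):

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

dim, npratio = 16, 4
user = keras.Input(shape=(dim,))               # user representation
cands = keras.Input(shape=(npratio + 1, dim))  # candidate news vectors
logits = layers.Dot(axes=-1)([cands, user])    # one score per candidate
train_head = keras.Model([cands, user], layers.Activation("softmax")(logits))
probs = train_head.predict(
    [np.random.rand(2, npratio + 1, dim), np.random.rand(2, dim)]
)
# each row of `probs` sums to 1 across the npratio + 1 candidates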
def test_get_metadata_functional(self):
    inputs1 = keras.Input(shape=(10,), name='model_input1')
    inputs2 = keras.Input(shape=(10,), name='model_input2')
    x = keras.layers.Dense(32, activation='relu')(inputs1)
    x = keras.layers.Dense(32, activation='relu')(x)
    x = keras.layers.concatenate([x, inputs2])
    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
    fun_model = keras.Model(
        inputs=[inputs1, inputs2], outputs=outputs, name='fun')
    builder = keras_metadata_builder.KerasGraphMetadataBuilder(fun_model)
    generated_md = builder.get_metadata()
    expected_md = {
        'inputs': {
            'model_input1': {
                'input_tensor_name': 'model_input1:0',
                'modality': 'numeric',
                'encoding': 'identity'
            },
            'model_input2': {
                'input_tensor_name': 'model_input2:0',
                'modality': 'numeric',
                'encoding': 'identity'
            }
        },
        'outputs': {
            'dense_2/Sigmoid': {
                'output_tensor_name': 'dense_2/Sigmoid:0'
            }
        },
        'framework': 'Tensorflow',
        'tags': ['explainable_ai_sdk']
    }
    self.assertDictEqual(expected_md, generated_md)
def PersonalizedAttentivePooling(dim1, dim2, dim3, seed=0):
    """Soft alignment attention implementation.

    Args:
        dim1 (int): first dimension of value shape.
        dim2 (int): second dimension of value shape.
        dim3 (int): shape of query.

    Returns:
        object: weighted summary of input values.
    """
    vecs_input = keras.Input(shape=(dim1, dim2), dtype="float32")
    query_input = keras.Input(shape=(dim3,), dtype="float32")

    user_vecs = layers.Dropout(0.2)(vecs_input)
    user_att = layers.Dense(
        dim3,
        activation="tanh",
        kernel_initializer=keras.initializers.glorot_uniform(seed=seed),
        bias_initializer=keras.initializers.Zeros(),
    )(user_vecs)
    user_att2 = layers.Dot(axes=-1)([query_input, user_att])
    user_att2 = layers.Activation("softmax")(user_att2)
    user_vec = layers.Dot((1, 1))([user_vecs, user_att2])

    model = keras.Model([vecs_input, query_input], user_vec)
    return model
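A minimal usage sketch of the pooling model above (the sizes are hypothetical): a batch of dim1 value vectors of size dim2 is collapsed into one vector of size dim2, weighted by a query of size dim3.

import numpy as np

pooler = PersonalizedAttentivePooling(dim1=50, dim2=400, dim3=200)
values = np.random.rand(8, 50, 400).astype("float32")  # (batch, dim1, dim2)
queries = np.random.rand(8, 200).astype("float32")     # (batch, dim3)
pooled = pooler.predict([values, queries])             # shape (8, 400)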
def _build_newsencoder(self, embedding_layer):
    """The main function to create the news encoder of LSTUR.

    Args:
        embedding_layer (object): a word embedding layer.

    Returns:
        object: the news encoder of LSTUR.
    """
    hparams = self.hparams
    sequences_input_title = keras.Input(shape=(hparams.title_size,), dtype="int32")
    embedded_sequences_title = embedding_layer(sequences_input_title)

    y = layers.Dropout(hparams.dropout)(embedded_sequences_title)
    y = layers.Conv1D(
        hparams.filter_num,
        hparams.window_size,
        activation=hparams.cnn_activation,
        padding="same",
        bias_initializer=keras.initializers.Zeros(),
        kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
    )(y)
    y = layers.Dropout(hparams.dropout)(y)
    # Re-apply the padding mask so the attention layer ignores padded tokens.
    y = layers.Masking()(
        OverwriteMasking()([y, ComputeMasking()(sequences_input_title)])
    )
    pred_title = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y)

    model = keras.Model(sequences_input_title, pred_title, name="news_encoder")
    return model
def _build_userencoder(self, titleencoder, type="ini"): """The main function to create user encoder of LSTUR. Args: titleencoder (object): the news encoder of LSTUR. Return: object: the user encoder of LSTUR. """ hparams = self.hparams his_input_title = keras.Input( shape=(hparams.his_size, hparams.title_size), dtype="int32" ) user_indexes = keras.Input(shape=(1,), dtype="int32") user_embedding_layer = layers.Embedding( len(self.train_iterator.uid2index), hparams.gru_unit, trainable=True, embeddings_initializer="zeros", ) long_u_emb = layers.Reshape((hparams.gru_unit,))( user_embedding_layer(user_indexes) ) click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title) if type == "ini": user_present = layers.GRU( hparams.gru_unit, kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed), recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed), bias_initializer=keras.initializers.Zeros(), )( layers.Masking(mask_value=0.0)(click_title_presents), initial_state=[long_u_emb], ) elif type == "con": short_uemb = layers.GRU( hparams.gru_unit, kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed), recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed), bias_initializer=keras.initializers.Zeros(), )(layers.Masking(mask_value=0.0)(click_title_presents)) user_present = layers.Concatenate()([short_uemb, long_u_emb]) user_present = layers.Dense( hparams.gru_unit, bias_initializer=keras.initializers.Zeros(), kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed), )(user_present) model = keras.Model( [his_input_title, user_indexes], user_present, name="user_encoder" ) return model
def get_keras_model_v1():
    import tensorflow.compat.v1.keras as keras

    inputs = keras.Input(shape=(784,), name="img")
    x = keras.layers.Dense(64, activation="relu")(inputs)
    x = keras.layers.Dense(64, activation="relu")(x)
    outputs = keras.layers.Dense(10, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
    return model
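A minimal usage sketch (the data below is a random stand-in for flattened MNIST images, not part of the original source):

import numpy as np

model = get_keras_model_v1()
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
x = np.random.rand(128, 784).astype("float32")  # stand-in images
y = np.random.randint(0, 10, size=(128,))       # stand-in digit labels
model.fit(x, y, batch_size=32, epochs=1)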
def test_get_metadata_multiple_outputs_incorrect_output(self):
    inputs1 = keras.Input(shape=(10,), name='model_input')
    x = keras.layers.Dense(32, activation='relu')(inputs1)
    x = keras.layers.Dense(32, activation='relu')(x)
    outputs1 = keras.layers.Dense(1, activation='sigmoid')(x)
    outputs2 = keras.layers.Dense(1, activation='relu')(x)
    fun_model = keras.Model(
        inputs=[inputs1], outputs=[outputs1, outputs2], name='fun')
    with self.assertRaisesRegex(
            ValueError, 'Provided output is not one of model outputs'):
        keras_metadata_builder.KerasGraphMetadataBuilder(
            fun_model, outputs_to_explain=[fun_model.layers[0].output])
def test_number_of_weights():
    """Make sure the number of trainable weights is set up correctly."""
    N_HIDDEN = 5
    N_MIXES = 5
    inputs = keras.layers.Input(shape=(1,))
    x = keras.layers.Dense(N_HIDDEN, activation='relu')(inputs)
    m = mdn.MDN(1, N_MIXES)
    predictions = m(x)
    model = keras.Model(inputs=inputs, outputs=predictions)
    model.compile(loss=mdn.get_mixture_loss_func(1, N_MIXES),
                  optimizer=keras.optimizers.Adam())
    num_mdn_params = np.sum(
        [w.get_shape().num_elements() for w in m.trainable_weights])
    assert num_mdn_params == 90
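Where the expected count of 90 comes from, assuming the keras-mdn-layer convention that MDN(output_dim, n_mixes) emits 2 * output_dim * n_mixes + n_mixes units (means, scales, and mixture weights):

N_HIDDEN, N_MIXES, OUTPUT_DIM = 5, 5, 1
n_outputs = 2 * OUTPUT_DIM * N_MIXES + N_MIXES  # 5 mus + 5 sigmas + 5 pis = 15
expected = N_HIDDEN * n_outputs + n_outputs     # 75 kernel weights + 15 biases
assert expected == 90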
def test_get_metadata_multiple_outputs(self):
    inputs1 = keras.Input(shape=(10,), name='model_input')
    x = keras.layers.Dense(32, activation='relu')(inputs1)
    x = keras.layers.Dense(32, activation='relu')(x)
    outputs1 = keras.layers.Dense(1, activation='sigmoid')(x)
    outputs2 = keras.layers.Dense(1, activation='relu')(x)
    fun_model = keras.Model(
        inputs=[inputs1], outputs=[outputs1, outputs2], name='fun')
    builder = keras_metadata_builder.KerasGraphMetadataBuilder(
        fun_model, outputs_to_explain=[fun_model.outputs[0]])
    generated_md = builder.get_metadata()
    expected_outputs = {
        'dense_2/Sigmoid': {
            'output_tensor_name': 'dense_2/Sigmoid:0'
        }
    }
    self.assertDictEqual(expected_outputs, generated_md['outputs'])
def build_model(
    n_classes: int,
    n_packet_features: int,
    n_meta_features: int = 7,
    dilations: bool = True,
    tag: str = "varcnn",
):
    """Build the Var-CNN model.

    The resulting model takes a single input of shape
    (n_samples, n_packet_features + n_meta_features). The meta features
    must be the rightmost (last) features in the matrix. The model
    handles separating the two types of features and reshaping them as
    necessary.

    Parameters
    ----------
    n_classes :
        The number of classes to be predicted.
    n_packet_features :
        The number of packet features, such as the number of
        interarrival times or the number of packet directions or sizes.
    n_meta_features :
        The number of meta features, such as total packet counts,
        total transmission duration, etc.
    """
    use_metadata = n_meta_features > 0

    # Construct the dir or time ResNet
    input_layer = keras.Input(
        shape=(n_packet_features + n_meta_features,), name="input")
    layer = (Crop(end=n_packet_features)(input_layer)
             if use_metadata else input_layer)
    layer = layers.Reshape((n_packet_features, 1))(layer)
    output_layer = ResNet18(
        layer, tag, block=(dilated_basic_1d if dilations else basic_1d))

    concat_params = [output_layer]
    combined = concat_params[0]

    # Construct an MLP for the metadata
    if use_metadata:
        metadata_output = Crop(start=-n_meta_features)(input_layer)
        # Consider this the embedding of all the metadata
        metadata_output = layers.Dense(32)(metadata_output)
        metadata_output = layers.BatchNormalization()(metadata_output)
        metadata_output = layers.Activation('relu')(metadata_output)

        concat_params.append(metadata_output)
        combined = layers.Concatenate()(concat_params)

    # Better to have a final fully-connected layer when combining
    # multiple models
    if len(concat_params) > 1:
        combined = layers.Dense(1024)(combined)
        combined = layers.BatchNormalization()(combined)
        combined = layers.Activation('relu')(combined)
        combined = layers.Dropout(0.5)(combined)

    model_output = layers.Dense(
        units=n_classes, activation='softmax', name='model_output')(combined)

    model = keras.Model(inputs=input_layer, outputs=model_output)
    model.compile(
        loss='categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=keras.optimizers.Adam(0.001))
    return model
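A minimal usage sketch with hypothetical shapes (100 classes, 5000 packet-direction features plus the 7 default meta features; the data is random filler):

import numpy as np

model = build_model(n_classes=100, n_packet_features=5000)
x = np.random.rand(4, 5000 + 7).astype("float32")    # meta features come last
y = np.eye(100)[np.random.randint(0, 100, size=4)]   # one-hot labels
model.fit(x, y, epochs=1, batch_size=4)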
def build(self, Px, dx, X, N_neurons=180, eta=0.01, exp_decay=[50, 0.9]):
    '''Set up the Feature Extractor.

    The following parameters are required:
    - Px: distribution of x, with shape [N, N]. N will be used as the
      number of points on each side of the lattice.
    - dx: discretization size of the lattice, so that sum Px dx**2 = 1
      in this 2-d case.
    - X: points of the lattice (usually a meshgrid). They must be the
      same points on which Px has been calculated.
    - N_neurons: number of neurons of the neural network (same in each
      layer).
    - eta: learning rate.
    - exp_decay = [decay_steps, decay_rate] for an exponentially
      decaying learning rate; in particular,
      decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps).
    '''
    self.xdelta = dx
    self.x_points = X
    self.P_x = Px
    self.N = np.shape(Px)[0]
    self.P_x_short = self.P_x.reshape([1, self.N * self.N])
    self.Ny = self.N
    self.eta = eta
    self.neurons_feature = N_neurons

    self.graph = tf.Graph()
    with self.graph.as_default():
        #############################
        # Define the input placeholders and the feature neural network
        self.tf_x = tf.placeholder(tf.float32, shape=[2, None])
        self.tf_i = tf.placeholder(tf.float32)
        self.tf_a = tf.placeholder(tf.float32)

        self.tf_theta_input = K.layers.Input(shape=(2,))
        self.tf_theta_layers = self.tf_theta_input
        self.tf_theta_layers = K.layers.Dense(
            self.neurons_feature,
            input_shape=(2,),
            activation=K.layers.ReLU(),
            kernel_initializer=tf.random_normal_initializer(),
            bias_initializer=tf.random_normal_initializer(),
        )(self.tf_theta_layers)
        self.tf_theta_layers = K.layers.Dense(
            self.neurons_feature,
            activation=K.layers.ReLU(),
            kernel_initializer=tf.initializers.glorot_normal(),
            bias_initializer=tf.random_normal_initializer(),
        )(self.tf_theta_layers)
        self.tf_theta_layers = K.layers.Dense(
            self.neurons_feature,
            activation=K.activations.tanh,
            kernel_initializer=tf.initializers.glorot_normal(),
            bias_initializer=tf.random_normal_initializer(),
        )(self.tf_theta_layers)
        self.tf_theta_layers = K.layers.Dense(
            1,
            kernel_initializer=tf.initializers.glorot_normal(),
            bias_initializer=tf.random_normal_initializer(),
        )(self.tf_theta_layers)
        self.tf_theta_net = K.Model(self.tf_theta_input, self.tf_theta_layers)
        self.tf_f = self.tf_a * tf.reshape(
            self.tf_theta_net(tf.transpose(self.tf_x)), [1, -1])

        #############################
        # Regularizing term
        self.tf_grads_f = tf.gradients(self.tf_f, self.tf_x)[0]
        self.tf_norm2_grad_f = tf.reduce_sum(self.tf_grads_f ** 2, 0)
        self.tf_term1_local = -0.5 * (
            tf.log(self.tf_norm2_grad_f) * self.P_x_short * self.xdelta ** 2)
        self.tf_term1 = -0.5 * tf.reduce_sum(
            tf.log(self.tf_norm2_grad_f) * self.P_x_short * self.xdelta ** 2)

        #############################
        # Entropy term
        # Define the current range of the feature (in which to approximate Py)
        self.tf_y_min = tf.reduce_min(self.tf_f)
        self.tf_y_max = tf.reduce_max(self.tf_f)
        self.tf_ydelta = tf.stop_gradient(
            (self.tf_y_max - self.tf_y_min) / (self.Ny - 1))
        self.tf_y_linspace = tf.reshape(
            tf.stop_gradient(
                tf.linspace(self.tf_y_min, self.tf_y_max, self.Ny)),
            [self.Ny, 1])

        # Define a triangular histogram (so that it is differentiable)
        self.tf_y_mask_left = tf.logical_and(
            self.tf_y_linspace - self.tf_ydelta < self.tf_f,
            self.tf_y_linspace > self.tf_f)
        self.tf_y_mask_right = tf.logical_and(
            self.tf_y_linspace <= self.tf_f,
            self.tf_y_linspace + self.tf_ydelta > self.tf_f)
        self.tf_y_line_left = (
            1 / self.tf_ydelta
            + 1 / self.tf_ydelta ** 2 * (self.tf_f - self.tf_y_linspace))
        self.tf_y_line_right = (
            1 / self.tf_ydelta
            - 1 / self.tf_ydelta ** 2 * (self.tf_f - self.tf_y_linspace))
        self.tf_ydelta_left = self.tf_y_line_left * tf.stop_gradient(
            tf.cast(self.tf_y_mask_left, tf.float32))
        self.tf_ydelta_right = self.tf_y_line_right * tf.stop_gradient(
            tf.cast(self.tf_y_mask_right, tf.float32))

        # Approximate the distribution of the feature through a
        # differentiable histogram
        self.tf_P_y = tf.reduce_sum(
            (self.tf_ydelta_left + self.tf_ydelta_right)
            * self.P_x_short * self.xdelta ** 2, 1)

        # Calculate the entropy of the feature
        self.tf_H_y = -tf.reduce_sum(
            self.tf_P_y * tf.log(self.tf_P_y)) * self.tf_ydelta

        #############################
        # Renormalized Mutual Information and training methods
        self.tf_cost = self.tf_term1 + self.tf_H_y

        # Optimizer (with exponentially decaying learning rate)
        self.tf_optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=tf.train.exponential_decay(
                self.eta, self.tf_i, exp_decay[0], exp_decay[1]))
        # Gradients of the cost function
        self.tf_grad_cost = self.tf_optimizer.compute_gradients(
            -self.tf_cost, self.tf_theta_net.trainable_variables)
        # Train step
        self.tf_train_step = self.tf_optimizer.apply_gradients(
            self.tf_grad_cost)

        # Initialize the neural network
        self.tf_init_op = tf.global_variables_initializer()

    self.sess = tf.Session(graph=self.graph)
    self.sess.run(self.tf_init_op)
    self.costs = []
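A minimal usage sketch (the enclosing class name `FeatureExtractor` is hypothetical; the source above only shows the build method): construct a normalized 2-d Gaussian Px on an N x N lattice and set up the graph.

import numpy as np

N = 64
axis = np.linspace(-3.0, 3.0, N)
dx = axis[1] - axis[0]
X = np.meshgrid(axis, axis)
Px = np.exp(-(X[0] ** 2 + X[1] ** 2) / 2.0)
Px /= Px.sum() * dx ** 2        # normalize so that sum(Px) * dx**2 == 1

extractor = FeatureExtractor()  # hypothetical class wrapping build()
extractor.build(Px, dx, X, N_neurons=180, eta=0.01, exp_decay=[50, 0.9])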