def _build_network(self, vocab_size, char_vocab_size, emb_weights=None,
                   hidden_units=256, trainable=False, batch_size=1):
    print('Build model...')
    text_input = Input(shape=(self._line_maxlen,))

    # Word embeddings: random glorot init unless pretrained weights are given.
    if emb_weights is None or len(emb_weights) == 0:
        emb = Embedding(vocab_size, 128, input_length=self._line_maxlen,
                        embeddings_initializer='glorot_normal',
                        trainable=trainable)(text_input)
    else:
        emb = Embedding(vocab_size, emb_weights.shape[1],
                        input_length=self._line_maxlen,
                        weights=[emb_weights],
                        trainable=trainable)(text_input)

    # Recurrent branch: forward and backward LSTM passes over the
    # embeddings, summed, then pooled with attention.
    char_lstm1 = LSTM(int(hidden_units / 4), kernel_initializer='he_normal',
                      recurrent_initializer='orthogonal',
                      bias_initializer='he_normal', activation='sigmoid',
                      recurrent_activation='sigmoid', dropout=0.25,
                      recurrent_dropout=0.5, unit_forget_bias=False,
                      return_sequences=True)(emb)
    char_lstm2 = LSTM(int(hidden_units / 4), kernel_initializer='he_normal',
                      recurrent_initializer='orthogonal',
                      bias_initializer='he_normal', activation='sigmoid',
                      recurrent_activation='sigmoid', dropout=0.25,
                      recurrent_dropout=0.5, unit_forget_bias=False,
                      return_sequences=True, go_backwards=True)(emb)
    char_merged = add([char_lstm1, char_lstm2])
    gmp = Attention()(char_merged)

    # Convolutional branch: treat the embedding matrix as a single-channel
    # 2D map and stack three conv/pool/dropout blocks.
    text_emb = Reshape((int(emb.shape[1]), int(emb.shape[2]), 1))(emb)
    text_cnn1 = Convolution2D(int(hidden_units / 8), (2, 1),
                              kernel_initializer='he_normal',
                              bias_initializer='he_normal', activation='relu',
                              padding='valid', use_bias=True)(text_emb)
    text_cnn1 = MaxPooling2D((2, 1))(text_cnn1)
    text_cnn1 = Dropout(0.5)(text_cnn1)

    text_cnn2 = Convolution2D(int(hidden_units / 4), (2, 1),
                              kernel_initializer='he_normal',
                              bias_initializer='he_normal', activation='relu',
                              padding='valid', use_bias=True)(text_cnn1)
    text_cnn2 = MaxPooling2D((2, 1))(text_cnn2)
    text_cnn2 = Dropout(0.5)(text_cnn2)

    text_cnn3 = Convolution2D(int(hidden_units / 4), (2, 1),
                              kernel_initializer='he_normal',
                              bias_initializer='he_normal', activation='relu',
                              padding='valid', use_bias=True)(text_cnn2)
    text_cnn3 = MaxPooling2D((2, 1))(text_cnn3)
    text_cnn3 = Dropout(0.5)(text_cnn3)
    char_gmp = GlobalMaxPooling2D()(text_cnn3)

    # Merge both branches and classify.
    merged = concatenate([char_gmp, gmp])
    output = Dense(2, activation='softmax')(merged)

    model = Model(inputs=[text_input], outputs=output)
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam,
                  metrics=['accuracy'])
    print('No. of parameters:', model.count_params())
    model.summary()
    return model
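# `Attention()` above is not a stock Keras layer; below is a minimal sketch
# of a compatible attention-pooling layer, under the assumption that it
# should collapse the (batch, timesteps, features) LSTM output into a single
# (batch, features) vector via learned softmax weights. This is an
# illustrative stand-in, not the author's definition.
from keras import backend as K
from keras.layers import Layer


class Attention(Layer):
    def build(self, input_shape):
        # One scalar score per timestep, computed from its feature vector.
        self.w = self.add_weight(name='att_w',
                                 shape=(int(input_shape[-1]), 1),
                                 initializer='glorot_normal',
                                 trainable=True)
        super().build(input_shape)

    def call(self, x):
        # Softmax over timesteps, then a weighted sum of the timesteps.
        scores = K.softmax(K.squeeze(K.dot(x, self.w), axis=-1))  # (batch, T)
        return K.sum(x * K.expand_dims(scores, axis=-1), axis=1)  # (batch, F)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])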
def _build_network(self, vocab_size, char_vocab_size, emb_weights=None,
                   hidden_units=256, trainable=False, batch_size=1):
    print('Build model...')

    # Word-level branch: mask padded (zero) timesteps, embed, then run
    # three conv/pool/dropout blocks over the embedding matrix.
    text_input = Input(name='text', shape=(self._line_maxlen,))
    text_input_mask = Masking(mask_value=0)(text_input)
    if emb_weights is None or len(emb_weights) == 0:
        emb = Embedding(vocab_size, 128, input_length=self._line_maxlen,
                        embeddings_initializer='glorot_normal',
                        trainable=trainable)(text_input_mask)
    else:
        emb = Embedding(vocab_size, emb_weights.shape[1],
                        input_length=self._line_maxlen,
                        weights=[emb_weights],
                        trainable=trainable)(text_input_mask)
    emb = Reshape((int(emb.shape[1]), int(emb.shape[2]), 1))(emb)

    t_cnn1 = Convolution2D(int(hidden_units / 8), (2, 1),
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal', activation='relu',
                           padding='valid', use_bias=True)(emb)
    t_cnn1 = MaxPooling2D((2, 1))(t_cnn1)
    t_cnn1 = Dropout(0.5)(t_cnn1)

    t_cnn2 = Convolution2D(int(hidden_units / 4), (2, 1),
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal', activation='relu',
                           padding='valid', use_bias=True)(t_cnn1)
    t_cnn2 = MaxPooling2D((2, 1))(t_cnn2)
    t_cnn2 = Dropout(0.5)(t_cnn2)

    t_cnn3 = Convolution2D(int(hidden_units / 4), (2, 1),
                           kernel_initializer='he_normal',
                           bias_initializer='he_normal', activation='relu',
                           padding='valid', use_bias=True)(t_cnn2)
    t_cnn3 = MaxPooling2D((2, 1))(t_cnn3)
    t_cnn3 = Dropout(0.5)(t_cnn3)
    gmp = GlobalMaxPooling2D()(t_cnn3)

    # Character-level branch: always-trainable 25-dim char embeddings,
    # followed by the same three-block conv stack.
    char_input = Input(name='char_text', shape=(self._line_char_maxlen,))
    char_emb = Embedding(char_vocab_size, 25,
                         input_length=self._line_char_maxlen,
                         embeddings_initializer='glorot_normal',
                         trainable=True)(char_input)
    char_emb = Reshape((int(char_emb.shape[1]),
                        int(char_emb.shape[2]), 1))(char_emb)

    char_cnn1 = Convolution2D(int(hidden_units / 8), (2, 1),
                              kernel_initializer='he_normal',
                              bias_initializer='he_normal', activation='relu',
                              padding='valid', use_bias=True)(char_emb)
    char_cnn1 = MaxPooling2D((2, 1))(char_cnn1)
    char_cnn1 = Dropout(0.5)(char_cnn1)

    char_cnn2 = Convolution2D(int(hidden_units / 4), (2, 1),
                              kernel_initializer='he_normal',
                              bias_initializer='he_normal', activation='relu',
                              padding='valid', use_bias=True)(char_cnn1)
    char_cnn2 = MaxPooling2D((2, 1))(char_cnn2)
    char_cnn2 = Dropout(0.5)(char_cnn2)

    char_cnn3 = Convolution2D(int(hidden_units / 4), (2, 1),
                              kernel_initializer='he_normal',
                              bias_initializer='he_normal', activation='relu',
                              padding='valid', use_bias=True)(char_cnn2)
    char_cnn3 = MaxPooling2D((2, 1))(char_cnn3)
    char_cnn3 = Dropout(0.5)(char_cnn3)
    char_gmp = GlobalMaxPooling2D()(char_cnn3)

    # Merge both branches and classify.
    merged = concatenate([char_gmp, gmp])
    output = Dense(2, activation='softmax')(merged)

    model = Model(inputs=[text_input, char_input], outputs=output)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    print('No. of parameters:', model.count_params())
    model.summary()
    return model
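# Minimal usage sketch for the two-input network above. `TextClassifier` is
# a hypothetical host class exposing _line_maxlen / _line_char_maxlen; the
# vocabulary sizes, batch size, and random data are illustrative only.
import numpy as np

clf = TextClassifier()  # hypothetical instance providing the maxlen fields
model = clf._build_network(vocab_size=5000, char_vocab_size=80)

X_text = np.random.randint(1, 5000, size=(32, clf._line_maxlen))
X_char = np.random.randint(1, 80, size=(32, clf._line_char_maxlen))
y = np.eye(2)[np.random.randint(0, 2, size=32)]  # one-hot, 2 classes

model.fit([X_text, X_char], y, batch_size=8, epochs=1)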
class SiameseTripletGraphEmbedding(SiameseGraphEmbedding):
    def __init__(self, d=128, margin=0.2, batch_size=2048, lr=0.001, epochs=10,
                 directed_proba=0.5, weighted=True, compression_func="sqrt",
                 negative_sampling_ratio=2.0, max_length=1400, truncating="post",
                 seed=0, verbose=False, conv1_kernel_size=12,
                 conv1_batch_norm=False, max1_pool_size=6, conv2_kernel_size=6,
                 conv2_batch_norm=True, max2_pool_size=3, lstm_unit_size=320,
                 dense1_unit_size=1024, dense2_unit_size=512,
                 directed_distance="euclidean", undirected_distance="euclidean",
                 source_target_dense_layers=True, embedding_normalization=False,
                 **kwargs):
        super().__init__(d, margin, batch_size, lr, epochs, directed_proba,
                         weighted, compression_func, negative_sampling_ratio,
                         max_length, truncating, seed, verbose,
                         conv1_kernel_size, conv1_batch_norm, max1_pool_size,
                         conv2_kernel_size, conv2_batch_norm, max2_pool_size,
                         lstm_unit_size, dense1_unit_size, dense2_unit_size,
                         directed_distance, undirected_distance,
                         source_target_dense_layers, embedding_normalization,
                         **kwargs)

    def identity_loss(self, y_true, y_pred):
        # The triplet loss is already computed inside the graph, so y_true
        # is a dummy; this just passes y_pred through as the loss.
        return K.mean(y_pred - 0 * y_true)

    def triplet_loss(self, inputs):
        encoded_i, encoded_j, encoded_k, is_directed = inputs
        positive_distance = Lambda(self.st_euclidean_distance,
                                   name="lambda_positive_distances")(
                                       [encoded_i, encoded_j, is_directed])
        negative_distance = Lambda(self.st_euclidean_distance,
                                   name="lambda_negative_distances")(
                                       [encoded_i, encoded_k, is_directed])
        return K.mean(K.maximum(
            0.0, positive_distance - negative_distance + self.margin))

    def build_keras_model(self, multi_gpu=False):
        if multi_gpu:
            device = "/cpu:0"
            allow_soft_placement = True
        else:
            device = "/gpu:0"
            allow_soft_placement = False

        K.clear_session()
        tf.reset_default_graph()
        config = tf.ConfigProto(allow_soft_placement=allow_soft_placement,
                                log_device_placement=True)
        self.sess = tf.Session(config=config)
        set_session(self.sess)

        with tf.device(device):
            input_seq_i = Input(batch_shape=(self.batch_size, None),
                                name="input_seq_i")
            input_seq_j = Input(batch_shape=(self.batch_size, None),
                                name="input_seq_j")
            input_seq_k = Input(batch_shape=(self.batch_size, None),
                                name="input_seq_k")
            is_directed = Input(batch_shape=(self.batch_size, 1),
                                dtype=tf.int8, name="is_directed")

            # Build the network shared by each siamese 'leg'.
            self.lstm_network = self.create_lstm_network()

            # Encode each of the inputs into a vector with the shared
            # conv-LSTM network.
            encoded_i = self.lstm_network(input_seq_i)
            encoded_j = self.lstm_network(input_seq_j)
            encoded_k = self.lstm_network(input_seq_k)
            if self.verbose:
                print(encoded_i, encoded_j, encoded_k)

            output = Lambda(self.triplet_loss,
                            name="lambda_triplet_loss_output")(
                                [encoded_i, encoded_j, encoded_k, is_directed])

            self.siamese_net = Model(
                inputs=[input_seq_i, input_seq_j, input_seq_k, is_directed],
                outputs=output)

        # Multi-gpu parallelization.
        if multi_gpu:
            self.siamese_net = multi_gpu_model(self.siamese_net, gpus=4,
                                               cpu_merge=True,
                                               cpu_relocation=False)

        # Compile & train.
        self.siamese_net.compile(
            loss=self.identity_loss,  # alternatives: binary_crossentropy, cross_entropy, contrastive_loss
            optimizer=Adam(lr=self.lr, beta_1=0.9, beta_2=0.999, epsilon=0.1),
        )
        if self.verbose:
            print("Network total weights:", self.siamese_net.count_params())

    def learn_embedding(self, network: MultiDigraphNetwork, network_val=None,
                        multi_gpu=False, subsample=True, n_steps=500,
                        validation_steps=None, tensorboard=True,
                        histogram_freq=0, early_stopping=False, edge_f=None,
                        is_weighted=False, no_python=False,
                        rebuild_model=False, seed=0, **kwargs):
        generator_train = self.get_training_data_generator(network, n_steps, seed)

        if network_val is not None:
            if not hasattr(self, "generator_val"):
                self.generator_val = SampledTripletDataGenerator(
                    network=network_val, weighted=self.weighted,
                    batch_size=self.batch_size, replace=True, seed=seed,
                    verbose=self.verbose, maxlen=self.max_length,
                    padding='post', truncating="post",
                    tokenizer=generator_train.tokenizer)
            # Train and validation generators must share one vocabulary.
            assert generator_train.tokenizer.word_index == \
                   self.generator_val.tokenizer.word_index
        else:
            self.generator_val = None

        if not hasattr(self, "siamese_net") or rebuild_model:
            self.build_keras_model(multi_gpu)

        try:
            print(self.log_dir)
            self.hist = self.siamese_net.fit_generator(
                generator_train,
                epochs=self.epochs,
                validation_data=self.generator_val,
                validation_steps=validation_steps,
                callbacks=self.get_callbacks(early_stopping, tensorboard,
                                             histogram_freq),
                use_multiprocessing=True,
                workers=8,
                **kwargs)
        except KeyboardInterrupt:
            print("Stop training")
        finally:
            self.save_network_weights()

    def get_training_data_generator(self, network, n_steps=250, seed=0):
        if not hasattr(self, "generator_train"):
            self.generator_train = SampledTripletDataGenerator(
                network=network, weighted=self.weighted,
                batch_size=self.batch_size, replace=True, seed=seed,
                verbose=self.verbose, maxlen=self.max_length,
                padding='post', truncating=self.truncating)
            self.node_list = self.generator_train.node_list
        return self.generator_train

    def get_reconstructed_adj(self, beta=2.0, X=None, node_l=None,
                              node_l_b=None, edge_type="d", interpolate=False):
        """Reconstruct an adjacency matrix from the learned embeddings.

        :param X: unused; kept for interface compatibility.
        :param node_l: list of node names to select rows for.
        :param node_l_b: list of node names to select columns for.
        :param edge_type: "d" for directed edges, "u" for undirected.
        :return: the reconstructed adjacency matrix.
        """
        if hasattr(self, "reconstructed_adj") and edge_type == "d":
            adj = self.reconstructed_adj
        else:
            embs = self.get_embeddings()
            assert len(self.node_list) == embs.shape[0]
            adj = self._pairwise_similarity(embs, edge_type)

        if interpolate:
            adj = np.interp(adj, (adj.min(), adj.max()), (0, 1))

        if (node_l is None or node_l == self.node_list) and node_l_b is None:
            if edge_type == "d":
                # Cache reconstructed_adj to memory for faster recall.
                self.reconstructed_adj = adj
            return adj
        elif set(node_l) < set(self.node_list) or node_l_b is not None:
            return self._select_adj_indices(adj, node_l, node_l_b)
        elif not (set(node_l) < set(self.node_list)):
            raise Exception("A node in node_l is not in self.node_list.")

    def _pairwise_similarity(self, embeddings, edge_type="d"):
        if edge_type == 'd':
            # Split each embedding into source (X) and target (Y) halves.
            embeddings_X = embeddings[:, 0:int(self.embedding_d / 2)]
            embeddings_Y = embeddings[:, int(self.embedding_d / 2):self.embedding_d]

            if self.directed_distance == "euclidean_ball":
                embeddings_stacked = np.vstack([embeddings_X, embeddings_Y])
                adj = radius_neighbors_graph(embeddings_stacked,
                                             radius=self.margin, n_jobs=-2)
                adj = adj[0:embeddings_X.shape[0], :][:, embeddings_X.shape[0]:]
                print("radius_neighbors_graph")
            elif self.directed_distance == "euclidean":
                adj = pairwise_distances(X=embeddings_X, Y=embeddings_Y,
                                         metric="euclidean", n_jobs=-2)
                # Get node-specific adaptive threshold
                # adj = self.transform_adj_adaptive_threshold(adj, margin=0)
                # adj = self.transform_adj_beta_exp(adj, edge_types="d", sample_negative=self.negative_sampling_ratio)
                adj = np.exp(-2.0 * adj)
                print("Euclidean dist")
            elif self.directed_distance == "cosine":
                adj = pairwise_distances(X=embeddings_X, Y=embeddings_Y,
                                         metric="cosine", n_jobs=-2)
                print("Cosine similarity")
            elif self.directed_distance == "dot_sigmoid":
                adj = np.matmul(embeddings_X, embeddings_Y.T)
                adj = sigmoid(adj)
                print("Dot product & sigmoid")
            elif self.directed_distance == "dot_softmax":
                adj = np.matmul(embeddings_X, embeddings_Y.T)
                adj = softmax(adj)
                print("Dot product & softmax")
        elif edge_type == 'u':
            if self.undirected_distance == "euclidean_ball":
                adj = radius_neighbors_graph(embeddings, radius=self.margin,
                                             n_jobs=-2)
            elif self.undirected_distance == "euclidean":
                adj = pairwise_distances(X=embeddings, metric="euclidean",
                                         n_jobs=-2)
                # adj = np.exp(-2.0 * adj)
                adj = self.transform_adj_beta_exp(adj, edge_types=["u", "u_n"],
                                                  sample_negative=False)
                # adj = self.transform_adj_adaptive_threshold(adj, margin=self.margin / 2)
                print("Euclidean dist")
            elif self.undirected_distance == "cosine":
                adj = pairwise_distances(X=embeddings, metric="cosine",
                                         n_jobs=-2)
            elif self.undirected_distance == "dot_sigmoid":
                adj = np.matmul(embeddings, embeddings.T)
                adj = sigmoid(adj)
            elif self.undirected_distance == "dot_softmax":
                adj = np.matmul(embeddings, embeddings.T)
                adj = softmax(adj)
        else:
            raise Exception("Unsupported edge_type", edge_type)
        return adj

    def transform_adj_adaptive_threshold(self, adj_pred, margin=0.2,
                                         edge_types="d"):
        print("adaptive threshold")
        adj_true = self.generator_train.network.get_adjacency_matrix(
            edge_types=edge_types, node_list=self.node_list)
        self.distance_threshold = self.get_adaptive_threshold(adj_pred,
                                                              adj_true, margin)
        print("distance_threshold", self.distance_threshold)

        # Binarize: an edge is predicted wherever the pairwise distance
        # falls below the adaptive threshold.
        predicted_adj = np.zeros(adj_pred.shape)
        for node_id in range(predicted_adj.shape[0]):
            predicted_adj[node_id, :] = \
                (adj_pred[node_id, :] < self.distance_threshold).astype(float)
        return predicted_adj

    def get_adaptive_threshold(self, adj_pred, adj_true, margin):
        # Per node, use the distance to its closest true neighbor as that
        # node's threshold; nodes without edges fall back to the global
        # minimum threshold plus half the margin.
        distance_threshold = np.zeros((len(self.node_list),))
        for nonzero_node_id in np.unique(adj_true.nonzero()[0]):
            _, nonzero_node_cols = adj_true[nonzero_node_id].nonzero()
            positive_distances = adj_pred[nonzero_node_id, nonzero_node_cols]
            distance_threshold[nonzero_node_id] = np.min(positive_distances)
        median_threshold = np.min(
            distance_threshold[distance_threshold > 0]) + margin / 2
        distance_threshold[distance_threshold == 0] = median_threshold
        return distance_threshold
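# A toy, self-contained illustration of the adaptive-threshold logic in
# get_adaptive_threshold() above, on a 3-node graph. `adj_true` stands in
# for the generator's adjacency matrix and `adj_pred` for pairwise embedding
# distances; per node, the threshold is the distance to its closest true
# neighbor. All values here are made up for illustration.
import numpy as np
from scipy.sparse import csr_matrix

adj_true = csr_matrix(np.array([[0, 1, 1],
                                [0, 0, 1],
                                [0, 0, 0]]))
adj_pred = np.array([[0.0, 0.3, 0.7],
                     [0.9, 0.0, 0.2],
                     [0.8, 0.6, 0.0]])

threshold = np.zeros(3)
for i in np.unique(adj_true.nonzero()[0]):
    _, cols = adj_true[i].nonzero()
    threshold[i] = np.min(adj_pred[i, cols])  # nearest true neighbor

print(threshold)  # [0.3 0.2 0. ] -- node 2 has no outgoing edges yet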
def TCNN_model(
    len_params=9,
    init_shape=(4, 4, 100),
    opt_adam=None,
    loss='mse',
    ldr=0.25,
    verbose=True,
):
    """
    Build a TCNN model.

    Keywords in comment lines:
        TCNN2D: 2D transposed convolution layer (deconvolution).
        CNN2D: 2D convolution layer (spatial convolution).
        ReLU: Rectified Linear Unit activation function.

    Args:
        len_params (int): Number of input parameters to use.
        init_shape (tuple): Initial shape as a tuple of integers:
            height, width, channels.
        opt_adam (str): Name of an optimizer or an optimizer instance.
            If None, the ADAM optimizer is used.
        loss (str): Name of an objective function, an objective function,
            or a Loss instance. Default: mean squared error.
        ldr (float): Fraction of the input units to drop.
        verbose (bool): Whether to print a layer-by-layer summary.

    Returns:
        The compiled TCNN model.
    """
    # Use the ADAM optimizer as the default.
    if opt_adam is None:
        lr, b1, b2 = 0.0001, 0.5, 0.99
        opt_adam = optimizers.Adam(learning_rate=lr, beta_1=b1, beta_2=b2)
        print(colored(
            f'\nThis model uses the ADAM optimizer with learning rate {lr} '
            f'and beta1={b1}, beta2={b2}.', 'green'))

    # Initial size.
    height, width, channels = init_shape

    # Input parameters. Specify the number of parameters to use.
    input_layer = Input(shape=(len_params,), name='params')

    # First fully connected layer with ReLU.
    layer = Dense(units=625, use_bias=True, name='Dense_1')(input_layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_d1')(layer)

    # Apply Dropout to the input.
    layer = Dropout(rate=ldr, seed=32, trainable=False, name='Dropout')(layer)

    # Second fully connected layer with ReLU.
    layer = Dense(units=1250, use_bias=True, name='Dense_2')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_d2')(layer)

    # Third fully connected layer with ReLU.
    layer = Dense(units=height * width * channels, use_bias=True,
                  name='Dense_3')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_3')(layer)

    # Reshape from 1600 to 4x4x100.
    layer = Reshape(target_shape=(height, width, channels),
                    name='Reshape_')(layer)

    # Apply 5x5 TCNN2D and CNN2D: up-sample from 4x4x100 to 11x11x48,
    # 25x25x24 and 50x50x3 with three deconvolutions.

    # First TCNN2D with ReLU: up-sample to 11x11x48.
    layer = Conv2DTranspose(filters=48, kernel_size=(5, 5), strides=(2, 2),
                            padding='valid', name='Transp_1')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_t1')(layer)

    # First CNN2D with ReLU: refine at 11x11x48.
    layer = Conv2D(filters=48, kernel_size=(5, 5), strides=(1, 1),
                   padding='same', name='Conv2D_1')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_c1')(layer)

    # Second TCNN2D with ReLU: up-sample to 25x25x24.
    layer = Conv2DTranspose(filters=24, kernel_size=(5, 5), strides=(2, 2),
                            padding='valid', name='Transp_2')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_t2')(layer)

    # Second CNN2D with ReLU: refine at 25x25x24.
    layer = Conv2D(filters=24, kernel_size=(5, 5), strides=(1, 1),
                   padding='same', name='Conv2D_2')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_c2')(layer)

    # Third TCNN2D with ReLU: up-sample to 50x50x3.
    layer = Conv2DTranspose(filters=3, kernel_size=(5, 5), strides=(2, 2),
                            padding='same', name='Transp_3')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_t3')(layer)

    # Third CNN2D with ReLU: refine at 50x50x3.
    layer = Conv2D(filters=3, kernel_size=(5, 5), strides=(1, 1),
                   padding='same', name='Conv2D_3')(layer)
    layer = LeakyReLU(alpha=0.3, name='ReLu_c3')(layer)

    # Compile the model.
    model = Model(inputs=input_layer, outputs=layer, name='TCNN_model')
    model.compile(optimizer=opt_adam, loss=loss, metrics=['acc', 'mae', 'mse'])

    # Show some information.
    if verbose:
        for layer in model.layers:
            if 'ReLu' not in layer.name and 'params' not in layer.name:
                print(f'{layer.name}: [output shape: {layer.output_shape}, '
                      f'trainable? {layer.trainable}]')
        print(colored(
            f'\nTCNN model has {model.count_params()} trainable parameters.',
            'green'))
        _, df = model.layers[0].input_shape
        _, h, w, c = model.output_shape
        print(colored(
            f'The degrees of freedom of the system increase from {df} '
            f'to {h * w * c}.', 'green'))

    return model
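# Minimal usage sketch for TCNN_model(). The batch of 16 random parameter
# vectors is illustrative; with the default init_shape the decoder maps a
# 9-value input to a 50x50x3 output (11x11 and 25x25 at the intermediate
# stages, per the layer comments above).
import numpy as np

model = TCNN_model(len_params=9, verbose=False)
params = np.random.rand(16, 9).astype('float32')  # 16 samples, 9 params each
images = model.predict(params)
print(images.shape)  # (16, 50, 50, 3)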