def run_app_predictor(config):
    """Run prediction for the application model named by ``config.model_name``.

    The model name selects which tensors the ``AppPredictor`` feeds and
    fetches, and whether a preprocessor and/or postprocessor is attached
    before ``run_predict`` is called.

    Args:
        config: Prediction configuration. ``model_name`` is always read; the
            text-comprehension branch additionally reads
            ``pretrain_model_name_or_path``, ``predict_batch_size``,
            ``output_schema`` and the optional ``n_best_size`` /
            ``max_answer_length`` (defaults 20 and 30).

    Raises:
        RuntimeError: For any failure. The original traceback is printed to
            stderr and the original message is carried over. Requesting
            ``text_match_bert_two_tower`` (not implemented) also ends up here,
            with a message naming the model.
    """
    bert_input_keys = ["input_ids", "input_mask", "segment_ids"]
    classification_output_keys = ["predictions", "probabilities", "logits"]

    try:
        model_name = config.model_name

        if model_name == "feat_ext_bert":
            # Feature extraction: fetch pooled / first-token / per-layer outputs.
            predictor = AppPredictor(
                config,
                input_keys=list(bert_input_keys),
                output_keys=["pool_output", "first_token_output",
                             "all_hidden_outputs"],
                job_name="ez_bert_feat")
            predictor.run_predict()

        elif model_name in ("text_comprehension_bert",
                            "text_comprehension_bert_hae"):
            input_keys = list(bert_input_keys)
            if model_name != "text_comprehension_bert":
                # The HAE variant takes one extra input tensor.
                input_keys.append("history_answer_marker")
            predictor = AppPredictor(
                config,
                input_keys=input_keys,
                output_keys=["start_logits", "end_logits"],
                job_name=model_name + "_predictor")
            preprocessor = preprocessors.get_preprocessor(
                config.pretrain_model_name_or_path,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue(),
                preprocess_batch_size=config.predict_batch_size,
                user_defined_config=config,
                app_model_name=model_name)
            postprocessor = postprocessors.get_postprocessors(
                n_best_size=int(getattr(config, "n_best_size", 20)),
                max_answer_length=int(getattr(config, "max_answer_length", 30)),
                output_schema=config.output_schema,
                app_model_name=model_name,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue())
            predictor.run_predict(preprocessor=preprocessor,
                                  postprocessor=postprocessor)

        elif model_name in ("text_match_dam", "text_match_damplus",
                            "text_match_bicnn", "text_match_hcnn",
                            "text_classify_cnn"):
            # Non-BERT models: inputs are a pair of id/mask tensors.
            job_name = model_name + "_predictor"
            predictor = AppPredictor(
                config,
                input_keys=["input_ids_a", "input_mask_a",
                            "input_ids_b", "input_mask_b"],
                output_keys=list(classification_output_keys),
                job_name=job_name)
            preprocessor = DeepTextPreprocessor(
                config,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue(),
                job_name=job_name)
            predictor.run_predict(preprocessor=preprocessor)

        elif model_name in ("text_match_bert_two_tower",):
            # Previously raised without a message, which surfaced to the
            # caller as RuntimeError("") and hid which model was rejected.
            raise NotImplementedError(
                "Prediction is not implemented for model '{}'".format(
                    model_name))

        else:
            # Any other model name is treated as a BERT-style classifier.
            predictor = AppPredictor(
                config,
                input_keys=list(bert_input_keys),
                output_keys=list(classification_output_keys),
                job_name=model_name + "_predictor")
            predictor.run_predict()

    except Exception as e:
        # Callers only ever see RuntimeError; the full original traceback is
        # written to stderr so the root cause is not lost.
        traceback.print_exc(file=sys.stderr)
        raise RuntimeError(str(e))
def build_logits(self, features, mode=None):
    """Build the DAM text-matching graph and return its logits.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors; it is
            handed unchanged to ``DeepTextPreprocessor``.
        mode: Estimator mode key. It is compared with
            ``tf.estimator.ModeKeys.TRAIN`` to decide whether the encoder and
            dropout run in training mode, and is also forwarded to the
            preprocessor and to ``check_and_init_from_checkpoint``.

    Returns:
        logits (`Tensor`): Output of the last dense layer, shape of
            [None, num_labels]
        label_ids (`Tensor`): label_ids as produced by the preprocessor,
            shape of [None]
    """
    cfg = self.config

    preprocessor = DeepTextPreprocessor(cfg, mode=mode)
    (ids_a, masks_a, ids_b, masks_b, label_ids) = preprocessor(features)

    training = mode == tf.estimator.ModeKeys.TRAIN

    # One embedding table is shared by both sides of the text pair.
    embedding_table = self._add_word_embeddings(
        vocab_size=preprocessor.vocab.size,
        embed_size=cfg.embedding_size,
        pretrained_word_embeddings=preprocessor.pretrained_word_embeddings,
        trainable=not cfg.fix_embedding)
    embedded_a = tf.nn.embedding_lookup(embedding_table, ids_a)
    embedded_b = tf.nn.embedding_lookup(embedding_table, ids_b)

    encoder = layers.DAMEncoder(cfg.hidden_size)
    pair_features = encoder(
        [embedded_a, embedded_b, masks_a, masks_b], training=training)

    # Dropout rate is fixed at 0.2 for this model (not read from config).
    pair_features = tf.layers.dropout(
        pair_features,
        rate=0.2,
        training=training,
        name='dam_out_features_dropout')

    projection = layers.Dense(
        cfg.hidden_size,
        activation=tf.nn.relu,
        kernel_initializer=layers.get_initializer(0.02),
        name='dam_out_features_projection')
    pair_features = projection(pair_features)

    output_layer = layers.Dense(
        cfg.num_labels,
        kernel_initializer=layers.get_initializer(0.02),
        name='output_layer')
    logits = output_layer(pair_features)

    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Build the TextCNN graph over a single text input and return its logits.

    Only the first text (indices and masks) returned by the preprocessor is
    used; the third and fourth returned values are discarded.

    Args:
        features (`OrderedDict`): A dict mapping raw input to tensors; it is
            handed unchanged to ``DeepTextPreprocessor``.
        mode: Estimator mode key. It is compared with
            ``tf.estimator.ModeKeys.TRAIN`` to decide whether the encoder and
            dropout run in training mode, and is also forwarded to the
            preprocessor and to ``check_and_init_from_checkpoint``.

    Returns:
        logits (`Tensor`): Output of the last dense layer, shape of
            [None, num_labels]
        label_ids (`Tensor`): label_ids as produced by the preprocessor,
            shape of [None]
    """
    cfg = self.config

    preprocessor = DeepTextPreprocessor(cfg, mode=mode)
    token_ids, token_masks, _, _, label_ids = preprocessor(features)

    training = mode == tf.estimator.ModeKeys.TRAIN

    embedding_table = self._add_word_embeddings(
        vocab_size=preprocessor.vocab.size,
        embed_size=cfg.embedding_size,
        pretrained_word_embeddings=preprocessor.pretrained_word_embeddings,
        trainable=not cfg.fix_embedding)
    embedded_text = tf.nn.embedding_lookup(embedding_table, token_ids)

    cnn_encoder = layers.TextCNNEncoder(
        num_filters=cfg.num_filters,
        filter_sizes=cfg.filter_sizes,
        embed_size=cfg.embedding_size,
        max_seq_len=cfg.sequence_length,
    )
    text_features = cnn_encoder([embedded_text, token_masks],
                                training=training)

    # Unlike the matching models, the dropout rate here comes from config.
    text_features = tf.layers.dropout(
        text_features,
        rate=cfg.dropout_rate,
        training=training,
        name='output_features')

    output_layer = layers.Dense(
        cfg.num_labels,
        kernel_initializer=layers.get_initializer(0.02),
        name='output_layer')
    logits = output_layer(text_features)

    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids
def build_logits(self, features, mode=None):
    """Build the HybridCNN text-matching graph and return its logits.

    Args:
        features: Raw model inputs; handed unchanged to
            ``DeepTextPreprocessor``.
        mode: Estimator mode key. It is compared with
            ``tf.estimator.ModeKeys.TRAIN`` to switch dropout on, and is also
            forwarded to the preprocessor and to
            ``check_and_init_from_checkpoint``.

    Returns:
        logits: Output of the final dense layer, which has
            ``config.num_labels`` units.
        label_ids: The label tensor returned by the preprocessor.
    """
    cfg = self.config

    preprocessor = DeepTextPreprocessor(cfg, mode=mode)
    (ids_a, masks_a, ids_b, masks_b, label_ids) = preprocessor(features)

    training = mode == tf.estimator.ModeKeys.TRAIN

    # One embedding table is shared by both sides of the text pair.
    embedding_table = self._add_word_embeddings(
        vocab_size=preprocessor.vocab.size,
        embed_size=cfg.embedding_size,
        pretrained_word_embeddings=preprocessor.pretrained_word_embeddings,
        trainable=not cfg.fix_embedding)
    embedded_a = tf.nn.embedding_lookup(embedding_table, ids_a)
    embedded_b = tf.nn.embedding_lookup(embedding_table, ids_b)

    # NOTE(review): the encoder is invoked without a ``training`` argument,
    # unlike the dropout below -- confirm this is intentional.
    encoder = layers.HybridCNNEncoder(
        num_filters=cfg.hidden_size,
        l2_reg=cfg.l2_reg,
        filter_size=cfg.filter_size)
    pair_features = encoder([embedded_a, embedded_b, masks_a, masks_b])

    # The 'dam_'-prefixed layer names are kept verbatim.
    # NOTE(review): presumably existing checkpoints rely on them -- verify
    # before renaming.
    pair_features = tf.layers.dropout(
        pair_features,
        rate=0.2,
        training=training,
        name='dam_out_features_dropout')

    projection = layers.Dense(
        cfg.hidden_size,
        activation=tf.nn.relu,
        kernel_initializer=layers.get_initializer(0.02),
        name='dam_out_features_projection')
    pair_features = projection(pair_features)

    output_layer = layers.Dense(
        cfg.num_labels,
        kernel_initializer=layers.get_initializer(0.02),
        name='output_layer')
    logits = output_layer(pair_features)

    self.check_and_init_from_checkpoint(mode)
    return logits, label_ids