import queue
import sys
import traceback

# AppPredictor, DeepTextPreprocessor, preprocessors and postprocessors are
# assumed to be imported from the surrounding package.


def run_app_predictor(config):
    """Dispatch prediction for the app model named by config.model_name."""
    try:
        # Plain BERT feature extraction: no extra pre-/post-processing needed.
        if config.model_name == "feat_ext_bert":
            predictor = AppPredictor(config,
                                     input_keys=["input_ids", "input_mask", "segment_ids"],
                                     output_keys=["pool_output", "first_token_output", "all_hidden_outputs"],
                                     job_name="ez_bert_feat")
            predictor.run_predict()
        elif config.model_name in ["text_comprehension_bert", "text_comprehension_bert_hae"]:
            input_keys = ["input_ids", "input_mask", "segment_ids"] if config.model_name == "text_comprehension_bert" \
                else ["input_ids", "input_mask", "segment_ids", "history_answer_marker"]
            predictor = AppPredictor(config,
                                     input_keys=input_keys,
                                     output_keys=["start_logits", "end_logits"],
                                     job_name=config.model_name + "_predictor")
            preprocessor = preprocessors.get_preprocessor(
                config.pretrain_model_name_or_path,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue(),
                preprocess_batch_size=config.predict_batch_size,
                user_defined_config=config,
                app_model_name=config.model_name)
            postprocessor = postprocessors.get_postprocessors(
                n_best_size=int(config.n_best_size) if hasattr(config, "n_best_size") else 20,
                max_answer_length=int(config.max_answer_length) if hasattr(config, "max_answer_length") else 30,
                output_schema=config.output_schema,
                app_model_name=config.model_name,
                thread_num=predictor.thread_num,
                input_queue=queue.Queue(),
                output_queue=queue.Queue())
            predictor.run_predict(preprocessor=preprocessor, postprocessor=postprocessor)
        elif config.model_name in ["text_match_dam", "text_match_damplus", "text_match_bicnn",
                                   "text_match_hcnn", "text_classify_cnn"]:
            predictor = AppPredictor(config,
                                     input_keys=["input_ids_a", "input_mask_a", "input_ids_b", "input_mask_b"],
                                     output_keys=["predictions", "probabilities", "logits"],
                                     job_name=config.model_name + "_predictor")
            preprocessor = DeepTextPreprocessor(config,
                                                thread_num=predictor.thread_num,
                                                input_queue=queue.Queue(),
                                                output_queue=queue.Queue(),
                                                job_name=config.model_name + "_predictor")
            predictor.run_predict(preprocessor=preprocessor)
        elif config.model_name in ["text_match_bert_two_tower"]:
            raise NotImplementedError
        # Default: single-sentence/sentence-pair BERT classification models.
        else:
            predictor = AppPredictor(config,
                                     input_keys=["input_ids", "input_mask", "segment_ids"],
                                     output_keys=["predictions", "probabilities", "logits"],
                                     job_name=config.model_name + "_predictor")
            predictor.run_predict()
    except Exception as e:
        traceback.print_exc(file=sys.stderr)
        raise RuntimeError(str(e)) from e
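
# Usage sketch (not part of the original listing): a minimal, hypothetical
# config object driving the default branch above. Only model_name is read by
# run_app_predictor itself; the other attributes are assumptions about what
# AppPredictor consumes internally.
class _DemoConfig(object):
    model_name = "text_classify_bert"      # hits the default branch
    predict_batch_size = 32
    output_schema = "predictions,probabilities"

run_app_predictor(_DemoConfig())
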
Example #2

    def build_logits(self, features, mode=None):
        """ Building the DAM text match graph

        Args:
            features (`OrderedDict`): A dict mapping raw input to tensors
            mode (`str`): A `tf.estimator.ModeKeys` value telling whether the model is training
        Returns:
            logits (`Tensor`): The output after the last dense layer. Shape of [None, num_labels]
            label_ids (`Tensor`): label_ids, shape of [None]
        """
        text_preprocessor = DeepTextPreprocessor(self.config, mode=mode)
        text_a_indices, text_a_masks, text_b_indices, text_b_masks, label_ids = text_preprocessor(
            features)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        word_embeddings = self._add_word_embeddings(
            vocab_size=text_preprocessor.vocab.size,
            embed_size=self.config.embedding_size,
            pretrained_word_embeddings=text_preprocessor.pretrained_word_embeddings,
            trainable=not self.config.fix_embedding)
        a_embeds = tf.nn.embedding_lookup(word_embeddings, text_a_indices)
        b_embeds = tf.nn.embedding_lookup(word_embeddings, text_b_indices)

        dam_output_features = layers.DAMEncoder(self.config.hidden_size)(
            [a_embeds, b_embeds, text_a_masks, text_b_masks],
            training=is_training)

        dam_output_features = tf.layers.dropout(
            dam_output_features,
            rate=0.2,
            training=is_training,
            name='dam_out_features_dropout')
        dam_output_features = layers.Dense(
            self.config.hidden_size,
            activation=tf.nn.relu,
            kernel_initializer=layers.get_initializer(0.02),
            name='dam_out_features_projection')(dam_output_features)

        logits = layers.Dense(self.config.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='output_layer')(dam_output_features)

        self.check_and_init_from_checkpoint(mode)
        return logits, label_ids
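
    # Sketch (assumed; _add_word_embeddings is not shown in this listing): all
    # three build_logits methods call it, so under TF 1.x it plausibly creates
    # a [vocab_size, embed_size] embedding matrix, optionally seeded from
    # pretrained vectors and optionally frozen. The signature is inferred from
    # the call sites above.
    def _add_word_embeddings(self, vocab_size, embed_size,
                             pretrained_word_embeddings=None, trainable=True):
        if pretrained_word_embeddings is not None:
            initializer = tf.constant_initializer(pretrained_word_embeddings)
        else:
            initializer = tf.random_uniform_initializer(-0.1, 0.1)
        return tf.get_variable("word_embeddings",
                               shape=[vocab_size, embed_size],
                               initializer=initializer,
                               trainable=trainable)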
Example #3
    def build_logits(self, features, mode=None):
        """ Building DAM text match graph

        Args:
            features (`OrderedDict`): A dict mapping raw input to tensors
            mode (`bool`): tell the model whether it is under training
        Returns:
            logits (`Tensor`): The output after the last dense layer. Shape of [None, num_labels]
            label_ids (`Tensor`): label_ids, shape of [None]
        """
        text_preprocessor = DeepTextPreprocessor(self.config, mode=mode)
        text_indices, text_masks, _, _, label_ids = text_preprocessor(features)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        word_embeddings = self._add_word_embeddings(
            vocab_size=text_preprocessor.vocab.size,
            embed_size=self.config.embedding_size,
            pretrained_word_embeddings=text_preprocessor.pretrained_word_embeddings,
            trainable=not self.config.fix_embedding)
        text_embeds = tf.nn.embedding_lookup(word_embeddings, text_indices)

        output_features = layers.TextCNNEncoder(
            num_filters=self.config.num_filters,
            filter_sizes=self.config.filter_sizes,
            embed_size=self.config.embedding_size,
            max_seq_len=self.config.sequence_length,
        )([text_embeds, text_masks], training=is_training)

        output_features = tf.layers.dropout(output_features,
                                            rate=self.config.dropout_rate,
                                            training=is_training,
                                            name='output_features')

        logits = layers.Dense(self.config.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='output_layer')(output_features)

        self.check_and_init_from_checkpoint(mode)
        return logits, label_ids
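
    # Sketch (assumed, not from the original listing): each build_logits
    # returns (logits, label_ids), which the framework's training loop would
    # then feed into a standard softmax cross-entropy loss such as this.
    def build_loss(self, logits, label_ids):
        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_ids, logits=logits)
        return tf.reduce_mean(per_example_loss)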
Example #4

    def build_logits(self, features, mode=None):
        """ Building the HCNN text match graph

        Args:
            features (`OrderedDict`): A dict mapping raw input to tensors
            mode (`str`): A `tf.estimator.ModeKeys` value telling whether the model is training
        Returns:
            logits (`Tensor`): The output after the last dense layer. Shape of [None, num_labels]
            label_ids (`Tensor`): label_ids, shape of [None]
        """
        text_preprocessor = DeepTextPreprocessor(self.config, mode=mode)
        text_a_indices, text_a_masks, text_b_indices, text_b_masks, label_ids = text_preprocessor(
            features)
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        word_embeddings = self._add_word_embeddings(
            vocab_size=text_preprocessor.vocab.size,
            embed_size=self.config.embedding_size,
            pretrained_word_embeddings=text_preprocessor.pretrained_word_embeddings,
            trainable=not self.config.fix_embedding)
        a_embeds = tf.nn.embedding_lookup(word_embeddings, text_a_indices)
        b_embeds = tf.nn.embedding_lookup(word_embeddings, text_b_indices)

        hcnn_output_features = layers.HybridCNNEncoder(
            num_filters=self.config.hidden_size,
            l2_reg=self.config.l2_reg,
            filter_size=self.config.filter_size)(
                [a_embeds, b_embeds, text_a_masks, text_b_masks])

        hcnn_output_features = tf.layers.dropout(
            hcnn_output_features,
            rate=0.2,
            training=is_training,
            name='hcnn_out_features_dropout')
        hcnn_output_features = layers.Dense(
            self.config.hidden_size,
            activation=tf.nn.relu,
            kernel_initializer=layers.get_initializer(0.02),
            name='hcnn_out_features_projection')(hcnn_output_features)

        logits = layers.Dense(self.config.num_labels,
                              kernel_initializer=layers.get_initializer(0.02),
                              name='output_layer')(hcnn_output_features)

        self.check_and_init_from_checkpoint(mode)
        return logits, label_ids
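
# Sketch (assumed): at inference time the "predictions" and "probabilities"
# output_keys requested by run_app_predictor above would be derived from the
# returned logits roughly as follows.
probabilities = tf.nn.softmax(logits, axis=-1)                   # [None, num_labels]
predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)   # [None]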