Example #1
    def __init__(
        self,
        model: str = "distilbert-base-uncased-distilled-squad",
        tokenizer: str = "distilbert-base-uncased",
        context_window_size: int = 30,
        use_gpu: int = 0,
        n_best_per_passage: int = 2,
        no_answer: bool = True
    ):
        """
        Load a QA model from Transformers.
        Available models include:
        - distilbert-base-uncased-distilled-squad
        - bert-large-cased-whole-word-masking-finetuned-squad
        - bert-large-uncased-whole-word-masking-finetuned-squad

        See https://huggingface.co/models for a full list of available QA models

        :param model: name of the model
        :param tokenizer: name of the tokenizer (usually the same as model)
        :param context_window_size: num of chars (before and after the answer) to return as "context" for each answer.
                            The context usually helps users to understand if the answer really makes sense.
        :param use_gpu: < 0  -> use cpu
                        >= 0 -> ordinal of the gpu to use
        :param n_best_per_passage: num of best answers to take into account for each passage
        :param no_answer: True -> Hugging Face model could return an "impossible"/"empty" answer (i.e. when there is an unanswerable question)
                        False -> otherwise

        """
        self.model = pipeline('question-answering', model=model, tokenizer=tokenizer, device=use_gpu)
        self.context_window_size = context_window_size
        self.n_best_per_passage = n_best_per_passage
        self.no_answer = no_answer
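The constructor above appears to belong to Haystack's TransformersReader (the class is named in the later examples' docstrings). A minimal usage sketch follows; the import path is an assumption and varies across Haystack versions:

from haystack.reader.transformers import TransformersReader  # assumed import path

# Run on CPU (use_gpu < 0 selects CPU, per the docstring above)
reader = TransformersReader(
    model="distilbert-base-uncased-distilled-squad",
    tokenizer="distilbert-base-uncased",
    context_window_size=30,
    use_gpu=-1,
    n_best_per_passage=2,
    no_answer=True,
)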
Example #2
    def __init__(self,
                 model: str = "distilbert-base-uncased-distilled-squad",
                 tokenizer: Optional[str] = None,
                 context_window_size: int = 70,
                 use_gpu: int = 0,
                 top_k_per_candidate: int = 4,
                 return_no_answers: bool = True,
                 max_seq_len: int = 256,
                 doc_stride: int = 128):
        """
        Load a QA model from Transformers.
        Available models include:
        - distilbert-base-uncased-distilled-squad
        - bert-large-cased-whole-word-masking-finetuned-squad
        - bert-large-uncased-whole-word-masking-finetuned-squad

        See https://huggingface.co/models for a full list of available QA models

        :param model: name of the model
        :param tokenizer: name of the tokenizer (usually the same as model)
        :param context_window_size: num of chars (before and after the answer) to return as "context" for each answer.
                            The context usually helps users to understand if the answer really makes sense.
        :param use_gpu: < 0  -> use cpu
                        >= 0 -> ordinal of the gpu to use
        :param top_k_per_candidate: How many answers to extract for each candidate doc that is coming from the retriever (might be a long text).
                                    Note: - This is not the number of "final answers" you will receive
                                            (see `top_k` in TransformersReader.predict() or Finder.get_answers() for that)
                                          - Can include no_answer in the sorted list of predictions
        :param return_no_answers: True -> Hugging Face model could return an "impossible"/"empty" answer (i.e. when there is an unanswerable question)
                                  False -> otherwise
                                  no_answer_boost is unfortunately not available with TransformersReader. If you would like to
                                  set no_answer_boost, use a FARMReader
        :param max_seq_len: max sequence length of one input text for the model
        :param doc_stride: length of striding window for splitting long texts (used if len(text) > max_seq_len)

        """
        self.model = pipeline('question-answering',
                              model=model,
                              tokenizer=tokenizer,
                              device=use_gpu)
        self.context_window_size = context_window_size
        self.top_k_per_candidate = top_k_per_candidate
        self.return_no_answers = return_no_answers
        self.max_seq_len = max_seq_len
        self.doc_stride = doc_stride
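A usage sketch for this variant, illustrating the max_seq_len / doc_stride parameters that control how texts longer than max_seq_len are split into overlapping windows. The import path and the value 384 are assumptions for illustration:

from haystack.reader.transformers import TransformersReader  # assumed import path

# Allow longer inputs per forward pass; texts longer than max_seq_len are
# split into overlapping windows using a stride of doc_stride tokens
reader = TransformersReader(
    model="distilbert-base-uncased-distilled-squad",
    context_window_size=70,
    use_gpu=-1,
    top_k_per_candidate=4,
    return_no_answers=True,
    max_seq_len=384,
    doc_stride=128,
)
# top_k_per_candidate limits answers per retrieved document; the number of
# final answers is set separately via `top_k` in TransformersReader.predict()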
Example #3
    def __init__(self,
                 model: str = "distilbert-base-uncased-distilled-squad",
                 tokenizer: str = "distilbert-base-uncased",
                 context_window_size: int = 30,
                 use_gpu: int = 0,
                 top_k_per_candidate: int = 4,
                 no_answer: bool = True):
        """
        Load a QA model from Transformers.
        Available models include:
        - distilbert-base-uncased-distilled-squad
        - bert-large-cased-whole-word-masking-finetuned-squad
        - bert-large-uncased-whole-word-masking-finetuned-squad

        See https://huggingface.co/models for a full list of available QA models

        :param model: name of the model
        :param tokenizer: name of the tokenizer (usually the same as model)
        :param context_window_size: num of chars (before and after the answer) to return as "context" for each answer.
                            The context usually helps users to understand if the answer really makes sense.
        :param use_gpu: < 0  -> use cpu
                        >= 0 -> ordinal of the gpu to use
        :param top_k_per_candidate: How many answers to extract for each candidate doc that is coming from the retriever (might be a long text).
                                    Note: - This is not the number of "final answers" you will receive
                                            (see `top_k` in TransformersReader.predict() or Finder.get_answers() for that)
                                          - Can include no_answer in the sorted list of predictions
        :param no_answer: True -> Hugging Face model could return an "impossible"/"empty" answer (i.e. when there is an unanswerable question)
                        False -> otherwise

        """
        self.model = pipeline('question-answering',
                              model=model,
                              tokenizer=tokenizer,
                              device=use_gpu)
        self.context_window_size = context_window_size
        self.top_k_per_candidate = top_k_per_candidate
        self.no_answer = no_answer
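Finally, a sketch for this variant showing GPU selection and the no_answer flag; the import path is again an assumption:

from haystack.reader.transformers import TransformersReader  # assumed import path

# use_gpu >= 0 is interpreted as the GPU ordinal, so 0 selects the first GPU;
# no_answer=False prevents the model from returning an "impossible"/"empty" answer
reader = TransformersReader(
    model="distilbert-base-uncased-distilled-squad",
    tokenizer="distilbert-base-uncased",
    context_window_size=30,
    use_gpu=0,
    top_k_per_candidate=4,
    no_answer=False,
)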