Beispiel #1
0
    def transform(self, texts):
        """Encode ``texts`` into Sentence-BERT embeddings.

        ``_check_st()`` is invoked first (presumably it verifies that the
        sentence_transformers dependency is usable -- confirm against its
        definition). A new ``bert-base-nli-mean-tokens`` model is then
        constructed on every call and used to encode the input.

        Args:
            texts: The texts handed to ``SentenceTransformer.encode``.

        Returns:
            The encoder output wrapped in a NumPy array.
        """
        _check_st()

        encoder = SentenceTransformer('bert-base-nli-mean-tokens')
        return np.array(encoder.encode(texts))
 def transform(self, texts):
     """Encode ``texts`` into Sentence-BERT embeddings.

     The sentence_transformers package is imported lazily so that it is
     only required when this method is actually used. A new
     ``bert-base-nli-mean-tokens`` model is constructed on every call.

     Args:
         texts: The texts handed to ``SentenceTransformer.encode``.

     Returns:
         The encoder output wrapped in a NumPy array.

     Raises:
         SystemExit: With exit code 192 when sentence_transformers cannot
             be imported; an installation hint is written to stderr first.
     """
     try:
         from sentence_transformers.SentenceTransformer import SentenceTransformer  # noqa
     except ImportError:
         # Diagnostics go to stderr so they do not pollute stdout, which
         # callers may be piping or parsing.
         print("Error: install sentence_transformers package "
               "(`pip install sentence_transformers`)"
               " to use Sentence BERT.",
               file=sys.stderr)
         sys.exit(192)
     model = SentenceTransformer('bert-base-nli-mean-tokens')
     X = np.array(model.encode(texts))
     return X
    def __init__(self,
                 faq_path: str,
                 faq_data: dict = None,
                 model_path: str = None):
        """Load the sentence encoder and the FAQ question/answer label maps.

        If ``check_faq_path()`` reports that processed data already exists
        for ``faq_path``, the question and answer dictionaries are loaded
        from ``questions.pkl`` and ``answers.pkl`` inside that directory.
        Otherwise ``faq_data`` is required and is handed to ``make_faq``.

        Args:
            faq_path: Directory holding (or that will hold) the processed
                questions and answers of this FAQ.
            faq_data: Required only when the FAQ has never been processed.
                A dict holding two mappings:

                * question -> label (int); several questions may share
                  the same label.
                * answer -> label (int); strictly one label per answer.

                Example::

                    q2l = {"How are you doing " : 1 , "where are you ? ": 3}
                    a2l = {"I am fine" : 1 , "I am in India": 3}
                    faq_data = {"questiontolabel" : q2l , "answertolabel" : a2l}

                NOTE(review): the key names are taken from the original
                example; confirm them against ``make_faq``.
            model_path: Path or name of a previously saved model passed to
                ``SentenceTransformer``. When ``None``, the pretrained
                ``'roberta-base-nli-stsb-mean-tokens'`` model is used.

        Raises:
            AssertionError: If no processed data is found for ``faq_path``
                and ``faq_data`` is ``None``.
        """
        if model_path is None:
            model_path = 'roberta-base-nli-stsb-mean-tokens'

        self.model = SentenceTransformer(model_path)
        # Filled by the fit_FAQ call; holds 3 keys: 1) embeddings (a np
        # array), 2) labels, 3) label_to_answer dict.
        self.current_faq = None
        self.faq_path = faq_path
        # All augmented and original questions mapped to their labels.
        self.question_to_label = {}
        # Mapping from answers to labels.
        self.answer_to_label = {}

        if self.check_faq_path():
            print("found preexisiting faq data , loading dicts from the same")
            que_path = os.path.join(self.faq_path, "questions.pkl")
            self.question_to_label = load_dict(que_path)

            ans_path = os.path.join(self.faq_path, "answers.pkl")
            self.answer_to_label = load_dict(ans_path)
        else:
            if faq_data is None:
                # Raised explicitly instead of via ``assert`` so the check
                # survives ``python -O``; the exception type is kept for
                # backward compatibility. The message reports the path
                # that was searched (it used to format faq_data, which is
                # always None on this branch).
                raise AssertionError(
                    "Did not find and preexisting of {} so you must provide faq_data".format(
                        faq_path))
            self.make_faq(faq_data)