def get_vsm_model_hyperp():
    """Return the default hyperparameter mapping for the VSM model.

    Keys are taken from ``mh.VSM_Model_Hyperp``; the values configure a
    TF-IDF vectorizer (English stop words, IDF with smoothing, unigrams,
    capped at 400 features) and a WordNet-based lemma tokenizer.
    """
    tfidf = TfidfVectorizer(stop_words='english', use_idf=True, smooth_idf=True)
    return {
        mh.VSM_Model_Hyperp.VECTORIZER_NGRAM_RANGE.value: (1, 1),
        mh.VSM_Model_Hyperp.VECTORIZER_MAX_FEATURES.value: 400,
        mh.VSM_Model_Hyperp.VECTORIZER.value: tfidf,
        mh.VSM_Model_Hyperp.VECTORIZER_TOKENIZER.value: tok.WordNetBased_LemmaTokenizer(),
    }
def set_tokenizer(self, **kwargs):
    """Set ``self.tokenizer`` from *kwargs*, falling back to the default.

    If ``BM25_Model_Hyperp.TOKENIZER.value`` is present in *kwargs*, that
    value is used as-is; otherwise a fresh
    ``tok.WordNetBased_LemmaTokenizer`` is constructed.  The default is
    built only when actually needed (a ``dict.get`` with a constructed
    default would instantiate the tokenizer unconditionally).
    """
    key = BM25_Model_Hyperp.TOKENIZER.value
    # Idiomatic membership test: `key in kwargs`, not `in kwargs.keys()`.
    if key in kwargs:
        self.tokenizer = kwargs[key]
    else:
        self.tokenizer = tok.WordNetBased_LemmaTokenizer()