def build_word_list(self):
    """Build the sorted, de-duplicated vocabulary of the training questions.

    Loads questions from ``self.file_path``, keeps every other entry
    (even indices), tokenizes each kept question, and returns the unique
    words as a sorted list.
    """
    raw_texts, _ = load_data.load_questions(self.file_path)
    # Even-indexed questions only — the odd-indexed half is held out elsewhere.
    sampled = raw_texts[0::2]
    # Tokenize each question, then collect unique words across all of them.
    vocabulary = {word for question in sampled for word in nlp.tokens(question)}
    return sorted(vocabulary)
def fit(self):
    """Train an SVM classifier on the vectorized question texts.

    Loads all questions and their target labels from ``self.file_path``,
    converts each question to a feature vector via ``self.text_vector``,
    and returns the fitted model.
    """
    texts, targets = load_data.load_questions(self.file_path)
    features = [self.text_vector(text, False) for text in texts]
    classifier = svm.SVC()
    # classifier = GaussianNB()  # alternative model, kept for reference
    classifier.fit(features, targets)
    return classifier
def predict(self, query, model=None, word_vector_hash=None):
    """Predict the question type for *query*.

    First consults ``self.hardcode`` for a rule-based answer; if that
    yields a prediction it is returned immediately. Otherwise, if no
    trained *model* / *word_vector_hash* pair was supplied, one is fit
    on the odd-indexed half of the questions loaded from
    ``self.file_path``. The query is then whitespace-tokenized,
    vectorized, and classified.

    NOTE(review): this method calls ``self.fit(questions, types)`` and
    expects a ``(model, word_vector_hash)`` pair back — confirm the
    enclosing class defines that two-argument ``fit``.
    """
    pred = self.hardcode(query)
    # PEP 8: compare against None with `is`/`is not`, not `==`/`!=`,
    # which can misbehave for objects overriding __eq__.
    if pred is not None:
        return pred
    if model is None or word_vector_hash is None:
        questions, types = load_data.load_questions(self.file_path)
        # Odd-indexed half is the training split here.
        questions = questions[1::2]
        types = types[1::2]
        model, word_vector_hash = self.fit(questions, types)
    q = query.split(" ")
    q_vect = self.question_vectors(word_vector_hash, [q])[0]
    pred = model.predict(q_vect)[0]
    return pred
def __init__(self, file_path, model, test_size=0.3):
    """Store configuration and eagerly load the question data set.

    Args:
        file_path: path to the questions file read via ``load_data``.
        model: the (unfitted) classifier to use.
        test_size: fraction of the data reserved for testing (default 0.3).
    """
    self.file_path = file_path
    self.model = model
    self.data, self.target = load_data.load_questions(self.file_path)
    self.test_size = test_size
    # Train/test splits are not computed yet; populated later.
    self.train = self.test = self.t_train = self.t_test = None