def fit(self, X, y, eval_set=None):
    """Vectorize the training data and fit the wrapped classifier.

    Args:
        X: Training inputs, handed to
            ``transforming.vectorize_and_concatenate_qa`` together with
            ``self.vectorizer``.
        y: Training targets, passed to the classifier unchanged.
        eval_set: Optional indexable whose item 0 holds validation inputs
            and item 1 holds validation targets. When ``None`` it is
            forwarded to the classifier as ``None``.
    """
    # The training data is vectorized first; the validation call below
    # passes do_fit_vectorizer=False (presumably to reuse the vectorizer
    # state produced here rather than refit it -- confirm in transforming).
    train_features = transforming.vectorize_and_concatenate_qa(
        X, self.vectorizer)

    if eval_set is not None:
        val_inputs = eval_set[0]
        val_targets = eval_set[1]
        val_features = transforming.vectorize_and_concatenate_qa(
            val_inputs, self.vectorizer, do_fit_vectorizer=False)
        eval_set = (val_features, val_targets)

    self.classifier.fit(
        train_features,
        y,
        eval_set=eval_set,
        early_stopping_rounds=800,
        verbose=100,
    )
def predict_proba(self, X):
    """Return the wrapped classifier's ``predict_proba`` output for X.

    Args:
        X: Either a ``pd.DataFrame`` (used as given) or any array-like
            that ``np.reshape`` can turn into two columns; in the latter
            case the columns are labelled ``'question'`` and ``'text'``.

    Returns:
        Whatever ``self.classifier.predict_proba`` returns for the
        vectorized input.
    """
    # isinstance (rather than an exact type comparison) lets DataFrame
    # subclasses pass through untouched instead of being rebuilt.
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(np.reshape(X, (-1, 2)),
                         columns=['question', 'text'])

    # do_fit_vectorizer=False is passed at prediction time; the result is
    # converted to CSR before being handed to the classifier.
    features = transforming.vectorize_and_concatenate_qa(
        X, self.vectorizer, do_fit_vectorizer=False).tocsr()
    return self.classifier.predict_proba(features)
def grid_search(self, X, y, param_grid, scoring=None, n_jobs=-2):
    """Run a 3-fold grid search over ``param_grid`` and return the winner.

    Args:
        X: Raw inputs, vectorized with ``self.vectorizer`` before the
            search.
        y: Targets passed to the search unchanged.
        param_grid: Parameter grid forwarded to ``GridSearchCV``.
        scoring: Scoring argument forwarded to ``GridSearchCV``.
        n_jobs: Parallelism argument forwarded to ``GridSearchCV``.

    Returns:
        The ``best_params_`` attribute of the fitted search.
    """
    features = transforming.vectorize_and_concatenate_qa(
        X, self.vectorizer)

    search = GridSearchCV(
        estimator=self.classifier,
        param_grid=param_grid,
        scoring=scoring,
        cv=3,
        verbose=1,
        n_jobs=n_jobs,
    )
    search.fit(features, y)
    return search.best_params_
def fit(self, X, y):
    """Vectorize X with ``self.vectorizer`` and fit the classifier on it.

    Args:
        X: Raw inputs handed to
            ``transforming.vectorize_and_concatenate_qa``.
        y: Targets passed to ``self.classifier.fit`` unchanged.
    """
    features = transforming.vectorize_and_concatenate_qa(
        X, self.vectorizer)
    self.classifier.fit(features, y)
def fit(self, X, y):
    """Vectorize X, convert it to CSR, and fit the wrapped classifier.

    Args:
        X: Raw inputs handed to
            ``transforming.vectorize_and_concatenate_qa``.
        y: Targets passed to ``self.classifier.fit`` unchanged.
    """
    vectorized = transforming.vectorize_and_concatenate_qa(
        X, self.vectorizer)
    # The classifier is given the CSR form of the vectorized input.
    csr_features = vectorized.tocsr()
    self.classifier.fit(csr_features, y, verbose=200)