def main():
    """Run the KNN classifier's ``test`` routine on the project test data.

    Unpacks the four values returned by ``get_test_data``, prints the first
    one, builds a ``KNNClassifier`` with 50 neighbours and the
    ``distanceWeight`` weighting, and hands all four values to its ``test``
    method.
    """
    # Function-scope import: `predict` is only loaded when main() is called.
    from predict import get_test_data

    train_inputs, train_labels, eval_inputs, eval_labels = get_test_data()
    print(train_inputs)

    classifier = KNNClassifier(n_neighbors=50, weight=distanceWeight)
    classifier.test(train_inputs, train_labels, eval_inputs, eval_labels)
# NOTE(review): `get_proba` and `test` take `self`, so they are presumably
# methods of a class whose header lies outside this view (the script guard
# below instantiates BagOfWordsClassifier) — confirm, and re-indent them under
# that class when restoring the file layout.
def get_proba(self, X, y, test_X, prefix=''):
    """Return random-forest class probabilities for the train and test inputs.

    Results are cached on disk in two pickle files named ``prefix`` +
    ``'bag_of_words_random_forest_train.pkl'`` and ``prefix`` +
    ``'bag_of_words_random_forest_test.pkl'``.  The cache is used only when
    BOTH files exist; otherwise the model is fitted, the probabilities are
    computed, and both files are (re)written.  Checking both files avoids a
    ``FileNotFoundError`` when only the train pickle is present.

    The cache file names depend on ``prefix`` alone, so a cache hit ignores
    ``X``, ``y`` and ``test_X`` entirely.

    Args:
        X: Training inputs; passed to ``self.fit`` and ``self._transform_data``.
        y: Training targets; passed to ``self.fit``.
        test_X: Test inputs; passed to ``self._transform_data``.
        prefix: String prepended to both cache file names.

    Returns:
        A one-element list containing the tuple ``(train_proba, test_proba)``.
        On a cache miss both are ``pandas.DataFrame`` objects built from
        ``predict_proba``; on a cache hit they are whatever the pickles hold.
    """
    train_cache = prefix + 'bag_of_words_random_forest_train.pkl'
    test_cache = prefix + 'bag_of_words_random_forest_test.pkl'

    if isfile(train_cache) and isfile(test_cache):
        # SECURITY: unpickling can execute arbitrary code; only point
        # `prefix` at cache files this program wrote itself.
        rf_proba_train = pd.read_pickle(train_cache)
        rf_proba_test = pd.read_pickle(test_cache)
    else:
        self.fit(X, y)
        X = self._transform_data(X)
        test_X = self._transform_data(test_X)

        rf_proba_train = pd.DataFrame(self.random_forest_pipe.predict_proba(X))
        rf_proba_train.to_pickle(train_cache)

        rf_proba_test = pd.DataFrame(
            self.random_forest_pipe.predict_proba(test_X)
        )
        rf_proba_test.to_pickle(test_cache)

    return [(rf_proba_train, rf_proba_test)]


def test(self, X, y, test_X, test_Y):
    """Fit on ``(X, y)`` and print a short evaluation report.

    Prints, in order: an 80-character underscore rule, the
    ``random_forest_pipe`` object, the accuracy of ``self.predict(test_X)``
    against ``test_Y``, and the ``feature_importances_`` of the pipeline step
    named ``'model'``.

    Args:
        X: Training inputs; passed to ``self.fit``.
        y: Training targets; passed to ``self.fit``.
        test_X: Inputs to predict on.
        test_Y: Expected targets for ``test_X``.
    """
    print("_" * 80)
    print(self.random_forest_pipe)

    self.fit(X, y)
    pred = self.predict(test_X)
    print(metrics.accuracy_score(test_Y, pred))

    rf = self.random_forest_pipe.named_steps['model']
    print(rf.feature_importances_)


if __name__ == '__main__':
    # Script entry point: evaluate the bag-of-words classifier on the data
    # returned by get_test_data.
    from predict import get_test_data

    X, y, test_X, test_Y = get_test_data()
    bow = BagOfWordsClassifier()
    bow.test(X, y, test_X, test_Y)