def main():
    """Run the KNN classifier's ``test`` routine on the data from ``predict``.

    Loads four values from ``predict.get_test_data()`` (presumably training
    features/labels followed by held-out features/labels -- confirm against
    ``predict``), prints the first of them, then hands all four to
    ``KNNClassifier.test``.
    """
    # Imported lazily so that merely importing this module does not pull in
    # the ``predict`` module.
    from predict import get_test_data

    train_X, train_y, eval_X, eval_y = get_test_data()
    print(train_X)

    # 50 neighbours, with ``distanceWeight`` supplied as the weighting.
    classifier = KNNClassifier(n_neighbors=50, weight=distanceWeight)
    classifier.test(train_X, train_y, eval_X, eval_y)
    def get_proba(self, X, y, test_X, prefix=''):
        """Return random-forest class probabilities for the train and test sets.

        The probabilities are cached on disk as two pickled DataFrames named
        ``<prefix>bag_of_words_random_forest_train.pkl`` and
        ``<prefix>bag_of_words_random_forest_test.pkl``.  When both files
        exist they are loaded and ``X``, ``y`` and ``test_X`` are ignored, so
        stale caches must be deleted by hand if the data changes.  Otherwise
        the model is fitted on ``(X, y)``, both inputs are passed through
        ``self._transform_data``, probabilities are predicted with
        ``self.random_forest_pipe`` and the results are written to the cache.

        Args:
            X: Training inputs, passed to ``self.fit`` and
                ``self._transform_data``.
            y: Training labels, passed to ``self.fit``.
            test_X: Test inputs, passed to ``self._transform_data``.
            prefix: String prepended verbatim to both cache file names
                (include a trailing path separator to target a directory).

        Returns:
            A one-element list holding a ``(train_proba, test_proba)`` tuple
            of DataFrames.
        """
        train_path = prefix + 'bag_of_words_random_forest_train.pkl'
        test_path = prefix + 'bag_of_words_random_forest_test.pkl'

        # Both files must exist for a cache hit.  Checking only the train file
        # would crash in read_pickle when the test file is missing (e.g. after
        # an interrupted earlier run); recompute in that case instead.
        if isfile(train_path) and isfile(test_path):
            # NOTE: unpickling can execute arbitrary code -- only point
            # ``prefix`` at cache files this program wrote itself.
            rf_proba_train = pd.read_pickle(train_path)
            rf_proba_test = pd.read_pickle(test_path)
        else:
            self.fit(X, y)
            X = self._transform_data(X)
            test_X = self._transform_data(test_X)
            rf_proba_train = pd.DataFrame(self.random_forest_pipe.predict_proba(X))
            rf_proba_test = pd.DataFrame(self.random_forest_pipe.predict_proba(test_X))
            # Write only after both predictions succeeded so a failure above
            # never leaves a half-populated cache behind.
            rf_proba_train.to_pickle(train_path)
            rf_proba_test.to_pickle(test_path)
        return [(rf_proba_train, rf_proba_test)]


    def test(self, X, y, test_X, test_Y):
        """Fit on ``(X, y)`` and print an evaluation report for ``test_X``.

        Printed, in order: an 80-character underscore rule, the
        ``random_forest_pipe`` object, the accuracy of the predictions for
        ``test_X`` measured against ``test_Y``, and the
        ``feature_importances_`` of the pipeline step named ``'model'``.
        Nothing is returned.
        """
        separator = "_" * 80
        print(separator)
        print(self.random_forest_pipe)

        self.fit(X, y)
        predictions = self.predict(test_X)
        accuracy = metrics.accuracy_score(test_Y, predictions)
        print(accuracy)

        # Look the pipeline up again after fitting rather than reusing an
        # earlier reference, exactly as the report is expected to reflect the
        # fitted state.
        forest = self.random_forest_pipe.named_steps['model']
        print(forest.feature_importances_)


if __name__ == '__main__':
    # Script entry point: run the bag-of-words classifier's test report on the
    # four values returned by ``predict.get_test_data()``.
    from predict import get_test_data

    train_X, train_y, eval_X, eval_y = get_test_data()
    classifier = BagOfWordsClassifier()
    classifier.test(train_X, train_y, eval_X, eval_y)