def test_logistic_regression(X, y, test_size=0.3, iters=10, num_folds=5):
    """Run the shared classification routine with a logistic regression model.

    A default-constructed ``LogisticRegression`` and a grid over its ``C``
    parameter are handed to ``perform_classification`` together with the
    data and the evaluation settings.

    Args:
        X: Feature data, forwarded unchanged to ``perform_classification``.
        y: Target data, forwarded unchanged to ``perform_classification``.
        test_size: Forwarded unchanged (presumably the held-out fraction --
            confirm against ``perform_classification``).
        iters: Forwarded unchanged (meaning is defined by
            ``perform_classification``).
        num_folds: Forwarded unchanged (presumably the number of
            cross-validation folds -- confirm against
            ``perform_classification``).

    Returns:
        Whatever ``perform_classification`` returns for this model.
    """
    # NOTE: caching of the result in 'models/lr.model.p' via pickle was
    # previously sketched here but is disabled; every call recomputes.
    c_grid = {'C': [0.01, 0.1, 1.0, 10, 100]}
    return perform_classification(
        LogisticRegression(), c_grid, X, y, test_size, iters, num_folds
    )
def test_support_vectors(X, y, test_size=0.3, iters=10, num_folds=4):
    """Run the shared classification routine with a support vector classifier.

    A default-constructed ``SVC`` and a grid over its ``C`` parameter are
    handed to ``perform_classification`` together with the data and the
    evaluation settings.

    Args:
        X: Feature data, forwarded unchanged to ``perform_classification``.
        y: Target data, forwarded unchanged to ``perform_classification``.
        test_size: Forwarded unchanged (presumably the held-out fraction --
            confirm against ``perform_classification``).
        iters: Forwarded unchanged (meaning is defined by
            ``perform_classification``).
        num_folds: Forwarded unchanged (presumably the number of
            cross-validation folds -- confirm against
            ``perform_classification``). Defaults to 4 here.

    Returns:
        Whatever ``perform_classification`` returns for this model.
    """
    # NOTE: disabled alternatives that were sketched here: using LinearSVC
    # instead of SVC, extending the grid with 'kernel' (linear/rbf) and
    # 'gamma' (0.001, 0.0001) entries, and caching the result in
    # 'models/svm.model.p' via pickle. Only 'C' is searched at present.
    c_grid = {'C': [1, 10, 100, 1000]}
    return perform_classification(
        SVC(), c_grid, X, y, test_size, iters, num_folds
    )
def test_random_forests(X, y, test_size=0.3, iters=20, num_folds=5):
    """Run the shared classification routine with a random forest model.

    A default-constructed ``RandomForestClassifier`` and a grid over
    ``n_estimators``, ``max_depth`` and ``min_samples_split`` are handed to
    ``perform_classification``. Afterwards the feature ranking of the best
    estimator is printed to stdout.

    Args:
        X: Feature data, forwarded unchanged to ``perform_classification``.
        y: Target data, forwarded unchanged to ``perform_classification``.
        test_size: Forwarded unchanged (presumably the held-out fraction --
            confirm against ``perform_classification``).
        iters: Forwarded unchanged (meaning is defined by
            ``perform_classification``).
        num_folds: Forwarded unchanged (presumably the number of
            cross-validation folds -- confirm against
            ``perform_classification``).

    Returns:
        The object returned by ``perform_classification``. It must expose
        ``best_estimator_.feature_importances_``, which is read below.
    """
    # NOTE: caching of the result in 'models/rf.model.p' via pickle and
    # dropping column 8 of X (np.delete) were previously sketched here but
    # are disabled.
    forest = RandomForestClassifier()
    grid_params = {
        'n_estimators': [10, 20, 30],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 3, 4],
    }
    clf = perform_classification(
        forest, grid_params, X, y, test_size, iters, num_folds
    )

    # Rank features by decreasing importance. sorted() is stable, so features
    # with equal importance keep their original relative order. Indices are
    # reported 1-based.
    ranked = sorted(
        enumerate(clf.best_estimator_.feature_importances_),
        key=lambda pair: pair[1],
        reverse=True,
    )
    feature_order = [index + 1 for index, _ in ranked]

    # Parenthesised form is valid on both Python 2 (prints the list as before)
    # and Python 3, where the bare print statement is a SyntaxError.
    print(feature_order)
    return clf