def auto_test_for_other_model(model, param_grid, name_scoring):
    """Grid-search ``model`` on bag-of-words features of the SMS training split.

    The SMS dataset is loaded through ``get_sms_dataset`` (stop words removed,
    cache overwritten), split 80/20 with a fixed seed, and only the training
    part is vectorised and used for the search; the held-out part is
    discarded here.

    Args:
        model: Estimator handed to ``gsc`` (presumably scikit-learn's
            ``GridSearchCV`` -- confirm against the file's imports).
        param_grid: Parameter grid forwarded unchanged to ``gsc``.
        name_scoring: Value passed as the ``scoring`` argument of ``gsc``.

    Returns:
        A ``(best_params, best_score)`` tuple taken from the fitted search.
    """
    sms_frame = get_sms_dataset(noStopwords=True, overwrite=True)
    messages = sms_frame['message']
    targets = sms_frame['target']

    # Only the training portion is needed; the test portion is thrown away.
    train_messages, _, train_targets, _ = train_test_split(
        messages, targets, random_state=0, test_size=0.2
    )

    # The vocabulary is learned from the training messages only.
    train_counts = CountVectorizer().fit_transform(train_messages)

    search = gsc(model, param_grid, scoring=name_scoring)
    search.fit(train_counts, train_targets)

    best_params, best_score = search.best_params_, search.best_score_
    print(best_params, best_score)
    return best_params, best_score
def auto_test(x_data, y_data, begin, end, test_num, name_scoring):
    """Search for the best ``alpha`` of a ``MultinomialNB`` classifier.

    ``test_num`` evenly spaced ``alpha`` values between ``begin`` and ``end``
    are combined with both ``fit_prior`` settings and evaluated with ``gsc``.

    Args:
        x_data: Feature matrix passed to ``fit``.
        y_data: Target values passed to ``fit``.
        begin: First ``alpha`` candidate.
        end: Last ``alpha`` candidate.
        test_num: Number of ``alpha`` candidates generated by ``np.linspace``.
        name_scoring: Value passed as the ``scoring`` argument of ``gsc``.

    Returns:
        The best ``alpha`` found, converted to a plain ``float``.
    """
    alpha_candidates = np.linspace(begin, end, test_num)
    search_space = [
        {
            'alpha': alpha_candidates,
            'fit_prior': [True, False],
        },
    ]

    # GridSearchCV
    search = gsc(MultinomialNB(), search_space, scoring=name_scoring)
    search.fit(x_data, y_data)

    print(search.best_params_, search.best_score_)
    return float(search.best_params_['alpha'])
# Remaining command-line options (the parser itself is created earlier).
parser.add_argument(
    '--output',
    type=str,
    required=True,
    help="directory to save the output files",
)
parser.add_argument(
    '--lang',
    type=str,
    required=True,
    help="fr or zh",
)
args = parser.parse_args()

# Load the train/test arrays from the input directory, in the same order as
# the names on the left-hand side.
X_train, Y_train, X_test, Y_test = (
    bp.unpack_ndarray_from_file(args.input + file_name)
    for file_name in (
        '/train_X.blp',
        '/train_Y.blp',
        '/test_X.blp',
        '/test_Y.blp',
    )
)

# Base estimator whose C value is tuned by the grid search below.
model = LinearSVC(class_weight="balanced", random_state=42)

# Candidate values for C.
params = {"C": [0.0001, 0.001, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1]}

# Run the grid search on the training set; refit=True keeps a model refitted
# with the best parameters so that predict() can be called on the search.
tense_clf = gsc(model, params, n_jobs=5, refit=True)
tense_clf.fit(X_train, Y_train)

# Show the winning parameters and the resulting estimator.
print(tense_clf.best_params_)
print(tense_clf.best_estimator_)

# Evaluate the tuned model on the test set.
y_pred = tense_clf.predict(X_test)
print(classification_report(Y_test, y_pred))
# NOTE: a confusion-matrix plot was disabled here:
# plot_confusion_matrix(model,X_test,Y_test,display_labels=['Past','Fut','Pres'],cmap=plt.cm.Blues,normalize='true')
# Report how the untuned model performs on the training and test sets.
y_predTrain1 = model.predict(X_train)
print(accuracy_score(y_train, y_predTrain1))
y_pred1 = model.predict(X_test)
print(accuracy_score(y_test, y_pred1))
print(classification_report(y_test, y_pred1))
e1 = time.perf_counter()
print("################## training without tuning: ", e1 - s1)

# Start a separate timer for the hyper-parameter tuning phase.
s2 = time.perf_counter()

# Candidate values for C explored by the grid search.
params = {
    "C": [
        0.0001, 0.001, 0.02, 0.03, 0.04, 0.05, 0.06, 0.1,
        1, 10, 100, 1000, 10000, 100000,
    ]
}

# 3-fold grid search scored with macro-averaged F1; refit=True keeps a model
# refitted with the best parameters so predict() works on the search object.
tense_clf = gsc(model, params, refit=True, cv=3, scoring='f1_macro')
tense_clf.fit(X_train, y_train)

# Show the winning parameters and the resulting estimator.
print(tense_clf.best_params_)
print(tense_clf.best_estimator_)

# Report how the tuned model performs on the training and test sets.
y_predTrain = tense_clf.predict(X_train)
print(accuracy_score(y_train, y_predTrain))
y_predTest = tense_clf.predict(X_test)
print(accuracy_score(y_test, y_predTest))
print(classification_report(y_test, y_predTest))
e2 = time.perf_counter()

# Elapsed time of the tuning phase itself (s2 -> e2); using e1 - s1 here
# would repeat the duration of the untuned phase.
print("################## training with tuning: ", e2 - s2)
# Work with a plain Python list of test labels from here on.
y_test = y_test.tolist()

s1 = time.perf_counter()

# Present / not-present model, trained on the training set.
# model1 is the estimator tuned below; model2 is identical except for a
# fixed random_state and is not used in this part of the script.
model1 = LinearSVC(C=0.01, class_weight="balanced")
model2 = LinearSVC(C=0.01, class_weight="balanced", random_state=42)

# Candidate values for C.
params = {
    "C": [
        0.005,
        0.006,
        0.007,
        0.008,
        0.009,
        0.01,
        0.014,
        0.015,
        0.016,
        0.017,
        0.02,
    ]
}

# Run the grid search; refit=True keeps a model refitted with the best
# parameters so predict() can be called on the search object.
pres_notPres_clf = gsc(model1, params, n_jobs=5, refit=True)
pres_notPres_clf.fit(X1_train, y1_train)

# Show the winning parameters and the resulting estimator.
print(pres_notPres_clf.best_params_)
print(pres_notPres_clf.best_estimator_)

# Performance on the data the model was trained on.
y1_predTrain = pres_notPres_clf.predict(X1_train)
print(classification_report(y1_train, y1_predTrain, digits=3))

# Predict on the test set and collect the positions predicted as class 3
# (presumably the "not present" label -- confirm against the label encoding).
y1predTest = pres_notPres_clf.predict(X_test)
no_pres_idxs = np.where(y1predTest == 3)[0]
# Summary of the scores computed just before this section.
print("Mean:", lrmses.mean())
print("Standard Deviation:", lrmses.std())

# Fit the forest regressor and measure its error on the training data.
fr = rfr()
fr.fit(housing_final, housing_labels)
housing_predictions = fr.predict(housing_final)
# NOTE(review): the square root is taken afterwards, so `rmse` appears to
# return a mean squared error despite its name -- confirm the import alias.
fmse = rmse(housing_labels, housing_predictions)
frmse = np.sqrt(fmse)
print(frmse)

# 10-fold cross-validation; the scorer yields negated MSE, hence the sign
# flip before taking the root.
fscores = cvs(
    fr,
    housing_final,
    housing_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)
frmses = np.sqrt(-fscores)
print("RandomForestRegressor")
print("Scores:", frmses)
print("Mean:", frmses.mean())
print("Standard Deviation:", frmses.std())

# Two sub-grids: the first with default bootstrapping, the second with
# bootstrapping disabled.
pgrid = [
    {
        'n_estimators': [3, 10, 30],
        'max_features': [2, 4, 6, 8],
    },
    {
        'bootstrap': [False],
        'n_estimators': [3, 10],
        'max_features': [2, 3, 4],
    },
]
gs = gsc(
    fr,
    pgrid,
    cv=5,
    scoring="neg_mean_squared_error",
    return_train_score=True,
)
gs.fit(housing_final, housing_labels)
print(gs.best_params_)
print(gs.best_estimator_)

# Print the root of the (negated) mean test score for every parameter set.
cres = gs.cv_results_
for ms, ps in zip(cres["mean_test_score"], cres["params"]):
    print(np.sqrt(-ms), ps)

# Pair each feature importance with its attribute name, highest first.
fi = gs.best_estimator_.feature_importances_
print(fi)
# NOTE(review): "bedperroom " carries a trailing space in the original label;
# it is kept unchanged here.
ea = ["roomsperhousehold", "popperhouse", "bedperroom "]
ce = fp.named_transformers_["cat"]
c1hotatt = list(ce.categories_[0])
attr = nattr + ea + c1hotatt
print(sorted(zip(fi, attr), reverse=True))

final_model = gs.best_estimator_