def gs():
    """Grid-search a TF-IDF + LogisticRegression text-classification pipeline.

    Relies on module-level globals defined elsewhere in this file:
    ``X_train_clean``/``Y_train`` (fitting) and ``X_test_clean``/``Y_test``
    (evaluation). Prints the best CV score, the winning parameter set, a
    classification report, test accuracy, and the test MSE. Returns None.
    """
    pipeline = Pipeline([
        ('vect', TfidfVectorizer(stop_words='english')),
        ('reg', LogisticRegression()),
    ])
    parameters = {
        'vect__max_df': (0.25, 0.5, 0.75),
        'vect__ngram_range': ((1, 1), (1, 2), (1, 3)),
        'vect__use_idf': (True, False),
        'reg__C': (1, 10, 100),
    }
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train_clean, Y_train)
    print('Best score: %0.3f' % grid_search.best_score_)
    print('Best parameters set:')
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))
    print('CR: %s' % classification_report(Y_test, grid_search.predict(X_test_clean)))
    # FIX: with scoring='accuracy', GridSearchCV.score() returns accuracy, not
    # R^2 — the label previously said "R Square", which was wrong.
    print('Accuracy: %s' % grid_search.score(X_test_clean, Y_test))
    # FIX: corrected "sqared" -> "squared" typo in the printed label.
    print('Mean squared error: %s' % msq(Y_test, grid_search.predict(X_test_clean)))
# Polynomial-expand the features, then fit a robust scaler and re-frame the
# scaled features together with the target.
X = poly.fit_transform(X)
scale = RS()
fits = scale.fit(X)
rs = pd.DataFrame(fits.transform(X))
rs['target'] = y
# Drop rows with a missing target before splitting into train/test.
robust = rs.dropna(subset=['target'])
train_df, test_df = train_test_split(robust)
X_train = train_df.drop('target',axis=1)
y_train = train_df['target']
X_test = test_df.drop('target',axis=1)
y_test = test_df['target']
# LOGISTIC REGRESSION
log = LOG()
log.fit(X_train, y_train)
# NOTE(review): metrics are called as (predictions, truth); r2 is not
# symmetric in its arguments — confirm the intended order is (y_true, y_pred).
log_msq = msq(log.predict(X_test), y_test)
log_r2 = r2(log.predict(X_test), y_test)
print('\nThe mean squared error of the Logistic Regression model is: \t\t%s'%log_msq)
print('The R2 score of the Logistic Regression model is: \t\t\t%s'%log_r2)
# SVM grid search over kernel / C / gamma.
#parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
#parameters = {'kernel':('linear', 'rbf', 'poly', 'sigmoid', 'precomputed'), 'C':[1,5,10], 'gamma':[0.001, 0.0001]}
parameters = {'kernel':('linear', 'rbf', 'poly', 'sigmoid'), 'C':[1,5,10], 'gamma':[0.001, 0.0001]}
#svc = svm.SVC(C=1, parameters)
#clf_log = GridSearchCV(svc, parameters, cv=5)
svc = svm.SVC(gamma="scale")
clf_log = GridSearchCV(svc, parameters, cv=5)
# NOTE(review): the grid search is fitted on the TEST split — presumably this
# should be (X_train, y_train); verify before trusting best_score_ below.
clf_log.fit(X_test, y_test)
print('Best score for iris dataset with a polynomial transform and a robust scaler is:', clf_log.best_score_)
#clfs_log = sorted(clf_log.cv_results_.keys())
# Heat-map of the confusion matrix for the sentiment classifier.
# NOTE(review): the name `confusion_matrix` is passed straight to matshow — if
# it still refers to the sklearn *function* rather than a computed matrix,
# this plots the wrong object; confirm a matrix was assigned to this name
# earlier in the file.
plt.matshow(confusion_matrix)
plt.title('Sentiment Analysis from reviews')
plt.ylabel('True Values')
plt.xlabel('Predicted Values')
plt.colorbar()
plt.show()
# %%
#recall precision accuracy and f1 scores
print('Accuracy: %s' % accuracy_score(Y_test, regressor.predict(X_test)))
#print('Recall: %s'%recall_score(Y_test, regressor.predict(X_test), average='macro'))
#print('Precision: %s'%precision_score(Y_test, regressor.predict(X_test), average='macro'))
#print('F1: %s'%f1_score(Y_test, regressor.predict(X_test), average='macro'))
print('CR: %s' % classification_report(Y_test, regressor.predict(X_test)))
print('R Square: %s' % regressor.score(X_test, Y_test))
print('Mean sqared error: %s' % msq(Y_test, regressor.predict(X_test)))
### USING GRID SEARCH ### (called only when the function main is called)
# %%
#CROSS VAL SCORE
#print('Cross Val Score: %s'%cross_val_score(regressor, X_vec, Y, cv=5))
# %%
def main():
    """Grid-search the TF-IDF + LogisticRegression pipeline over
    vectorizer hyper-parameters."""
    pipeline = Pipeline([('vect', TfidfVectorizer(stop_words='english')), ('reg', LogisticRegression())])
    parameters = {
        'vect__max_df': (0.25, 0.5, 0.75),
        'vect__ngram_range': ((1, 1), (1, 2), (1, 3)),
        'vect__use_idf': (True, False)
# Tail of a train/test-split call whose opening is above this chunk.
y, test_size=0.5, random_state=2)
#log = LOG()
#poly = POLY(3)
#scale = RS()
# Train a logistic regression model with a polynomial transform and a robust scaler.
pipeline = Pipeline(steps=[('rs', RS()), ('poly', POLY(degree=2)), ('logistic', LOG())])
pipeline.fit(X_train, y_train)
# NOTE(review): metrics are called as (predictions, truth); r2 is not
# symmetric — confirm the intended order is (y_true, y_pred).
log_msq = msq(pipeline.predict(X_test), y_test)
log_r2 = r2(pipeline.predict(X_test), y_test)
print('\nThe mean squared error of the Logistic Regression model is: \t\t%s' % log_msq)
print('The R2 score of the Logistic Regression model is: \t\t\t%s' % log_r2)
#pipe = make_pipeline(TfidfVectorizer(), LogisticRegression())
# Grid over pipeline-step hyper-parameters (step-name__param keys).
parameters = {
    'poly__degree': [1, 2, 5, 10],
    'logistic__C': [1, 2, 5, 10],
    # NOTE(review): dual=True is only supported by the liblinear solver with an
    # L2 penalty — with LogisticRegression's default solver this grid point
    # will raise during fitting; verify.
    'logistic__dual': [True, False],
    'rs__with_centering': [True, False]
}
#param_grid = {"logisticregression_C": [0.001, 0.01, 0.1, 1, 10, 100], "tfidfvectorizer_ngram_range": [(1,1), (1,2), (1,3)]}
search = GridSearchCV(pipeline, parameters, cv=5, n_jobs=-1, verbose=1)
search.fit(X_train, y_train)
y_test = test_df['target'] # ============================================================================= # LINEAR MODEL CLASSIFICATIONS # ============================================================================= from sklearn.linear_model import LinearRegression as LIN from sklearn.linear_model import Ridge from sklearn.linear_model import Lasso from sklearn.linear_model import ElasticNet as ENET from sklearn.linear_model import LogisticRegression as LOG # LINEAR REGRESSION lin = LIN() lin.fit(X_train, y_train) lin_msq = msq(lin.predict(X_test), y_test) lin_r2 = r2(lin.predict(X_test), y_test) print('\nThe mean squared error of the Linear Regression model is: \t\t%s' % lin_msq) print('The R2 score of the Linear Regression model is: \t\t\t%s' % lin_r2) # LOGISTIC REGRESSION log = LOG() log.fit(X_train, y_train) log_msq = msq(log.predict(X_test), y_test) log_r2 = r2(log.predict(X_test), y_test) print('\nThe mean squared error of the Logistic Regression model is: \t\t%s' % log_msq) print('The R2 score of the Logistic Regression model is: \t\t\t%s' % log_r2) # RIDGE CLASSIFICATION
# Read a space-separated feature vector from the user and classify it with
# `dsc` (a model trained elsewhere in this file — not visible in this chunk).
x = input("enter the value you want to check:")
x = x.split()
# NOTE(review): x is a list of *strings* here — confirm dsc was fitted on
# string-typed features; otherwise cast to numbers before predicting.
print(dsc.predict([x]))
# Linear Regression
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train,Y_train)
print(lr.coef_)
print(len(lr.coef_),lr.intercept_)
# Build a 1-D fitted line from one feature column for plotting.
# NOTE(review): X_plot is column index 1 but the slope used is coef_[0]
# (the coefficient for column 0) — presumably one of these indices is wrong;
# verify which feature this line is meant to visualize.
X_plot = X_train.iloc[:, 1]
Y_plot = X_plot*lr.coef_[0] + lr.intercept_
Y_predicted = lr.predict(X_test)
# Print predicted vs. actual pairs for inspection.
for i,j in zip(Y_predicted,Y_test):
    print(i,j)
from sklearn.metrics import mean_squared_error as msq
print("MSQ="+str(msq(Y_test,Y_predicted)))
from matplotlib import pyplot as plt
plt.plot(X_plot,Y_plot)
plt.scatter(Y_test,Y_predicted)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.show()
# knn — k-nearest-neighbours classification with k=10.
from sklearn.neighbors import KNeighborsClassifier as knn
knn_model = knn(n_neighbors=10)
knn_model.fit(X_train,Y_train)
Y_predicted = knn_model.predict(X_test)
# Count exact matches between truth and prediction (loop body continues
# beyond this chunk).
correct_output=0
for i,j in zip(Y_test,Y_predicted):
    print(i,j)
    if i==j: