class RidgeClassifierCVImpl():
    """Thin wrapper that stores hyperparameters and delegates to ``SKLModel``.

    The hyperparameters are captured at construction time; the wrapped
    ``SKLModel`` instance is only created when :meth:`fit` is called.
    (``SKLModel`` is defined elsewhere in the file -- presumably
    scikit-learn's ``RidgeClassifierCV``; confirm against the imports.)
    """

    def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True,
                 normalize=False, scoring=None, cv=None,
                 class_weight='balanced', store_cv_values=False):
        """Record the hyperparameters that will be forwarded to ``SKLModel``.

        All parameters are stored unmodified under their own name in
        ``self._hyperparams``, except that a ``list`` passed as ``alphas``
        is shallow-copied.
        """
        # The default for ``alphas`` is a single list object shared by every
        # call.  Copy list inputs so that mutating one instance's
        # hyperparameters can neither corrupt the default nor leak into the
        # caller's own list.  Other array-likes are stored as given.
        if isinstance(alphas, list):
            alphas = list(alphas)
        self._hyperparams = {
            'alphas': alphas,
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'scoring': scoring,
            'cv': cv,
            'class_weight': class_weight,
            'store_cv_values': store_cv_values,
        }

    def fit(self, X, y=None):
        """Build a fresh ``SKLModel`` from the stored hyperparameters and fit it.

        ``y`` is forwarded only when it is not ``None``.  Returns ``self``.
        """
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``.

        Requires a prior call to :meth:`fit`, which creates the model.
        """
        return self._sklearn_model.predict(X)
def _test_ridge_classifiers(filter_):
    """Fit the ridge classifiers on the iris data and check shape and accuracy.

    ``filter_`` is a callable applied to the module-level ``X_iris`` before
    every ``fit`` / ``predict`` call.
    """
    n_features = X_iris.shape[1]
    n_classes = np.unique(y_iris).shape[0]
    expected_coef_shape = (n_classes, n_features)

    # Default-constructed plain and cross-validated classifiers.
    for estimator in (RidgeClassifier(), RidgeClassifierCV()):
        estimator.fit(filter_(X_iris), y_iris)
        assert_equal(estimator.coef_.shape, expected_coef_shape)
        predicted = estimator.predict(filter_(X_iris))
        accuracy = np.mean(y_iris == predicted)
        assert_greater(accuracy, .79)

    # Cross-validated classifier driven by an explicit 5-fold splitter.
    folds = KFold(5)
    cv_estimator = RidgeClassifierCV(cv=folds)
    cv_estimator.fit(filter_(X_iris), y_iris)
    predicted = cv_estimator.predict(filter_(X_iris))
    assert_true(np.mean(y_iris == predicted) >= 0.8)
def _test_ridge_classifiers(filter_):
    """Fit the ridge classifiers on the iris data and check shape and accuracy.

    ``filter_`` is a callable applied to the module-level ``X_iris`` before
    every ``fit`` / ``predict`` call.
    """
    n_samples, n_features = X_iris.shape[0], X_iris.shape[1]
    n_classes = np.unique(y_iris).shape[0]
    expected_coef_shape = (n_classes, n_features)

    # Default-constructed plain and cross-validated classifiers.
    for estimator in (RidgeClassifier(), RidgeClassifierCV()):
        estimator.fit(filter_(X_iris), y_iris)
        assert_equal(estimator.coef_.shape, expected_coef_shape)
        predicted = estimator.predict(filter_(X_iris))
        accuracy = np.mean(y_iris == predicted)
        assert_greater(accuracy, .79)

    # Cross-validated classifier driven by an explicit splitter built from
    # the sample count and 5.
    folds = KFold(n_samples, 5)
    cv_estimator = RidgeClassifierCV(cv=folds)
    cv_estimator.fit(filter_(X_iris), y_iris)
    predicted = cv_estimator.predict(filter_(X_iris))
    assert_true(np.mean(y_iris == predicted) >= 0.8)
def fit(self, X, y=None):
    """Create a new ``SKLModel`` from the stored hyperparameters and fit it.

    The model is stored on ``self._sklearn_model`` before fitting.  ``y`` is
    passed to the underlying ``fit`` only when it is not ``None``.

    Returns ``self``.
    """
    model = SKLModel(**self._hyperparams)
    self._sklearn_model = model
    fit_args = (X,) if y is None else (X, y)
    model.fit(*fit_args)
    return self
def test_class_weights_cv():
    """Check that class weights are honoured by the cross-validated ridge classifier."""
    samples = np.array([[-1.0, -1.0],
                        [-1.0, 0],
                        [-.8, -1.0],
                        [1.0, 1.0],
                        [1.0, 0.0]])
    labels = [1, 1, 1, -1, -1]

    # Unweighted fit: only verifies that fitting succeeds.
    unweighted = RidgeClassifierCV(class_weight=None, alphas=[.01, .1, 1])
    unweighted.fit(samples, labels)

    # Give class 1 a very small weight; the query point must then be
    # predicted as class -1.
    weighted = RidgeClassifierCV(class_weight={1: 0.001},
                                 alphas=[.01, .1, 1, 10])
    weighted.fit(samples, labels)

    prediction = weighted.predict([[-.2, 2]])
    assert_array_equal(prediction, np.array([-1]))
def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True,
             normalize=False, scoring=None, cv=None,
             class_weight='balanced', store_cv_values=False):
    """Store the hyperparameters and build the wrapped ``Op`` model from them.

    Every argument is recorded under its own name in ``self._hyperparams``
    and the same mapping is forwarded as keyword arguments to ``Op``
    (defined elsewhere in the file).  A ``list`` passed as ``alphas`` is
    shallow-copied first.
    """
    # The default for ``alphas`` is one list object shared by every call.
    # Copy list inputs so that a mutation through this instance (or through
    # the wrapped model) cannot alter the default seen by later instances
    # or the caller's own list.  Other array-likes are stored as given.
    if isinstance(alphas, list):
        alphas = list(alphas)
    self._hyperparams = {
        'alphas': alphas,
        'fit_intercept': fit_intercept,
        'normalize': normalize,
        'scoring': scoring,
        'cv': cv,
        'class_weight': class_weight,
        'store_cv_values': store_cv_values,
    }
    self._wrapped_model = Op(**self._hyperparams)
def test_ridge_classifier_cv_store_cv_values():
    """Check the shape of ``cv_values_`` for 1-D and 2-D targets."""
    features = np.array([[-1.0, -1.0],
                         [-1.0, 0],
                         [-.8, -1.0],
                         [1.0, 1.0],
                         [1.0, 0.0]])
    alphas = [1e-1, 1e0, 1e1]
    n_samples, n_alphas = features.shape[0], len(alphas)

    model = RidgeClassifierCV(alphas=alphas, cv=None, store_cv_values=True)

    # 1-D target vector: a single target column is expected.
    targets_1d = np.array([1, 1, 1, -1, -1])
    model.fit(features, targets_1d)
    assert model.cv_values_.shape == (n_samples, 1, n_alphas)

    # 2-D target matrix: one column per target.
    targets_2d = np.array([[1, 1, 1, -1, -1],
                           [1, -1, 1, -1, 1],
                           [-1, -1, 1, -1, -1]]).transpose()
    n_targets = targets_2d.shape[1]
    model.fit(features, targets_2d)
    assert model.cv_values_.shape == (n_samples, n_targets, n_alphas)
def test_class_weights_cv():
    """Check that class weights are honoured by the cross-validated ridge classifier."""
    samples = np.array([[-1.0, -1.0],
                        [-1.0, 0],
                        [-0.8, -1.0],
                        [1.0, 1.0],
                        [1.0, 0.0]])
    labels = [1, 1, 1, -1, -1]

    # Unweighted fit: only verifies that fitting succeeds.
    unweighted = RidgeClassifierCV(class_weight=None, alphas=[0.01, 0.1, 1])
    unweighted.fit(samples, labels)

    # Give class 1 a very small weight; the query point must then be
    # predicted as class -1.
    weighted = RidgeClassifierCV(class_weight={1: 0.001},
                                 alphas=[0.01, 0.1, 1, 10])
    weighted.fit(samples, labels)

    prediction = weighted.predict([[-0.2, 2]])
    assert_array_equal(prediction, np.array([-1]))
'NearestNeighbors': (True, KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None) ), # (n_neighbors=4) ), 'LogisticRegressionCV': (True, LogisticRegressionCV(Cs=10, fit_intercept=True, cv='warn', dual=False, penalty='l2', scoring=None, solver='lbfgs', tol=0.0001, max_iter=100, class_weight=None, n_jobs=None, verbose=0, refit=True, intercept_scaling=1.0, multi_class='warn', random_state=None, l1_ratios=None) ), 'LDA': (True, LinearDiscriminantAnalysis(solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=0.0001) ), 'LogisticRegression': (True, LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='warn', max_iter=100, multi_class='warn', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None) ), 'CalibratedClassifierCV': (True, CalibratedClassifierCV(base_estimator=None, method='sigmoid', cv='warn') ), 'LinearSVC': (True, LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000) ), 'LinearSVM': ( True, SVC(kernel='linear', C=0.025) ), # (C=0.01, penalty='l1', dual=False) ), 'RBF_SVM': (True, SVC(gamma='auto') ),#gamma=2, C=1) ), # 'Nu_SVM': (True, NuSVC(gamma='auto') ), 'GaussianProcess': (False, GaussianProcessClassifier() ), #(1.0 * RBF(1.0)) ), 'NeuralNet': (True, MLPClassifier(alpha=1, max_iter=1000) ), 'QDA': (True, QuadraticDiscriminantAnalysis() ), 'NaiveBayes': (True, GaussianNB() ), 'RadiusNeighborsClassifier': (True, RadiusNeighborsClassifier() ), 'SGDClassifier': (True, SGDClassifier() ), 'RidgeClassifierCV': (True, RidgeClassifierCV() ), 'RidgeClassifier': (True, RidgeClassifier() ), 'PassiveAggressiveClassifier': (True, PassiveAggressiveClassifier() ), 'LabelPropagation': (True, 
LabelPropagation() ), 'LabelSpreading': (False, LabelSpreading() ), 'MultinomialNB': (True, MultinomialNB() ), 'NearestCentroid': (True, NearestCentroid() ), 'Perceptron': (True, Perceptron() ), } # feature_set is used for manually enabling the individual features. # NOTE: setting boolean value, eanbles/disables feature. feature_set = { 'backers_count': True, 'converted_pledged_amount': True,
from sklearn.metrics.scorer import check_scoring from .._utils import CacheMixin from .._utils.cache_mixin import _check_memory from .._utils.param_validation import (_adjust_screening_percentile, check_feature_screening) from ..input_data.masker_validation import check_embedded_nifti_masker SUPPORTED_ESTIMATORS = dict( svc_l1=LinearSVC(penalty='l1', dual=False, max_iter=1e4), svc_l2=LinearSVC(penalty='l2', max_iter=1e4), svc=LinearSVC(penalty='l2', max_iter=1e4), logistic_l1=LogisticRegression(penalty='l1', solver='liblinear'), logistic_l2=LogisticRegression(penalty='l2', solver='liblinear'), logistic=LogisticRegression(penalty='l2', solver='liblinear'), ridge_classifier=RidgeClassifierCV(), ridge_regressor=RidgeCV(), ridge=RidgeCV(), svr=SVR(kernel='linear', max_iter=1e4), ) def _check_param_grid(estimator, X, y, param_grid=None): """Check param_grid and return sensible default if param_grid is None. Parameters ----------- estimator: str, optional The estimator to choose among: 'svc', 'svc_l2', 'svc_l1', 'logistic', 'logistic_l1', 'logistic_l2', 'ridge', 'ridge_classifier', 'ridge_regressor', and 'svr'. Note that the 'svc' and 'svc_l2';
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.svm import LinearSVC from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegressionCV from sklearn.naive_bayes import MultinomialNB from sklearn.neighbors import NearestCentroid from sklearn.svm import NuSVC from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis from sklearn.svm import SVC from sklearn.feature_selection import SelectKBest, f_classif, chi2, mutual_info_classif, RFE, RFECV from sklearn.preprocessing import RobustScaler, StandardScaler from sklearn.datasets import make_classification random_state = 42 classifiers = [ RidgeClassifierCV(), RandomForestClassifier(), SVC(), DecisionTreeClassifier(), BaggingClassifier(), LogisticRegressionCV(), ExtraTreeClassifier(), SGDClassifier(), RidgeClassifier(), PassiveAggressiveClassifier(), AdaBoostClassifier(), GradientBoostingClassifier(), ExtraTreesClassifier(), LogisticRegression(), KNeighborsClassifier(), GaussianProcessClassifier(),
def result():
    """Train a panel of text classifiers on an uploaded CSV and report accuracy.

    On a POST request the handler:

    1. reads the uploaded ``myFile`` CSV (ISO-8859-1) into a DataFrame;
    2. takes the text column named by the ``feature`` form field and the
       target column named by ``label``; if either column is missing it
       renders ``nameError.html``;
    3. strips URLs and non-alphanumeric characters from the text and
       lower-cases it;
    4. fits a TF-IDF vectoriser on a 67/33 train/test split and evaluates
       twelve classifiers, printing accuracy and wall-clock time for each;
    5. pickles the most accurate classifier to ``<filename>_model`` and the
       vectoriser to ``<filename>_tfidfVect``;
    6. renders ``result.html`` with the accuracies as ``ac1`` .. ``ac12``.

    For any other request method the function returns ``None``.
    """
    if request.method != 'POST':
        # Non-POST requests produce no response object, as before.
        return None

    upload = request.files.get('myFile')
    df = pd.read_csv(upload, encoding="ISO-8859-1")
    # NOTE(security): `filename` comes straight from the request form and is
    # used below as a filesystem path prefix.  Sanitise it (e.g. restrict it
    # to a plain basename) before exposing this endpoint to untrusted users.
    filename = request.form['filename']
    feature_col = request.form['feature']
    label_col = request.form['label']
    if feature_col not in df.columns or label_col not in df.columns:
        return render_template('nameError.html')
    y = df[label_col]

    # Remove URLs first, then everything that is not a letter, digit or space.
    cleaned = [
        re.sub(r'[^a-zA-Z0-9 ]+', '', re.sub(r"http\S+", "", text))
        for text in df[feature_col]
    ]
    # NOTE: no lemmatisation or stop-word removal is applied to the text.
    X = pd.Series(cleaned).str.lower()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    tfidfvect = TfidfVectorizer(ngram_range=(1, 1))
    X_train_tfidf = tfidfvect.fit_transform(X_train)
    # The test split is vectorised once and shared by every candidate instead
    # of being re-transformed for each model.
    X_test_tfidf = tfidfvect.transform(X_test)

    # (log template, zero-argument estimator factory, needs dense input)
    # NOTE(review): the last entry is logged as "XGBC" but builds a second
    # SGDClassifier -- confirm which estimator was intended.
    candidates = [
        ("accuracy SVC: {} and time: {} s", LinearSVC, False),
        ("accuracy LR: {} and time: {}",
         lambda: LogisticRegression(n_jobs=-1, multi_class='multinomial',
                                    solver='newton-cg'),
         False),
        ("accuracy RFC: {} and time: {}",
         lambda: RandomForestClassifier(n_jobs=-1), False),
        ("accuracy MNB: {} and time: {}", MultinomialNB, False),
        ("accuracy GNB: {} and time: {}", GaussianNB, True),
        ("accuracy LRCV: {} and time: {}",
         lambda: LogisticRegressionCV(n_jobs=-1), False),
        ("accuracy ABC: {} and time: {}", AdaBoostClassifier, False),
        ("accuracy BNB: {} and time: {}", BernoulliNB, True),
        ("accuracy Per: {} and time: {}",
         lambda: Perceptron(n_jobs=-1), True),
        ("accuracy RidCV: {} and time: {}", RidgeClassifierCV, True),
        ("accuracy SGDC: {} and time: {}",
         lambda: SGDClassifier(n_jobs=-1), True),
        ("accuracy XGBC: {} and time: {}",
         lambda: SGDClassifier(n_jobs=-1), True),
    ]

    fitted = []
    accuracies = []
    for template, make_clf, needs_dense in candidates:
        clf, acc = _fit_and_score(make_clf, X_train_tfidf, X_test_tfidf,
                                  y_train, y_test, needs_dense, template)
        fitted.append(clf)
        accuracies.append(acc)

    # list.index returns the first maximum, so ties resolve to the earliest
    # candidate -- the same winner the former if/elif chain picked.
    best_clf = fitted[accuracies.index(max(accuracies))]
    with open(filename + '_model', 'wb') as model_file:
        pickle.dump(best_clf, model_file)
    with open(filename + '_tfidfVect', 'wb') as vect_file:
        pickle.dump(tfidfvect, vect_file)

    template_args = {
        "ac{}".format(pos): acc for pos, acc in enumerate(accuracies, 1)
    }
    return render_template("result.html", **template_args)


def _fit_and_score(make_clf, train_matrix, test_matrix, y_train, y_test,
                   needs_dense, template):
    """Fit one estimator, print its accuracy and elapsed time, return both.

    ``make_clf`` is a zero-argument callable producing the estimator.  When
    ``needs_dense`` is true the matrices are converted with ``.toarray()``
    before use.  ``template`` is formatted with ``(accuracy, seconds)`` and
    printed.  Returns ``(fitted_estimator, accuracy)``.
    """
    started = time()
    clf = make_clf()
    if needs_dense:
        train_matrix = train_matrix.toarray()
        test_matrix = test_matrix.toarray()
    clf.fit(train_matrix, y_train)
    accuracy = accuracy_score(y_test, clf.predict(test_matrix))
    elapsed = time() - started
    print(template.format(accuracy, elapsed))
    return clf, accuracy
'Perceptron':Perceptron(), 'ProjectedGradientNMF':ProjectedGradientNMF(), 'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(), 'RANSACRegressor':RANSACRegressor(), 'RBFSampler':RBFSampler(), 'RadiusNeighborsClassifier':RadiusNeighborsClassifier(), 'RadiusNeighborsRegressor':RadiusNeighborsRegressor(), 'RandomForestClassifier':RandomForestClassifier(), 'RandomForestRegressor':RandomForestRegressor(), 'RandomizedLasso':RandomizedLasso(), 'RandomizedLogisticRegression':RandomizedLogisticRegression(), 'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(), 'RidgeClassifierCV':RidgeClassifierCV(), 'RobustScaler':RobustScaler(), 'SGDClassifier':SGDClassifier(), 'SGDRegressor':SGDRegressor(), 'SVC':SVC(), 'SVR':SVR(), 'SelectFdr':SelectFdr(), 'SelectFpr':SelectFpr(), 'SelectFwe':SelectFwe(), 'SelectKBest':SelectKBest(), 'SelectPercentile':SelectPercentile(), 'ShrunkCovariance':ShrunkCovariance(), 'SkewedChi2Sampler':SkewedChi2Sampler(), 'SparsePCA':SparsePCA(), 'SparseRandomProjection':SparseRandomProjection(), 'SpectralBiclustering':SpectralBiclustering(),
"ExtraTreesAudit") build_audit( GradientBoostingClassifier(random_state=13, loss="exponential", init=None), "GradientBoostingAudit") build_audit(LinearDiscriminantAnalysis(solver="lsqr"), "LinearDiscriminantAnalysisAudit") build_audit(LogisticRegressionCV(), "LogisticRegressionAudit") build_audit( BaggingClassifier(LogisticRegression(), random_state=13, n_estimators=3, max_features=0.5), "LogisticRegressionEnsembleAudit") build_audit(GaussianNB(), "NaiveBayesAudit") build_audit(RandomForestClassifier(random_state=13, min_samples_leaf=5), "RandomForestAudit") build_audit(RidgeClassifierCV(), "RidgeAudit", with_proba=False) build_audit( BaggingClassifier(RidgeClassifier(random_state=13), random_state=13, n_estimators=3, max_features=0.5), "RidgeEnsembleAudit") build_audit( VotingClassifier([("dt", DecisionTreeClassifier(random_state=13)), ("nb", GaussianNB()), ("lr", LogisticRegression())], voting="soft", weights=[3, 1, 2]), "VotingEnsembleAudit") build_audit(XGBClassifier(objective="binary:logistic"), "XGBAudit") versicolor_df = load_csv("Versicolor.csv") print(versicolor_df.dtypes)
# Individual candidate estimators, all with library defaults unless noted.
C5 = RandomForestClassifier()
C6 = XGBClassifier()
C7 = RidgeClassifier()
C8 = KNeighborsClassifier()
C9 = AdaBoostClassifier()
C10 = MLPClassifier(alpha=1, max_iter=1000)
C11 = RidgeClassifier()
C12 = BaggingClassifier()
C13 = ExtraTreesClassifier()
C14 = XGBRFClassifier()
C15 = GradientBoostingClassifier()
C16 = GaussianNB()
C17 = HistGradientBoostingClassifier()
C18 = KNeighborsClassifier()
C19 = SVC()
C20 = RidgeClassifierCV()

# Logistic-regression variants that differ only in the C value.
Cm = LogisticRegression(max_iter=3000, C=0.2)
Cm1 = LogisticRegression(max_iter=3000, C=0.4)
Cm2 = LogisticRegression(max_iter=3000, C=0.6)
Cm3 = LogisticRegression(max_iter=3000, C=0.8)
Cm4 = LogisticRegression(max_iter=3000, C=1)

# Display names, followed by seven empty placeholder entries.
names = [
    'XGBClassifier',
    'RidgeClassifier',
    'RidgeClassifierCV',
    'HistGradientBoostingClassifier',
    'GradientBoostingClassifier',
    'BaggingClassifier',
    'ExtraTreesClassifier',
    'XGBRFClassifier',
    'GaussianNB',
    'AdaBoostClassifier',
    'DecisionTreeClassifier',
    'CatBoostClassifier',
    'MLPClassifier',
    'RandomForestClassifier',
    'KNeighborsClassifier',
    'Stack1',
] + [''] * 7

# Estimator groups (C1-C4 are defined earlier in the file).
classifiers = [C1, C2, C3, C4]
classifiers_0 = [C5, C6, C7, C8]
def all_classifier_models():
    """Fit a large panel of estimators and collect their accuracies.

    Every estimator is fitted on the module-level ``X_train`` / ``y_train``
    and evaluated on ``X_test`` / ``y_test``.  An estimator that raises
    during fitting, prediction or scoring is reported on stdout and left out
    of the results.

    Returns:
        tuple: ``(metrix, test_accuracy, names)`` where ``metrix`` is a list
        of ``[name, train_accuracy_percent, test_accuracy_percent]`` rows,
        ``test_accuracy`` is the list of test accuracies in percent, and
        ``names`` lists the estimators that completed, all in panel order.
    """
    stacking_estimators = [
        ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
        ('svr', make_pipeline(StandardScaler(), LinearSVC(random_state=42))),
    ]
    voting_estimators = [
        ('lr', LogisticRegression(multi_class='multinomial', random_state=1)),
        ('rf', RandomForestClassifier(n_estimators=50, random_state=1)),
        ('gnb', GaussianNB()),
    ]

    # NOTE(review): OneClassSVM, GaussianMixture and BayesianGaussianMixture
    # are unsupervised estimators; their "accuracy" may not be comparable
    # with the classifiers -- confirm that they belong in this panel.
    models = [
        ('LogisticRegression',
         LogisticRegression(solver='liblinear', multi_class='ovr')),
        ('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()),
        ('KNeighborsClassifier', KNeighborsClassifier()),
        ('DecisionTreeClassifier', DecisionTreeClassifier()),
        ('GaussianNB', GaussianNB()),
        ('RandomForestClassifier', RandomForestClassifier(n_estimators=100)),
        ('SVM', SVC(gamma='auto')),
        ('Linear_SVM', LinearSVC()),
        ('XGB', XGBClassifier()),
        ('SGD', SGDClassifier()),
        ('Perceptron', Perceptron()),
        ('ExtraTreeClassifier', ExtraTreeClassifier()),
        ('OneClassSVM', OneClassSVM(gamma='auto')),
        ('NuSVC', NuSVC()),
        ('MLPClassifier',
         MLPClassifier(solver='lbfgs', alpha=1e-5, random_state=1)),
        ('RadiusNeighborsClassifier', RadiusNeighborsClassifier(radius=2.0)),
        ('OutputCodeClassifier',
         OutputCodeClassifier(estimator=RandomForestClassifier(random_state=0),
                              random_state=0)),
        ('OneVsOneClassifier',
         OneVsOneClassifier(estimator=RandomForestClassifier(random_state=1))),
        ('OneVsRestClassifier',
         OneVsRestClassifier(estimator=RandomForestClassifier(random_state=1))),
        ('LogisticRegressionCV', LogisticRegressionCV()),
        ('RidgeClassifierCV', RidgeClassifierCV()),
        ('RidgeClassifier', RidgeClassifier()),
        ('PassiveAggressiveClassifier', PassiveAggressiveClassifier()),
        ('GaussianProcessClassifier', GaussianProcessClassifier()),
        ('HistGradientBoostingClassifier', HistGradientBoostingClassifier()),
        ('StackingClassifier',
         StackingClassifier(estimators=stacking_estimators,
                            final_estimator=LogisticRegression())),
        ('VotingClassifier',
         VotingClassifier(estimators=voting_estimators, voting='hard')),
        ('AdaBoostClassifier', AdaBoostClassifier()),
        ('GradientBoostingClassifier', GradientBoostingClassifier()),
        ('BaggingClassifier', BaggingClassifier()),
        ('ExtraTreesClassifier', ExtraTreesClassifier()),
        ('CategoricalNB', CategoricalNB()),
        ('ComplementNB', ComplementNB()),
        ('BernoulliNB', BernoulliNB()),
        ('MultinomialNB', MultinomialNB()),
        ('CalibratedClassifierCV', CalibratedClassifierCV()),
        ('LabelPropagation', LabelPropagation()),
        ('LabelSpreading', LabelSpreading()),
        ('NearestCentroid', NearestCentroid()),
        ('QuadraticDiscriminantAnalysis', QuadraticDiscriminantAnalysis()),
        ('GaussianMixture', GaussianMixture()),
        ('BayesianGaussianMixture', BayesianGaussianMixture()),
    ]

    metrix = []
    test_accuracy = []
    names = []
    for name, model in models:
        try:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            train_acc = round(model.score(X_train, y_train) * 100, 2)
            test_acc = metrics.accuracy_score(y_test, y_pred) * 100
        except Exception as exc:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop a long run, and show
            # why the estimator was skipped.
            print("Exception Occurred :", name, "-", repr(exc))
            continue
        test_accuracy.append(test_acc)
        names.append(name)
        metrix.append([name, train_acc, test_acc])
    return metrix, test_accuracy, names