data = []  # assumed initializer for this excerpt; loadDataset fills it in place
target = []
split = 0.67
loadDataset('spambase.data', split, data, target)
X = data  # all features are kept here; only the first two are plotted below
y = target

h = .02  # step size in the mesh

# we create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
C = 1.0  # SVM regularization parameter
svc = svm.SVC().fit(X, y)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, y)
lin_svc = svm.LinearSVC(C=C).fit(X, y)
Nu_svc = svm.NuSVC().fit(X, y)

# collect the first two features for plotting
zero = []
one = []
for i in data:
    zero.append(i[0])
    one.append(i[1])
# print(zero)
# print(one)

# create a mesh to plot in
x_min, x_max = min(zero), max(zero)
y_min, y_max = min(one), max(one)
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# title for the plots
    linear_model.LogisticRegressionCV(),
    linear_model.PassiveAggressiveClassifier(),
    linear_model.RidgeClassifierCV(),
    linear_model.SGDClassifier(),
    linear_model.Perceptron(),

    # Naive Bayes
    naive_bayes.BernoulliNB(),
    naive_bayes.GaussianNB(),

    # Nearest Neighbor
    neighbors.KNeighborsClassifier(),

    # SVM
    svm.SVC(probability=True),
    svm.NuSVC(probability=True),
    svm.LinearSVC(),

    # Trees
    tree.DecisionTreeClassifier(),
    tree.ExtraTreeClassifier(),

    # Discriminant Analysis
    discriminant_analysis.LinearDiscriminantAnalysis(),
    discriminant_analysis.QuadraticDiscriminantAnalysis(),

    # xgboost: http://xgboost.readthedocs.io/en/latest/model.html
    XGBClassifier()
]

from sklearn import feature_selection
from sklearn import model_selection
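# A minimal sketch (not part of the original snippet) of how such a list of
# estimators is typically compared; it assumes the list above is bound to a
# name such as `MLA` and that `X`, `y` hold the features and labels (all
# three names are assumptions).
def compare_classifiers(MLA, X, y, cv=5):
    """Cross-validate each estimator and report its mean accuracy."""
    for est in MLA:
        scores = model_selection.cross_val_score(est, X, y, cv=cv)
        print(est.__class__.__name__, scores.mean())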
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split  # splits training/test data automatically

'''
# Iris classifier (machine learning)
 - Overview: 150 iris records (sepal length, sepal width, petal length, petal width)
 - Classes: 3 (Iris-setosa, Iris-versicolor, Iris-virginica)
 - CSV file: search for iris.csv
'''

## 0. Prepare training and test data
csv = pd.read_csv("C:/BigData/iris.csv")
data = csv.iloc[:, 0:-1]
label = csv.iloc[:, [-1]]

## Split into training and test sets
train_data, test_data, train_label, test_label = \
    train_test_split(data, label, train_size=0.6)

# 1. Create (choose) a classifier --> pick the ML algorithm
clf = svm.NuSVC(gamma="auto")  # clf is short for classifier

# 2. Train on the data
# clf.fit([training data], [labels])
clf.fit(train_data, train_label)

# 3. Check the accuracy (reliability)
results = clf.predict(test_data)
score = metrics.accuracy_score(results, test_label)
print("Accuracy:", score * 100, '%')

# 4. Predict my own sample
result = clf.predict([[4.1, 3.3, 1.5, 0.2]])
print(result)
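# Beyond raw accuracy, a per-class report is often useful here; a minimal
# sketch reusing `results` and `test_label` from the snippet above. Note
# that metrics functions expect (y_true, y_pred), so the ground truth goes
# first by convention:
print(metrics.classification_report(test_label, results))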
    regression(light_reg.LinearSVR(random_state=RANDOM_SEED)),
    classification(
        light_clf.LinearSVC(criterion="accuracy", random_state=RANDOM_SEED)),
    classification(
        light_clf.LinearSVC(criterion="auc", random_state=RANDOM_SEED)),
    classification_binary(
        light_clf.LinearSVC(criterion="accuracy", random_state=RANDOM_SEED)),
    classification_binary(
        light_clf.LinearSVC(criterion="auc", random_state=RANDOM_SEED)),

    # Sklearn SVM
    regression(svm.NuSVR(kernel="rbf")),
    regression(svm.SVR(kernel="rbf")),
    classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),
    classification(svm.SVC(kernel="rbf", **SVC_PARAMS)),
    classification_binary(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),
    classification_binary(svm.SVC(kernel="linear", **SVC_PARAMS)),
    classification_binary(
        svm.SVC(kernel="poly", C=1.5, degree=2, gamma=0.1, coef0=2.0,
                **SVC_PARAMS)),
    classification_binary(svm.SVC(kernel="rbf", **SVC_PARAMS)),
    classification_binary(svm.SVC(kernel="sigmoid", **SVC_PARAMS)),

    # Lightning SVM
    classification(
def svm_nusvc(X, y):
    clf = svm.NuSVC()
    return clf.fit(X, y)
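# A minimal usage sketch for the helper above (the iris dataset is an
# illustrative choice, not from the original snippet):
from sklearn import datasets, svm

iris = datasets.load_iris()
model = svm_nusvc(iris.data, iris.target)
print(model.predict(iris.data[:3]))  # predicted classes for the first rows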
def train(self, X, T, kernel, deg, param):
    svc = svm.NuSVC(nu=param, kernel=kernel, degree=deg)
    svc.fit(X, T)
    self.model = svc
    assert_array_almost_equal(clf.predict(X), [2] * 6)

    X_, y_ = make_classification(n_samples=200, n_features=10,
                                 weights=[0.833, 0.167], random_state=2)

    for clf in (linear_model.LogisticRegression(),
                svm.LinearSVC(random_state=0), svm.SVC()):
        clf.set_params(class_weight={0: .1, 1: 10})
        clf.fit(X_[:100], y_[:100])
        y_pred = clf.predict(X_[100:])
        assert f1_score(y_[100:], y_pred) > .3


@pytest.mark.parametrize("estimator", [svm.SVC(C=1e-2), svm.NuSVC()])
def test_svm_classifier_sided_sample_weight(estimator):
    # fit a linear SVM and check that giving more weight to opposed samples
    # in the space will flip the decision toward these samples.
    X = [[-2, 0], [-1, -1], [0, -2], [0, 2], [1, 1], [2, 0]]
    estimator.set_params(kernel='linear')

    # check that with unit weights, a sample is supposed to be predicted on
    # the boundary
    sample_weight = [1] * 6
    estimator.fit(X, Y, sample_weight=sample_weight)
    y_pred = estimator.decision_function([[-1., 1.]])
    assert y_pred == pytest.approx(0)

    # give more weights to opposed samples
    sample_weight = [10., .1, .1, .1, .1, 10]
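    # The snippet is truncated here; a plausible completion, following the
    # test's stated intent (heavier weight on the opposed samples should
    # flip the decision for [-1, 1] to the negative side), would be:
    estimator.fit(X, Y, sample_weight=sample_weight)
    y_pred = estimator.decision_function([[-1., 1.]])
    assert y_pred < 0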
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, 1)
yy = a * xx - (clf.intercept_[0]) / w[1]

plt.figure(0)
plt.plot(xx, yy, 'k-')
plt.plot(x[c == 1], y[c == 1], 'ro')
plt.plot(x[c == 0], y[c == 0], 'bo')
plt.title("C=" + str(C) + " ;gamma=" + str(gamma) + " ;score: " + str(score))

# --------------------------------------------------------
# Non-linear classifier
# fit the model
tol = 0.001
clf = svm.NuSVC(tol=tol)
X = xyc[:, :2]
Y = xyc[:, 2]
clf.fit(X, Y)
score = clf.score(X, Y)
print("Non-linear score:", score)
# Z = clf.decision_function(X)
# print(Z)

# Draw points of classification ----------------------------------
h = .02  # step size in the mesh

# create a mesh to plot in (the mesh must span the second feature,
# not the labels Y, as the original code did)
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
# Plot the decision boundary. For that, we will assign a color to each
def parse_para_and_get_model(param_dict):
    # param_dict = json.loads(ml_opts_jstr)
    model_name = param_dict['learning_algorithm']  # 1: linear_svm; 2: svm; 3: nu_svm; ...

    ### parse and print parameters ###
    print("INFO: ============Learning Algorithm and Parameters=============")
    print("INFO: param_dict=", param_dict)

    if model_name == "linear_svm":  ### 1: linear SVM
        C = float(eval(param_dict['c']))
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: C = ", C)
        print("INFO: ====================1: Linear SVM=============")
        clf = svm.LinearSVC(C=C)
    elif model_name == "svm":  ### 2: SVM with kernel
        C = float(eval(param_dict['c']))
        kernel_func = param_dict['kernel']
        gamma_val = "0.0"
        if 'gamma' in param_dict:
            gamma_val = eval(param_dict['gamma'])
        gamma_val = float(gamma_val)
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: C = ", C)
        print("INFO: kernel = ", kernel_func)
        print("INFO: gamma = ", gamma_val)
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print("INFO: degree = ", degree_num)
        print("INFO: ====================2: SVM with kernel=============")
        if kernel_func == "poly":
            clf = svm.SVC(C=C, kernel=kernel_func, gamma=gamma_val,
                          degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.SVC(C=C, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.SVC(C=C, kernel=kernel_func)
    elif model_name == "nu_svm":  ### 3: NuSVC
        nu_val = float(eval(param_dict['nu']))
        kernel_func = param_dict['kernel']
        gamma_val = float(eval(param_dict['gamma']))
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: nu = ", nu_val)
        print("INFO: kernel = ", kernel_func)
        print("INFO: gamma = ", gamma_val)
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print("INFO: degree = ", degree_num)
        print("INFO: ====================3: NuSVC=============")
        if kernel_func == "poly":
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func, gamma=gamma_val,
                            degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func)
    elif model_name == "logistic_regression":  ### 4: logistic regression
        C = float(eval(param_dict['c']))
        # penalty from CV, regularization from non-CV training
        if 'regularization' in param_dict:
            regularization = param_dict['regularization']
        elif 'penalty' in param_dict:
            regularization = param_dict['penalty']
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: C = ", C)
        print("INFO: penalty = ", regularization)
        print("INFO: ====================4: Logistic Regression=============")
        clf = linear_model.LogisticRegression(C=C, penalty=regularization)
    elif model_name == "linear_svm_with_sgd":  ### 5: linear SVM with SGD, no input parameters
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: ====================5: Linear SVM with SGD=============")
        clf = linear_model.SGDClassifier()
    elif model_name == "passive_aggressive_classifier":  ### 6: Passive Aggressive Classifier
        C = float(eval(param_dict['c']))
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: C = ", C)
        print("INFO: ====================6: Passive Aggressive Classifier=============")
        clf = linear_model.PassiveAggressiveClassifier(C=C)
    elif model_name == "perceptron":  ### 7: Perceptron
        print("INFO: Learning Algorithm: ", model_name)
        print("INFO: ====================7: Perceptron=============")
        clf = linear_model.Perceptron()
    else:
        print("INFO: Training model selection error: no valid ML model selected!")
        return (0, "none")

    return (clf, model_name)
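# A minimal usage sketch for the factory above (the parameter values are
# illustrative assumptions, not from the original caller):
params = {'learning_algorithm': 'nu_svm', 'nu': '0.3',
          'kernel': 'rbf', 'gamma': '0.1'}
clf, name = parse_para_and_get_model(params)
print(name, clf)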
def fit_SVM(X_train, y_train, _gamma="auto"):
    from sklearn import svm
    clf = svm.NuSVC(gamma=_gamma)
    clf.fit(X_train, y_train.ravel())
    return clf
def run(X, y, g):
    clf = svm.NuSVC(gamma=g)
    clf.fit(X, y)
    return clf
def parse_param_and_get_model(param_dict):
    # param_dict = json.loads(j_str)
    model_name = param_dict['learning_algorithm']  # 1: linear_svm; 2: svm; 3: nu_svm; ...
    cv = eval(param_dict['cv'])
    mode = param_dict['mode']
    api = param_dict['api']
    print("INFO: Learning Algorithm: ", model_name)
    print("INFO: CV = ", cv)
    print("INFO: mode = ", mode)
    print("INFO: API use: ", api)

    ### parse and print parameters ###
    print("INFO: ============ Learning Algorithm and Grid Search Parameters =============")
    if model_name == "linear_svm":  ### 1: linear SVM
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000]}]
        else:
            param_dic = [{
                'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
            }]
        print("INFO: Grid Search Parameters:")
        print("INFO: C = ", param_dic[0]['C'])
        print("INFO: ====================1: Linear SVM=============")
        clf = svm.LinearSVC()
    elif model_name == "svm":  ### 2: SVM with kernel
        if mode == "cheap":
            param_dic = [{
                'C': [0.01, 1, 100],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.5]
            }, {
                'C': [0.01, 1, 100],
                'kernel': ['linear']
            }, {
                'C': [0.01, 1, 100],
                'kernel': ['poly'],
                'gamma': [0.0, 0.5],
                'degree': [3]
            }]
        else:
            param_dic = [{
                'C': [0.0001, 0.01, 1, 100, 10000],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.5, 1]
            }, {
                'C': [0.0001, 0.01, 1, 100, 10000],
                'kernel': ['linear']
            }, {
                'C': [0.0001, 0.01, 1, 100, 10000],
                'kernel': ['poly'],
                'gamma': [0.0, 0.5],
                'degree': [2, 3]
            }]
        # param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['rbf', 'sigmoid'], 'gamma': [0.0, 0.5, 1]}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['linear']}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['poly'], 'gamma': [0.0, 0.5, 1], 'degree': [2, 3]}]
        # param_dic = [{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['rbf', 'sigmoid'], 'gamma': [0.0, 0.01, 0.1, 1, 10, 100]}, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['linear']}, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel': ['poly'], 'gamma': [0.0, 0.01, 0.1, 1, 10, 100], 'degree': [2, 3, 4]}]
        print("INFO: Grid Search Parameters:")
        for p in range(0, len(param_dic)):
            print("INFO: ", end='')
            for key in param_dic[p]:
                print(key, ' = ', param_dic[p][key], end=' ')
            print("")
        print("INFO: ====================2: SVM with kernel=============")
        clf = svm.SVC()
    elif model_name == "nu_svm":  ### 3: NuSVC
        if mode == "cheap":
            param_dic = [{
                'nu': [0.1, 0.3],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.1]
            }, {
                'nu': [0.1, 0.3],
                'kernel': ['linear']
            }, {
                'nu': [0.1, 0.3],
                'kernel': ['poly'],
                'gamma': [0.0, 0.1],
                'degree': [3]
            }]
        else:
            param_dic = [{
                'nu': [0.1, 0.2, 0.3],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.1, 1, 10]
            }, {
                'nu': [0.1, 0.2, 0.3],
                'kernel': ['linear']
            }, {
                'nu': [0.1, 0.2, 0.3],
                'kernel': ['poly'],
                'gamma': [0.0, 0.1, 1, 10],
                'degree': [2, 3]
            }]
        # param_dic = [{'nu': [0.1, 0.2, 0.3, 0.4], 'kernel': ['rbf', 'sigmoid'], 'gamma': [0.0, 0.1, 1, 10]}, {'nu': [0.1, 0.2, 0.3, 0.4], 'kernel': ['linear']}, {'nu': [0.1, 0.2, 0.3, 0.4], 'kernel': ['poly'], 'gamma': [0.0, 0.1, 1, 10], 'degree': [2, 3]}]
        print("INFO: Grid Search Parameters:")
        for p in range(0, len(param_dic)):
            print("INFO: ", end='')
            for key in param_dic[p]:
                print(key, ' = ', param_dic[p][key], end=' ')
            print("")
        print("INFO: ====================3: NuSVC=============")
        clf = svm.NuSVC()
    elif model_name == "logistic_regression":  ### 4: Logistic Regression
        if mode == "cheap":
            param_dic = [{
                'C': [0.0001, 0.01, 1, 100, 10000],
                'penalty': ['l2']
            }]
        else:
            param_dic = [{
                'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
                'penalty': ['l2', 'l1']
            }]
        print("INFO: Grid Search Parameters:")
        print("INFO: C = ", param_dic[0]['C'])
        print("INFO: penalty = ", param_dic[0]['penalty'])
        print("INFO: ====================4: Logistic Regression=============")
        clf = linear_model.LogisticRegression()
    elif model_name == "passive_aggressive_classifier":  ### 6: Passive Aggressive Classifier
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000]}]
        else:
            param_dic = [{
                'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
            }]
        print("INFO: Grid Search Parameters:")
        print("INFO: C = ", param_dic[0]['C'])
        print("INFO: ====================6: Passive Aggressive Classifier=============")
        clf = linear_model.PassiveAggressiveClassifier()
    else:
        print("INFO: Training model selection error: no valid ML model selected!")
        return (0, "none", 0, 0, 0)

    return (clf, model_name, api, cv, param_dic)
def fit(self, X, Y, class_weight=None):
    self.classes = list(np.unique(Y))
    nclasses = len(self.classes)
    total_nrows = X.shape[0]
    nfeatures = X.shape[1]

    if self.balanced_bagging:
        class_slices = []
        class_outputs = []
        for c in self.classes:
            mask = (Y == c)
            class_slices.append(X[mask, :])
            class_outputs.append(Y[mask, :])
        min_size = np.min([x.shape[0] for x in class_slices])
        class_bag_size = int(min_size * self.bag_prct)
        print("Balanced bagging, min class size =", class_bag_size)
        total_bag_size = (nclasses - 1) * class_bag_size + (
            self.neutral_weight * class_bag_size)
    else:
        total_bag_size = total_nrows
    print("total_bag_size = ", total_bag_size)

    if self.num_random_features == 'sqrt':
        features_per_model = int(math.ceil(math.sqrt(nfeatures)))
    elif self.num_random_features == 'log':
        features_per_model = int(math.ceil(math.log(nfeatures, 2)))
    else:
        features_per_model = int(
            math.ceil(nfeatures * self.num_random_features))
    print("Features per model:", features_per_model)
    print("[Class Weights]", class_weight)

    f_scores = []
    for i in range(self.nmodels):
        print("Training model #" + str(i))
        feature_indices = np.random.permutation(
            nfeatures)[:features_per_model]
        print(" Features:", feature_indices)
        if self.balanced_bagging:
            input_list = []
            output_list = []
            # loop variable renamed from `i` to avoid shadowing the outer
            # model counter
            for class_idx, c in enumerate(self.classes):
                x = class_slices[class_idx]
                y = class_outputs[class_idx]
                n = self.neutral_weight * class_bag_size if c == 0 else class_bag_size
                row_indices = np.random.permutation(x.shape[0])[:n]
                row_slice = x[row_indices, :]
                input_list.append(row_slice[:, feature_indices])
                output_list.append(y[row_indices])
            inputs = np.concatenate(input_list)
            outputs = np.concatenate(output_list)
        else:
            inputs = X[:, feature_indices]
            outputs = Y

        if self.base_classifier == 'sgd':
            print(" Input shape:", inputs.shape)
            n_iter = int(np.ceil(10**6 / float(inputs.shape[0])))
            print(" Num iters: ", n_iter)
            model = lin.SGDClassifier(n_iter=n_iter, shuffle=True,
                                      **self.model_keywords)
        elif self.base_classifier == 'logistic':
            model = lin.LogisticRegression(**self.model_keywords)
        elif self.base_classifier == 'nu-svm':
            model = svm.NuSVC(nu=0.1, kernel='linear')
        elif self.base_classifier == 'svm_tree':
            model = treelearn.SVM_Tree(**self.model_keywords)
        else:
            model = svm.LinearSVC(
                **self.model_keywords)  # svm.SVC(kernel='poly', degree=2)

        model.fit(inputs, outputs, class_weight=class_weight)
        print(model)
        # print(model.coef_)

        # bug in scikits.learn keeps around sample weights after training,
        # making the serialization too bloated for network transfer
        if hasattr(model, 'sample_weight'):
            model.sample_weight = []

        # remember the balanced accuracy for each model
        pred = model.predict(inputs)
        # print("outputs[100:150]", outputs[100:150])
        # print("pred[100:150]", pred[100:150])

        # compute F-score for model weighting and user feedback
        actual_not_zero = (outputs != 0)
        actual_not_zero_count = np.sum(actual_not_zero)
        pred_not_zero = (pred != 0)
        pred_not_zero_count = np.sum(pred_not_zero)
        correct = (outputs == pred)
        correct_not_zero = np.sum(correct & actual_not_zero, dtype='float')
        print(" Correct NNZ:", correct_not_zero,
              "Actual NNZ: ", actual_not_zero_count,
              "Predicted NNZ:", pred_not_zero_count)
        if pred_not_zero_count > 0:
            precision = correct_not_zero / float(pred_not_zero_count)
        else:
            precision = 0.0
        if actual_not_zero_count > 0:
            recall = correct_not_zero / float(actual_not_zero_count)
        else:
            recall = 0.0
        if precision > 0 and recall > 0:
            beta_squared = self.recall_importance**2
            denom = beta_squared * precision + recall
            f_score = (1 + beta_squared) * (precision * recall) / denom
        else:
            f_score = 0.0
        print(" Precision:", precision, "Recall:", recall,
              "F-score:", f_score)
        if f_score > 0:
            self.model_features.append(feature_indices)
            f_scores.append(f_score)
            self.models.append(model)

    if len(f_scores) == 0:
        print("!!! No good classifiers kept !!!")
    else:
        f_scores = np.array(f_scores)
        sum_f_scores = np.sum(f_scores)
        if sum_f_scores == 0:
            print("!!! All classifiers are terrible !!!")
            self.model_scores = f_scores
        else:
            self.model_scores = f_scores / sum_f_scores
            # estimate how good each feature is
            counts = np.zeros(nfeatures)
            feature_scores = np.zeros(nfeatures)
            for f, indices in zip(f_scores, self.model_features):
                counts[indices] += 1
                feature_scores[indices] += f
            feature_scores /= counts
            print("Average feature scores:", feature_scores)
            # sorted in ascending order
            sort_indices = np.argsort(feature_scores)
            print("Best 5 features:", sort_indices[-5:])

    if self.model_weighting == 'logistic':
        X2 = self.transform_to_classifer_space(X)
        print("Training logistic regression on top of ensemble outputs...")
        self.gating_classifier = lin.LogisticRegression()
        self.gating_classifier.fit(X2, Y, class_weight=class_weight)
    else:
        self.gating_classifier = None
from sklearn import svm, tree
import copy
import numpy as np
# sklearn.cross_validation was removed; these utilities live in
# sklearn.model_selection now
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
# imports for the estimators used below, missing from the original excerpt
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB

rf = RandomForestClassifier(n_estimators=100)
dt = tree.DecisionTreeClassifier()
gnb = GaussianNB()
bnb = BernoulliNB()
mnb = MultinomialNB()
svm_cf = svm.SVC()
maxlist = list()
data = list()
nusvm = svm.NuSVC()
adaboost = AdaBoostClassifier(n_estimators=100)


def CVAndOutPutAccuracies(trainData, classData, fold_no):
    print("Length of labelled array : " + str(len(trainData)))
    print("Length of labels array : " + str(len(classData)))
    print("Feature length : " + str(len(trainData[0])))
    print("Max Accuracy, Mean Accuracy, Min Accuracy")
    rf_accuracy = cross_val_score(rf, trainData, classData,
                                  scoring='accuracy', cv=fold_no)
    rf_f1_score = cross_val_score(rf, trainData, classData,
                                  scoring='f1_weighted', cv=fold_no)
    print("Random Forest Accuracy scores :")
with codecs.open('emotions', 'r', 'utf8') as reader:
    for line in reader:
        word, rank = line.strip().split(' ')
        emotions[word] = int(rank)

X, y = [], []
for key, val in vectors.items():
    X.append(val)
    if key in emotions:
        y.append(2)
    elif key in topics:
        y.append(1)
    else:
        y.append(0)

clf = svm.NuSVC(nu=0.005)
clf.fit(X, y)
joblib.dump(clf, 'attr_clf.pkl')

X, y = [], []
for key, val in vectors.items():
    if key in emotions:
        X.append(val)
        y.append(emotions[key])

clf = svm.NuSVC(nu=0.005)
clf.fit(X, y)
joblib.dump(clf, 'rank_clf.pkl')
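# A minimal sketch (an assumption, not part of the original) of how the two
# pickled models above would be loaded and chained at inference time:
import joblib

attr_clf = joblib.load('attr_clf.pkl')
rank_clf = joblib.load('rank_clf.pkl')

def classify(vec):
    """Return (attribute class, rank); rank only applies to emotion words."""
    attr = attr_clf.predict([vec])[0]
    rank = rank_clf.predict([vec])[0] if attr == 2 else None
    return attr, rank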
# sklearn.cross_validation was removed; train_test_split now lives in
# sklearn.model_selection
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets

# load data from datasets
iris = datasets.load_iris()
iris_x = iris.data
iris_y = iris.target

# divide the dataset into train and test sets
train_x, test_x, train_y, test_y = train_test_split(iris_x, iris_y,
                                                    test_size=0.2)

# define SVMs
svm_svc_clf = svm.SVC()
svm_nusvc_clf = svm.NuSVC()
svm_linear_clf = svm.LinearSVC()

# train
svm_svc_clf.fit(train_x, train_y)
svm_nusvc_clf.fit(train_x, train_y)
svm_linear_clf.fit(train_x, train_y)

# predict
svc_y = svm_svc_clf.predict(test_x)
nusvc_y = svm_nusvc_clf.predict(test_x)
linear_y = svm_linear_clf.predict(test_x)

# accuracy
svc_counter = 0
nusvc_counter = 0
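# The snippet is truncated while hand-counting correct predictions; a
# shorter equivalent (a sketch, not the original continuation) uses
# sklearn's accuracy helper directly:
from sklearn.metrics import accuracy_score

for name, pred in [('SVC', svc_y), ('NuSVC', nusvc_y),
                   ('LinearSVC', linear_y)]:
    print(name, 'accuracy:', accuracy_score(test_y, pred))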
def tune_model(X, y, n_it=30, models=['xgb']):
    seed = 7
    test_size = 0.30
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed)
    for model1 in models:
        if model1 == 'Logistic':
            logistic = LogisticRegression()
            distributions = {'C': [1, 2, 3, 4], 'penalty': ['l1', 'l2']}
            clf = RandomizedSearchCV(logistic, distributions, random_state=0,
                                     n_iter=n_it, cv=5)
            clf.fit(X_train, y_train)
            # print(clf.best_params_)
            # print(clf.cv_results_)
            pred = clf.predict(X_test)
            print("The best Logistic Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        elif model1 == 'xgb':
            model = XGBClassifier()
            distributions = {
                'booster': ['gbtree', 'gblinear', 'dart'],
                'eta': [0, 0.2, 0.4, 0.6, 0.8, 1],
                'max_depth': [50, 100, 150, 200, 250, 300],
                'lambda': [0, 0.2, 0.4, 0.6, 0.8, 1],
                'alpha': [0, 0.2, 0.4, 0.6, 0.8, 1],
                'grow_policy': ['depthwise', 'lossguide']
            }
            clf = RandomizedSearchCV(model, distributions, random_state=0,
                                     n_iter=n_it, cv=5)
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print("The best XGBoost Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        elif model1 == 'SVM':
            model = svm.NuSVC(gamma='auto')
            distributions = {
                'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
                'degree': [4, 5, 6, 7, 8, 9, 10]
            }
            clf = RandomizedSearchCV(model, distributions, random_state=0,
                                     n_iter=n_it, cv=5)
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print("The best SVM Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        elif model1 == 'RandomForest':
            model = RandomForestClassifier()
            distributions = {
                'n_estimators': [50, 100, 150, 200, 250, 300],
                'criterion': ['gini', 'entropy'],
                'min_samples_split': [2, 3, 4, 5],
                'min_samples_leaf': [2, 3, 4, 5],
                'max_features': ['auto', 'sqrt', 'log2']
            }
            clf = RandomizedSearchCV(model, distributions, random_state=0,
                                     n_iter=n_it, cv=5)
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print("The best Random Forest Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        else:
            print(model1,
                  "- Name not detected. Try using one of the models that are defined")


# import sklearn.ensemble
# import sklearn.model_selection
# import sklearn.svm
# import optuna

# # FYI: Objective functions can take additional arguments
# # (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).
# def GuidedTuneModel(X, y):
#     def objective(trial):
#         classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
#         if classifier_name == "SVC":
#             svc_c = trial.suggest_loguniform("svc_c", 1e-10, 1e10)
#             classifier_obj = sklearn.svm.SVC(C=svc_c, gamma="auto")
#         else:
#             rf_max_depth = int(trial.suggest_loguniform("rf_max_depth", 2, 32))
#             classifier_obj = sklearn.ensemble.RandomForestClassifier(
#                 max_depth=rf_max_depth, n_estimators=10)
#         score = sklearn.model_selection.cross_val_score(classifier_obj, X, y, n_jobs=-1, cv=3)
#         accuracy = score.mean()
#         return accuracy

#     study = optuna.create_study(direction="maximize")
#     study.optimize(objective, n_trials=100)
#     print(study.best_trial)
# Cross-validation on the training model; parameters can be varied as desired.
# (KFold's old `KFold(n, n_folds=...)` signature is gone; current
# scikit-learn takes n_splits and yields indices via kf.split().)
kf = KFold(n_splits=4, shuffle=True)
print("Starting CV Random Forest")
# for SVM
cnt = 0
for train_index, test_index in kf.split(X):
    TrainX1 = np.array(X[train_index])
    TrainY = np.array(trainingLabels[train_index])
    trainingIds1 = np.array(trainingIds[train_index])
    print(TrainX1.shape)
    print(TrainY.shape)
    print(trainingIds1.shape)
    clf = svm.NuSVC(nu=0.02, probability=True, cache_size=24000)
    clf.fit(TrainX1, TrainY)
    TestX = np.array(X[test_index])
    TestY = np.array(trainingLabels[test_index])
    trainingIds1 = np.array(trainingIds[test_index])
    print(TestX.shape)
    print(TestY.shape)
    print(trainingIds1.shape)
    mas = clf.score(TestX, TestY)
    print("Cross validation mean accuracy: ", mas)
    cnt = cnt + 1
    if cnt == 1:
        maxMas = mas
        model = clf
    elif mas > maxMas:
        maxMas = mas
    if title is not None:
        plt.title(title)
    plt.show()


def add_noise(data, sigma=32.0, size=None):
    # Noise function, sigma 16.0 ~ 6% noise
    if not size:
        size = data.shape
    noise = np.random.normal(0.0, sigma, size)
    return np.clip(data + noise, 0, 255)


# PCA and plotting for noisy dataset
# pca2, x3, x4 = do_pca(add_noise(data), target)
# Plot visualization
# plot_embedding(x3)

# PCA and plotting for original dataset
pca1, x1, x2 = do_pca(n_train, n_test, 0.8)
# Plot visualization
# plot_embedding(x1)

# SVM classification for scoring original estimator (PCA)
svm1 = svm.NuSVC(verbose=True)
svm1.fit(x2[2000:], list(labels2)[2000:])
svm_score = svm1.score(x2[:2000], list(labels2)[:2000])
print(svm_score)
# -*- coding: utf-8 -*- from sklearn import svm from sklearn.datasets import load_iris from sklearn_porter import Porter iris_data = load_iris() X, y = iris_data.data, iris_data.target clf = svm.NuSVC(gamma=0.001, kernel='rbf', random_state=0) clf.fit(X, y) # Cheese! result = Porter(language='php').port(clf) print(result) """ <?php class Tmp { public static function predict($atts) { $svs = [[4.9000000000000004, 3.0, 1.3999999999999999, 0.20000000000000001], [4.5999999999999996, 3.1000000000000001, 1.5, 0.20000000000000001], [5.4000000000000004, 3.8999999999999999, 1.7, 0.40000000000000002], [5.0, 3.3999999999999999, 1.5, 0.20000000000000001], [4.9000000000000004, 3.1000000000000001, 1.5, 0.10000000000000001], [5.4000000000000004, 3.7000000000000002, 1.5, 0.20000000000000001], [4.7999999999999998, 3.3999999999999999, 1.6000000000000001, 0.20000000000000001], [5.7000000000000002, 4.4000000000000004, 1.5, 0.40000000000000002], [5.7000000000000002, 3.7999999999999998, 1.7, 0.29999999999999999], [5.0999999999999996, 3.7999999999999998, 1.5, 0.29999999999999999], [5.4000000000000004, 3.3999999999999999, 1.7, 0.20000000000000001], [5.0999999999999996, 3.7000000000000002, 1.5, 0.40000000000000002], [5.0999999999999996, 3.2999999999999998, 1.7, 0.5], [4.7999999999999998, 3.3999999999999999, 1.8999999999999999, 0.20000000000000001], [5.0, 3.0, 1.6000000000000001, 0.20000000000000001], [5.0, 3.3999999999999999, 1.6000000000000001, 0.40000000000000002], [5.2000000000000002, 3.5, 1.5, 0.20000000000000001], [4.7000000000000002, 3.2000000000000002, 1.6000000000000001, 0.20000000000000001], [4.7999999999999998, 3.1000000000000001, 1.6000000000000001, 0.20000000000000001], [5.4000000000000004, 3.3999999999999999, 1.5, 0.40000000000000002], [4.9000000000000004, 3.1000000000000001, 1.5, 0.10000000000000001], [4.9000000000000004, 3.1000000000000001, 1.5, 0.10000000000000001], [5.0999999999999996, 3.3999999999999999, 1.5, 0.20000000000000001], [4.5, 2.2999999999999998, 1.3, 0.29999999999999999], [5.0, 3.5, 1.6000000000000001, 0.59999999999999998], [5.0999999999999996, 3.7999999999999998, 1.8999999999999999, 0.40000000000000002], [4.7999999999999998, 3.0, 1.3999999999999999, 0.29999999999999999], [5.0999999999999996, 3.7999999999999998, 1.6000000000000001, 0.20000000000000001], [5.2999999999999998, 3.7000000000000002, 1.5, 0.20000000000000001], [7.0, 3.2000000000000002, 4.7000000000000002, 1.3999999999999999], [6.4000000000000004, 3.2000000000000002, 4.5, 1.5], [6.9000000000000004, 3.1000000000000001, 4.9000000000000004, 1.5], [5.5, 2.2999999999999998, 4.0, 1.3], [6.5, 2.7999999999999998, 4.5999999999999996, 1.5], [5.7000000000000002, 2.7999999999999998, 4.5, 1.3], [6.2999999999999998, 3.2999999999999998, 4.7000000000000002, 1.6000000000000001], [4.9000000000000004, 2.3999999999999999, 3.2999999999999998, 1.0], [6.5999999999999996, 2.8999999999999999, 4.5999999999999996, 1.3], [5.2000000000000002, 2.7000000000000002, 3.8999999999999999, 1.3999999999999999], [5.0, 2.0, 3.5, 1.0], [5.9000000000000004, 3.0, 4.2000000000000002, 1.5], [6.0, 2.2000000000000002, 4.0, 1.0], [6.0999999999999996, 2.8999999999999999, 4.7000000000000002, 1.3999999999999999], [5.5999999999999996, 2.8999999999999999, 3.6000000000000001, 1.3], [6.7000000000000002, 3.1000000000000001, 4.4000000000000004, 1.3999999999999999], [5.5999999999999996, 3.0, 4.5, 1.5], [5.7999999999999998, 2.7000000000000002, 4.0999999999999996, 1.0], [6.2000000000000002, 2.2000000000000002, 4.5, 1.5], [5.5999999999999996, 2.5, 3.8999999999999999, 1.1000000000000001], 
[5.9000000000000004, 3.2000000000000002, 4.7999999999999998, 1.8], [6.0999999999999996, 2.7999999999999998, 4.0, 1.3], [6.2999999999999998, 2.5, 4.9000000000000004, 1.5], [6.0999999999999996, 2.7999999999999998, 4.7000000000000002, 1.2], [6.5999999999999996, 3.0, 4.4000000000000004, 1.3999999999999999], [6.7999999999999998, 2.7999999999999998, 4.7999999999999998, 1.3999999999999999], [6.7000000000000002, 3.0, 5.0, 1.7], [6.0, 2.8999999999999999, 4.5, 1.5], [5.7000000000000002, 2.6000000000000001, 3.5, 1.0], [5.5, 2.3999999999999999, 3.7999999999999998, 1.1000000000000001], [5.5, 2.3999999999999999, 3.7000000000000002, 1.0], [5.7999999999999998, 2.7000000000000002, 3.8999999999999999, 1.2], [6.0, 2.7000000000000002, 5.0999999999999996, 1.6000000000000001], [5.4000000000000004, 3.0, 4.5, 1.5], [6.0, 3.3999999999999999, 4.5, 1.6000000000000001], [6.7000000000000002, 3.1000000000000001, 4.7000000000000002, 1.5], [6.2999999999999998, 2.2999999999999998, 4.4000000000000004, 1.3], [5.5999999999999996, 3.0, 4.0999999999999996, 1.3], [5.5, 2.5, 4.0, 1.3], [5.5, 2.6000000000000001, 4.4000000000000004, 1.2], [6.0999999999999996, 3.0, 4.5999999999999996, 1.3999999999999999], [5.7999999999999998, 2.6000000000000001, 4.0, 1.2], [5.0, 2.2999999999999998, 3.2999999999999998, 1.0], [5.5999999999999996, 2.7000000000000002, 4.2000000000000002, 1.3], [5.7000000000000002, 3.0, 4.2000000000000002, 1.2], [5.7000000000000002, 2.8999999999999999, 4.2000000000000002, 1.3], [6.2000000000000002, 2.8999999999999999, 4.2999999999999998, 1.3], [5.0999999999999996, 2.5, 3.0, 1.1000000000000001], [5.7000000000000002, 2.7999999999999998, 4.0999999999999996, 1.3], [5.7999999999999998, 2.7000000000000002, 5.0999999999999996, 1.8999999999999999], [6.2999999999999998, 2.8999999999999999, 5.5999999999999996, 1.8], [4.9000000000000004, 2.5, 4.5, 1.7], [6.5, 3.2000000000000002, 5.0999999999999996, 2.0], [6.4000000000000004, 2.7000000000000002, 5.2999999999999998, 1.8999999999999999], [5.7000000000000002, 2.5, 5.0, 2.0], [5.7999999999999998, 2.7999999999999998, 5.0999999999999996, 2.3999999999999999], [6.4000000000000004, 3.2000000000000002, 5.2999999999999998, 2.2999999999999998], [6.5, 3.0, 5.5, 1.8], [6.0, 2.2000000000000002, 5.0, 1.5], [5.5999999999999996, 2.7999999999999998, 4.9000000000000004, 2.0], [6.2999999999999998, 2.7000000000000002, 4.9000000000000004, 1.8], [6.2000000000000002, 2.7999999999999998, 4.7999999999999998, 1.8], [6.0999999999999996, 3.0, 4.9000000000000004, 1.8], [7.2000000000000002, 3.0, 5.7999999999999998, 1.6000000000000001], [6.2999999999999998, 2.7999999999999998, 5.0999999999999996, 1.5], [6.0999999999999996, 2.6000000000000001, 5.5999999999999996, 1.3999999999999999], [6.4000000000000004, 3.1000000000000001, 5.5, 1.8], [6.0, 3.0, 4.7999999999999998, 1.8], [6.9000000000000004, 3.1000000000000001, 5.4000000000000004, 2.1000000000000001], [6.9000000000000004, 3.1000000000000001, 5.0999999999999996, 2.2999999999999998], [5.7999999999999998, 2.7000000000000002, 5.0999999999999996, 1.8999999999999999], [6.7000000000000002, 3.0, 5.2000000000000002, 2.2999999999999998], [6.2999999999999998, 2.5, 5.0, 1.8999999999999999], [6.5, 3.0, 5.2000000000000002, 2.0], [6.2000000000000002, 3.3999999999999999, 5.4000000000000004, 2.2999999999999998], [5.9000000000000004, 3.0, 5.0999999999999996, 1.8]]; $coeffs = [[4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 0.0, 4.6863813658892557, 0.0, 4.6863813658892557, 4.6863813658892557, 
4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 0.0, 0.0, -0.0, -0.0, -0.0, -4.6863813658892557, -0.0, -0.0, -0.0, -4.6863813658892557, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -0.0, -4.6863813658892557, -0.0, -0.0, -4.6863813658892557, -0.0, -4.6863813658892557, -0.0, -4.6863813658892557, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -0.0, -0.0, -0.0, -0.0, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -0.0, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -0.0, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948], [0.0, 0.0, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 0.0, 2.1272220789292948, 2.1272220789292948, 0.0, 2.1272220789292948, 2.1272220789292948, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 47.529341773693893, 0.0, 0.0, 0.0, 0.0, 47.529341773693893, 0.0, 47.529341773693893, 47.529341773693893, 0.0, 47.529341773693893, 0.0, 47.529341773693893, 0.0, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 0.0, 0.0, 0.0, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 0.0, 47.529341773693893, 47.529341773693893, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -0.0, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -0.0, -47.529341773693893]]; $inters = [0.10061840191760112, 0.051748160156319709, -0.084181689668018464]; $classes = [0, 1, 2]; // exp(-y|x-x'|^2) $kernels = array_fill(0, 105, 0); for ($i = 0; $i < 105; $i++) {
print "gt: ", ground_truth_labels return float(collections.Counter(subtract)[0]) / len(ground_truth_labels) labels, vectors = read_csv('./kaggle/train.csv', 1000) # test_labels, test_vectors = read_csv('./kaggle/test.csv',10) # one_hot_labels_test = one_hot_encode(test_labels) """ divide training data and validation data """ N_train = int(len(labels) * 0.7) N_validation = len(labels) - N_train vectors_train, vectors_validation, labels_train, labels_validation = train_test_split( vectors, labels, test_size=N_validation) # data distribution function #create instance of SVC clf = svm.NuSVC(kernel='rbf', nu=0.01) #determine the hyperplane print "determining Hyperplane..." clf.fit(vectors_train, labels_train) print "determined Hyperplane" #prediction using the hyperplane print "predicting . . . " pd = clf.predict(vectors_validation) #calculate the accuracy print "accuracy: ", accuracy(pd, labels_validation)
trainingLabels = num.concatenate((num.zeros(averageObjectTraining.matrix.shape[0]),
                                  1 * num.ones(variableObjectTraining.matrix.shape[0]),
                                  2 * num.ones(EBTraining.matrix.shape[0])))
testingLabels = num.concatenate((num.zeros(averageObjectTesting.matrix.shape[0]),
                                 1 * num.ones(variableObjectTesting.matrix.shape[0]),
                                 2 * num.ones(EBTesting.matrix.shape[0])))

# Creating classifiers (min_samples_split must be >= 2 in current scikit-learn)
decisionTree = DecisionTreeClassifier(max_depth=None, min_samples_split=2,
                                      random_state=0)
randomForest = RandomForestClassifier(n_estimators=10, max_depth=None,
                                      min_samples_split=2, random_state=0)
extraTrees = ExtraTreesClassifier(n_estimators=10, max_depth=None,
                                  min_samples_split=2, random_state=0)
SVCrbf = svm.SVC(kernel='rbf', gamma=0.7)
SVClinear = svm.SVC(kernel='linear')
SVCpoly = svm.SVC(kernel='poly', degree=3)
LinearSVC = svm.LinearSVC()
NuSVC = svm.NuSVC()

# Applying classifiers to data
decisionTree = decisionTree.fit(trainingSet, trainingLabels)
randomForest = randomForest.fit(trainingSet, trainingLabels)
extraTrees = extraTrees.fit(trainingSet, trainingLabels)
SVCrbf = SVCrbf.fit(trainingSet, trainingLabels)
SVClinear = SVClinear.fit(trainingSet, trainingLabels)
SVCpoly = SVCpoly.fit(trainingSet, trainingLabels)
LinearSVC = LinearSVC.fit(trainingSet, trainingLabels)
NuSVC = NuSVC.fit(trainingSet, trainingLabels)

# Finding predictions
decisionTreePredictions = decisionTree.predict(testingSet)
randomForestPredictions = randomForest.predict(testingSet)
extraTreesPredictions = extraTrees.predict(testingSet)
def train(self, dset, enable=[0, 0, 1, 0, 1, 1], cc=False):
    mods = ['KNN', 'SVC', 'SVM', 'XGBoost', 'MLP', 'RF']  # local mods list

    def tune_param(clf, param, name):
        model = GridSearchCV(clf, param_grid=param,
                             return_train_score=True, cv=20)
        model.fit(train_X, train_y)
        print(name + " INFO:")
        print("Best hyper parameters:", model.best_params_)
        print("Best accuracy value: ", model.best_score_)
        clf.set_params(**model.best_params_)
        clf.fit(train_X, train_y)  # actually fitting the model
        print("prediction score: ", model.score(test_X, test_y))
        print(clf)
        # plot_posterior(X, y, newX, newy, clf, name, savefile)
        # xx, yy, Z, new_p, zz = plot_decision_boundaries(X, y, clf, h=h)
        return clf

    post = []
    if cc:
        train_X, train_y, test_X, test_y = (self.Ctrain_X[dset],
                                            self.Ctrain_y[dset],
                                            self.Ctest_X[dset],
                                            self.Ctest_y[dset])
    else:
        train_X, train_y, test_X, test_y = (self.train_X[dset],
                                            self.train_y[dset],
                                            self.test_X[dset],
                                            self.test_y[dset])

    ##### KNN #####
    if enable[0] == 1:
        tuned_param = [{'n_neighbors': [3, 5, 7], 'leaf_size': range(10, 100, 10)},
                       {'n_neighbors': [3, 5, 7], 'leaf_size': range(10, 100, 10)},
                       {'n_neighbors': [7]},
                       {'n_neighbors': [3, 5, 7], 'leaf_size': range(10, 100, 10)},
                       {'n_neighbors': [3, 5, 7], 'leaf_size': range(10, 100, 10)}]
        temp = tune_param(KNeighborsClassifier(), tuned_param[dset], mods[0])
        post.append(temp)

    ##### SVC #####
    if enable[1] == 1:
        tuned_param = [{'gamma': ['auto'], 'probability': [True]},
                       {'gamma': ['auto'], 'probability': [True]},
                       {'C': np.linspace(1, 10, 10), 'probability': [True]},
                       {'gamma': ['auto'], 'probability': [True]},
                       {'gamma': ['auto'], 'probability': [True]}]
        temp = tune_param(svm.SVC(), tuned_param[dset], mods[1])
        post.append(temp)

    ##### NuSVC #####
    if enable[2] == 1:
        tuned_param = [{'gamma': ['auto'], 'probability': [True]},
                       {'gamma': ['auto'], 'probability': [True]},
                       {'probability': [True]},
                       {'gamma': ['auto'], 'probability': [True]},
                       {'gamma': ['auto'], 'probability': [True]}]
        temp = tune_param(svm.NuSVC(), tuned_param[dset], mods[2])
        post.append(temp)

    ##### XGBoost #####
    if enable[3] == 1:
        tuned_param = [{'n_jobs': [-1], 'learning_rate': np.linspace(0, 1, 20),
                        'n_estimators': [64, 128, 256], 'gamma': [0],
                        'objective': ['binary:logistic']},
                       {'n_jobs': [-1], 'learning_rate': np.linspace(0, 1, 20),
                        'n_estimators': [64, 128, 256], 'gamma': [0],
                        'objective': ['binary:logistic']},
                       {'n_jobs': [-1], 'learning_rate': np.linspace(0, 1, 20),
                        'n_estimators': [64, 128, 256], 'gamma': [0]},
                       {'n_jobs': [-1], 'learning_rate': np.linspace(0, 1, 20),
                        'n_estimators': [64, 128, 256], 'gamma': [0],
                        'objective': ['binary:logistic']},
                       {'n_jobs': [-1], 'learning_rate': np.linspace(0, 1, 20),
                        'n_estimators': [64, 128, 256], 'gamma': [0],
                        'objective': ['binary:logistic']}]
        temp = tune_param(xgb.XGBClassifier(objective='binary:logistic'),
                          tuned_param[dset], mods[3])
        post.append(temp)

    ##### MLP #####
    if enable[4] == 1:
        tuned_param = [{'alpha': [0], 'max_iter': [7000],
                        'hidden_layer_sizes': [100],
                        'learning_rate_init': [0.0001]},
                       {'alpha': [0], 'max_iter': [7000],
                        'hidden_layer_sizes': [100],
                        'learning_rate_init': [0.0001]},
                       {'alpha': [0], 'max_iter': [10000],
                        'activation': ['logistic', 'relu'],
                        'learning_rate_init': [0.0001], 'solver': ['lbfgs']},
                       {'alpha': [0], 'max_iter': [7000],
                        'hidden_layer_sizes': [100],
                        'learning_rate_init': [0.0001]},
                       {'alpha': [0], 'max_iter': [7000],
                        'hidden_layer_sizes': [100],
                        'learning_rate_init': [0.0001]}]
        temp = tune_param(MLPClassifier(), tuned_param[dset], mods[4])
        post.append(temp)

    ##### RF #####
    if enable[5] == 1:
        # identical grid for all five datasets
        tuned_param = [{'max_depth': [10], 'n_estimators': [128]}] * 5
        temp = tune_param(RandomForestClassifier(n_jobs=-1),
                          tuned_param[dset], mods[5])
        post.append(temp)

    qda = QuadraticDiscriminantAnalysis()
    qda.fit(train_X, train_y)
    post.append(qda)
    return post
min_val, max_val = np.inf, -np.inf
train_jitter, min_val, max_val = norm(pd.read_csv(filepath + train_jitter).values,
                                      train=True, min_val=min_val,
                                      max_val=max_val)
train_label = np.load(filepath + train_label)
dev_jitter, dev_label = norm(pd.read_csv(filepath + dev_jitter).values,
                             train=False, min_val=min_val,
                             max_val=max_val), np.load(filepath + dev_label)
test_jitter, test_label = norm(pd.read_csv(filepath + test_jitter).values,
                               train=False, min_val=min_val,
                               max_val=max_val), np.load(filepath + test_label)

clf = svm.NuSVC(gamma='auto')
clf.fit(train_jitter, train_label)

# predict
train_pred_labels = clf.predict(train_jitter)
dev_pred_labels = clf.predict(dev_jitter)
test_pred_labels = clf.predict(test_jitter)

print(metrics(train_label, train_pred_labels))
print(metrics(dev_label, dev_pred_labels))
print(metrics(test_label, test_pred_labels))
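# `norm` is not defined in this snippet; a plausible min-max scaler matching
# the call signature above (an assumption, not the original implementation):
def norm(data, train, min_val, max_val):
    """Scale features to [0, 1]; fit the range on training data only."""
    if train:
        min_val, max_val = data.min(axis=0), data.max(axis=0)
        return (data - min_val) / (max_val - min_val), min_val, max_val
    return (data - min_val) / (max_val - min_val)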
    ec.get_feature_names(),
    titanic.select_dtypes(exclude="object").drop("Y", axis=1).columns.tolist()
])
Y = titanic.Y
X_new, y_new = ro.fit_resample(X, Y)
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new,
                                                    random_state=16)

# %%
# Model
clf = GradientBoostingClassifier(n_estimators=10000, random_state=16)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

clf_svm = svm.NuSVC(gamma='auto', random_state=16)
clf_svm.fit(X_train, y_train)

# layer sizes must be an ordered tuple, not a set as in the original
clf_MLP = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(2, 13),
                        max_iter=10000, alpha=1e-5, random_state=16)
clf_MLP.fit(X_train, y_train)

clf_forest = RandomForestClassifier(n_estimators=10000, random_state=16)
clf_forest.fit(X_train, y_train)

# %%
mlp_y_pred = clf_MLP.predict(X_test)
svm_y_pred = clf_svm.predict(X_test)
model = svm.SVC(kernel='linear', C=0.4)
model = model.fit(X_train, y_train)
score_report(X_test, y_test)

model = svm.SVC(kernel='rbf', gamma=1.0, C=0.13)
model = model.fit(X_train, y_train)
score_report(X_test, y_test)

model = svm.SVC(kernel='poly', degree=2, C=0.05)
model = model.fit(X_train, y_train)
score_report(X_test, y_test)

model = svm.NuSVC(probability=True)
model = model.fit(X_train, y_train)
score_report(X_test, y_test)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
model = LinearDiscriminantAnalysis()
model = model.fit(X_train, y_train)
score_report(X_test, y_test)

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
model = QuadraticDiscriminantAnalysis()
model = model.fit(X_train, y_train)
train_X1 = data_X[0:50, :]
train_X2 = data_X[81:, :]
train_y1 = data_y[0:50, :]
train_y2 = data_y[81:, :]
X_train = np.concatenate((train_X1, train_X2), axis=0)
y_train = np.concatenate((train_y1, train_y2), axis=0)
y_train = y_train.reshape(-1)
X_test = data_X[50:80, :]
y_test = data_y[50:80, :]
y_test = y_test.reshape(-1)
# print(X_train.shape, y_train.shape)
# print(X_test.shape, y_test.shape)

clf = svm.NuSVC(nu=0.2)
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))

clf = svm.SVC(C=1, kernel='sigmoid', degree=2, gamma=100)
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))

# l1 penalty requires a liblinear/saga solver in current scikit-learn
clf = LogisticRegression(C=1, penalty='l1', solver='liblinear')
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))

clf = GradientBoostingClassifier()
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))

clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))

clf = AdaBoostClassifier(n_estimators=100)
def init_svm(self) -> None:
    all_models = [svm.NuSVC(probability=True), svm.SVC(probability=True)]
    self.models.extend(all_models)
    models = ["nu", "svc"]
    for mod in models:
        self.model_keys[mod] = "svm"
Perform binary classification using non-linear SVC with RBF kernel. The
target to predict is a XOR of the inputs.
"""
print(__doc__)

import numpy as np
import pylab as pl
from sklearn import svm

xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
np.random.seed(0)
X = np.random.randn(300, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
clf = svm.NuSVC()
clf.fit(X, Y)

# plot the line, the points, and the nearest vectors to the plane
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

pl.set_cmap(pl.cm.Paired)
pl.pcolormesh(xx, yy, Z)
pl.scatter(X[:, 0], X[:, 1], c=Y)
pl.axis('tight')
pl.show()
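# A variant sketch (not part of the original): plotting the decision
# function instead of hard predictions gives a smoother view of the same
# XOR boundary; matplotlib.pyplot is used in place of the old pylab API.
import matplotlib.pyplot as plt

Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           aspect='auto', origin='lower', cmap=plt.cm.PuOr_r)
plt.contour(xx, yy, Z, levels=[0], linewidths=2)  # the decision boundary
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k')
plt.show()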
import numpy as np
import pylab as pl
from sklearn import svm, datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
                      # avoid this ugly slicing by using a two-dim dataset
Y = iris.target

h = .02  # step size in the mesh

# we create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
svc = svm.SVC(kernel='linear').fit(X, Y)
poly_svc = svm.SVC(kernel='poly').fit(X, Y)  # renamed from rbf_svc: the kernel here is polynomial
nu_svc = svm.NuSVC(kernel='linear').fit(X, Y)
lin_svc = svm.LinearSVC().fit(X, Y)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))

# title for the plots
titles = ['SVC with linear kernel',
          'SVC with polynomial (degree 3) kernel',
          'NuSVC with linear kernel',
          'LinearSVC (linear kernel)']

pl.set_cmap(pl.get_cmap('jet'))
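# The snippet stops before the plotting loop; a plausible completion in the
# same style (a sketch, not the original continuation):
for i, clf in enumerate((svc, poly_svc, nu_svc, lin_svc)):
    pl.subplot(2, 2, i + 1)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    pl.pcolormesh(xx, yy, Z)           # decision regions
    pl.scatter(X[:, 0], X[:, 1], c=Y)  # training points
    pl.title(titles[i])
pl.show()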