def SVM():
    """Train two SVCs on the room-occupancy data and compare accuracy with
    standardized vs. raw features.

    Loads up to 8000 training rows from 'datatraining.txt' and the test set
    from 'datatest.txt' via the module-level loadData helper, then prints the
    test accuracy of (a) a model trained/evaluated on standardized features
    and (b) a model trained/evaluated on the raw features.
    """
    train_y = list()
    train_X = loadData('datatraining.txt', train_y, 8000)
    print(len(train_y))
    print(len(train_X))
    test_y = list()
    test_X = loadData('datatest.txt', test_y)
    # print(train_X, '/n', test_X)

    # Standardize both splits using statistics fitted on the training set only.
    sc = StandardScaler()
    sc.fit(train_X)
    train_X_std = sc.transform(train_X)
    test_X_std = sc.transform(test_X)

    # Model trained on standardized features.
    model = svm.SVC(gamma=0.01)
    model.fit(train_X_std, train_y)
    y_pred = model.predict(test_X_std)
    print("Std accuracy: {0: .2f}%".format(
        accuracy_score(test_y, y_pred) * 100))
    print("Std accuracy: {0: .4f}".format(accuracy_score(test_y, y_pred)))

    # Model trained on the raw (non-standardized) features.
    model2 = svm.SVC(gamma=0.01)
    model2.fit(train_X, train_y)
    # BUG FIX: the original called model.predict(test_X) here, so the
    # "NonStd" figures actually measured the std-trained model on raw
    # features. Predict with model2, the model trained on raw features.
    y_pred_2 = model2.predict(test_X)
    print("NonStd accuracy: {0: .2f}%".format(
        accuracy_score(test_y, y_pred_2) * 100))
    print("NonStd accuracy: {0: .4f}".format(accuracy_score(test_y, y_pred_2)))
def SVM():
    """Load train/test CSVs, standardize the features, fit an SVC with
    gamma=0.01, and print the test-set accuracy plus the raw label lists."""
    train_y = []
    train_X = loadData('train.csv', train_y, 1000)
    print(len(train_y))
    print(len(train_X))

    test_y = []
    test_X = loadData('test.csv', test_y)
    # print(train_X, '/n', test_X)

    # Scale both splits with statistics learned from the training data only.
    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X_std = scaler.transform(train_X)
    test_X_std = scaler.transform(test_X)

    classifier = svm.SVC(gamma=0.01)
    classifier.fit(train_X_std, train_y)
    y_pred = classifier.predict(test_X_std)

    accuracy = accuracy_score(test_y, y_pred)
    print("accuracy: {0: .2f}%".format(accuracy * 100))
    print(train_y)
    print(test_y)
def mcsvmTrain(X, Y, C, kernelFunction, tol=1e-3):
    """Train a multiclass SVM using the one-vs-one strategy.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features) -- training data.
    Y : ndarray of shape (n_samples,) -- integer class labels.
    C : float -- SVM regularization parameter, passed to each binary SVC.
    kernelFunction : kernel spec passed to svm.SVC.
    tol : float -- tolerance (currently unused; kept for interface
        compatibility with callers).

    Returns
    -------
    dict with keys 'uniqueLabels', 'labelCounts', and 'classifiers'
    (an array of dicts, each holding a fitted binary SVC 'm' and the
    label indices 'idx1'/'idx2' it separates).
    """
    uniqueLabels = np.unique(Y)
    labelCounts = uniqueLabels.size
    model = {}
    model['uniqueLabels'] = uniqueLabels
    model['labelCounts'] = labelCounts
    classifiers = np.array([])
    # One-vs-one needs one binary classifier per unordered pair of labels:
    # k*(k-1)/2. BUG FIX: the original used k*(k+1)/2, over-counting by k
    # (it included the self-pairs, which the loops below never train).
    nclassifiers = np.round(labelCounts * (labelCounts - 1) / 2)
    # print(nclassifiers, 'SVM classifiers will be trained')
    count = 0
    for i in np.arange(labelCounts):
        for j in np.arange(i + 1, labelCounts):
            label1 = uniqueLabels[i]
            label2 = uniqueLabels[j]
            count += 1
            # Parenthesized print: works on both Python 2 and 3, consistent
            # with the print() calls used elsewhere in this file.
            print('{0}. SVM classification: {1:d} and {2:d}'.format(count, label1, label2))
            # Build the binary sub-problem: label1 samples -> class 1,
            # label2 samples -> class 0.
            subX1 = X[Y == label1, :]
            subX2 = X[Y == label2, :]
            subX = np.append(subX1, subX2, axis=0)
            subY = np.append(np.ones(subX1.shape[0]), np.zeros(subX2.shape[0]))
            m = svm.SVC(C, kernelFunction)
            m.fit(subX, subY)
            classifier = {}
            classifier['m'] = m
            classifier['idx1'] = i
            classifier['idx2'] = j
            classifiers = np.append(classifiers, classifier)
    model['classifiers'] = classifiers
    return model
def svm_baseline():
    """Fit a default SVC on the MNIST training split and report how many
    test labels it predicts correctly."""
    training_data, validation_data, test_data = mnist_loader.load_data()
    # Train on the raw image vectors and their labels.
    classifier = svm.SVC()
    classifier.fit(training_data[0], training_data[1])
    # Predict integer labels for the test images and count exact matches.
    predictions = [int(p) for p in classifier.predict(test_data[0])]
    num_correct = sum(1 for p, actual in zip(predictions, test_data[1]) if p == actual)
    print("\nBaseline Classifier using a svm\n")
    print("%s of %s values correct." % (num_correct, len(test_data[1])))
def svm_classifier(train, query, train_cols):
    """Fit an SVC on the labeled train frame and predict labels for query.

    Parameters
    ----------
    train : DataFrame with feature columns and a 'firmware_bool' label column.
    query : DataFrame to classify; gains a 'result' column (modified in place).
    train_cols : feature column names (string-typed columns excluded).
    """
    clf = svm.SVC()  # default RBF-kernel classifier
    # NOTE(review): train and query are scaled independently with
    # preprocessing.scale, so query features are not on the same scale as
    # the training features; fitting one StandardScaler on train and
    # reusing it on query would be consistent — confirm intent.
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])
    # Converted Python-2 print statements to print() calls: consistent with
    # the rest of the file and valid on both Python 2 and 3.
    print(clf.fit(train[train_cols], train['firmware_bool']))
    # 30-fold cross-validation score on the training data.
    scores = cv.cross_val_score(clf, train[train_cols], train['firmware_bool'], cv=30)
    print('Estimated score SVM: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))
    query['result'] = clf.predict(query[train_cols])
    print(query[['url', 'result']])
def main():
    """Tune a weighted-Levenshtein + spectrum kernel per dataset via
    gradient-based optimization, then evaluate it with an SVM."""
    import os
    # Run from the project root so the relative 'data' path resolves.
    if not os.path.exists('data'):
        os.chdir('..')
    import evaluation
    import optimize
    import svm
    for dataset in [0, 1, 2]:
        print('DATASET={}'.format(dataset))
        X = data.load(k=dataset)
        # Precomputed cumulative-spectrum kernel matrix for this dataset —
        # presumably indexed [kernel_set][dataset]; TODO confirm layout.
        spec_k = data.precomputed_kernels(None, 'cum_spectrum_31')[0][dataset]

        def levenshtein_kernel_diff(params, I):
            # params: 12 log-space factors (10 edit weights, 1 scale,
            # 1 spectrum blend weight). I: index subset into X.
            factors = ag.exp(params)
            dists = levenshtein_distance_v2(X[I], X[I], weights=factors[:10], tqdm=False)
            scale = factors[10]
            # RBF-style transform of mean-normalized distances, blended
            # with the precomputed spectrum kernel.
            return ag.exp(
                -dists / (dists.mean() + 1e-3) * scale) + factors[11] * spec_k[I][:, I].astype(np.float32)

        n = 512        # subsample size per optimization step
        num_folds = 2  # folds used inside the kernel-parameter optimization
        θ = ag.zeros(12)  # kernel parameters (log-space)
        λ = ag.zeros(1)   # ridge regularization parameter (log-space)
        θ, λ, stats = optimize.optimize(
            kernel=levenshtein_kernel_diff,
            clf=optimize.KernelRidge,
            Y=data.train_Ys[dataset],
            indices=lambda: np.random.permutation(len(X))[:n],
            folds=lambda p: data.k_folds_indices(p, num_folds),
            θ=θ,
            λ=λ,
            β=1e2,
            iters=50,
            verbose=False,
        )
        print(θ, λ)
        # Materialize the full kernel matrix with the tuned parameters.
        K = levenshtein_kernel_diff(θ, np.arange(len(X))).data
        # Repeat the SVM evaluation three times (fold assignment is
        # presumably randomized — TODO confirm in evaluation.evaluate).
        for _ in range(3):
            print(
                evaluation.evaluate(svm.SVC(C=10), K, data.train_Ys[dataset], folds=20))
def __init__(self, string):
    """Train a TF-IDF + SVC sentiment classifier on the bundled training
    CSV and classify the tweets in the file named by `string`.

    string: file name (under the static folder) containing one tweet per
    line; results are stored as counts on self.pos/self.neg/self.neu.
    """
    self.string1 = string
    # Labeled training data: tab-separated Sentiment / Tweet columns.
    # NOTE(review): non-raw Windows path — it only works because '\H',
    # '\T', etc. are not escape sequences; prefer raw strings (r"...").
    df = pd.read_csv(
        "E:\Hackathon\HackathonProject\Music\FirstPythonProject\static\TrainDataSet.csv",
        sep="\t")
    senti, twt = list(df["Sentiment"]), list(df["Tweet"])
    path = "E:\Hackathon\HackathonProject\Music\FirstPythonProject\static\\" + self.string1
    # The input file has no header; each line is treated as one tweet.
    df1 = pd.read_csv(path, header=None, names=["sentiment"])
    tweets, self.stringTweets = list(df1["sentiment"]), str(
        df1["sentiment"])
    tf = TfidfVectorizer(min_df=0,
                         max_df=1.0,
                         stop_words='english',
                         ngram_range=(1, 1))
    # 90/10 split of the labeled data (uses the deprecated
    # sklearn.cross_validation module — modern code would use
    # sklearn.model_selection.train_test_split).
    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(
        twt, senti, train_size=0.90, random_state=42)
    # Fit the vocabulary on the training tweets, then vectorize the
    # user-supplied tweets with the same vocabulary.
    features_train_transform = tf.fit_transform(features_train, labels_train)
    features_test_transform = tf.transform(tweets)
    # Earlier feature-selection / MultinomialNB experiments, kept for reference:
    # selector = SelectPercentile(f_classif, percentile=50)
    # selector.fit(features_train_transform, labels_train)
    # selector.transform(features_test_transform)
    # features_train_transform = selector.transform(features_train_transform).toarray()
    # features_test_transform = selector.transform(features_test_transform).toarray()
    #
    # mnb = MultinomialNB()
    # mnb.fit(features_train_transform, labels_train)
    # prediction = mnb.predict(features_test_transform)
    # print("Accuracy Using sklearn metrics : {}".format(accuracy_score(prediction, labels_test)))
    svm_ = svm.SVC(gamma=1, C=100)
    svm_.fit(features_train_transform, labels_train)
    predictionsvm = svm_.predict(features_test_transform)
    # print(prediction)
    print(predictionsvm)
    # print("Accuracy Using sklearn metrics for SVM : {}".format(accuracy_score(predictionsvm, labels_test)))
    resultList = list(predictionsvm)
    # Tally predicted labels — presumably the Sentiment140 coding
    # ('4' = positive, '0' = negative, '2' = neutral); TODO confirm
    # against TrainDataSet.csv.
    self.pos, self.neg, self.neu, self.worcloud = resultList.count(
        '4'), resultList.count('0'), resultList.count('2'), str(
            df1["sentiment"])
def svm_classifier(train, query, train_cols):
    """Fit an SVC on the labeled train frame and predict maliciousness
    for the query URLs.

    Parameters
    ----------
    train : DataFrame with feature columns and a 'malicious' label column.
    query : DataFrame to classify; gains a 'result' column (modified in place).
    train_cols : feature column names used for training/prediction.
    """
    clf = svm.SVC()  # default RBF-kernel classifier
    # NOTE(review): train and query are scaled independently with
    # preprocessing.scale, so query features may not be on the training
    # scale; fitting one StandardScaler on train and reusing it on query
    # would be consistent — confirm intent.
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])
    # Converted Python-2 print statements to print() calls: consistent with
    # the rest of the file and valid on both Python 2 and 3.
    print(clf.fit(train[train_cols], train['malicious']))
    # 30-fold cross-validation score on the training data.
    scores = cv.cross_val_score(clf, train[train_cols], train['malicious'], cv=30)
    print('Estimated score SVM: %0.5f (+/- %0.5f)' % (scores.mean(), scores.std() / 2))
    query['result'] = clf.predict(query[train_cols])
    print(query[['URL', 'result']])
- Use the scaler to transform `X`, with the `.transform()` method.

    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    scaler.fit(X)
    Xscale = scaler.transform(X)

#### 1\. Fit an SVC with a linear kernel

Fitting an SVM can be done using `sklearn.svm`'s `SVC` class. Do the fitting here:

    from sklearn import svm
    my_svc = svm.SVC(C=1, kernel="linear")
    my_svc.fit(Xscale, y)

What's the accuracy? Try changing the `C` parameter.

    sum(my_svc.predict(Xscale) == y) / len(y)

#### 2\. Fit a radial-basis SVM

Try again, this time with a radial-basis (RBF) kernel. What's the accuracy? Try changing the parameters.

    my_svc = svm.SVC(C=1, kernel="rbf", gamma=100)
    my_svc.fit(Xscale, y)
    sum(my_svc.predict(Xscale) == y) / len(y)

## Cross validation
import pandas as pd
import svm
from random import randint

# Random hyperparameter search for an SVC over tweet sentiment data.
# read data
dataframe = pd.read_csv("tweets_all.csv")
X = dataframe[['tweet']]
y = dataframe[['sentiment']]

best_score = 0
best_params = {'C': None, 'gamma': None}

# for a preset number of iterations
for i in range(10):
    # try random values for each hyperparameter.
    # BUG FIX: the original passed randint(...) inline to SVC and then
    # recorded best_params['C'] = C / best_params['gamma'] = gamma, where
    # C and gamma were never defined (NameError). Sample them into
    # variables first so they can be both used and recorded.
    # NOTE(review): randint(0, 9) can yield C=0, which SVC rejects —
    # preserved from the original; consider randint(1, 9).
    C = randint(0, 9)
    gamma = randint(0, 3)
    svc = svm.SVC(C=C, gamma=gamma)
    svc.fit(X, y)
    # Xval/yval are presumably defined elsewhere in this file — TODO confirm.
    score = svc.score(Xval, yval)
    if score > best_score:
        best_score = score
        best_params['C'] = C
        best_params['gamma'] = gamma

best_score, best_params
import matplotlib.pyplot as plt
import numpy as np
import svm

plt.axes().set_aspect("equal")

# Two 2-D Gaussian clouds: class +1 around the origin, class -1 shifted
# by (2.5, 3). Seeded so the figure is reproducible.
np.random.seed(0)
X0 = np.random.randn(100, 2)
X1 = np.random.randn(100, 2) + np.array([2.5, 3])
y = np.array([1] * 100 + [-1] * 100)
X = np.r_[X0, X1]

# Fit the SVM on the combined sample.
model = svm.SVC()
model.fit(X, y)

xmin, xmax = X[:, 0].min(), X[:, 0].max()
ymin, ymax = X[:, 1].min(), X[:, 1].max()

# Scatter each class with its own marker.
plt.scatter(X0[:, 0], X0[:, 1], color="k", marker="*")
plt.scatter(X1[:, 0], X1[:, 1], color="k", marker="+")

# Predict over a 200x200 grid and draw the decision boundary as the
# zero-level contour of the predicted labels.
grid_x = np.linspace(xmin, xmax, 200)
grid_y = np.linspace(ymin, ymax, 200)
xmesh, ymesh = np.meshgrid(grid_x, grid_y)
grid_points = np.column_stack((xmesh.ravel(), ymesh.ravel()))
Z = model.predict(grid_points).reshape(xmesh.shape)
plt.contour(xmesh, ymesh, Z, levels=[0], colors="k")

print("正しく分類できた数:", (model.predict(X) == y).sum())
plt.show()
pd.set_option('display.width', desired_width)
# Room-occupancy training data; drop the timestamp column, keep the
# sensor feature columns.
roomOcc = pd.read_csv("datatraining.txt")
roomOcc = roomOcc.drop(["date"], axis=1)
#print(roomOcc.head())
#print(roomOcc.shape)
d = roomOcc.describe()
print(d)
# Earlier correlation-heatmap exploration, kept for reference:
#corr = roomOcc.corr()
#plt.figure(figsize=(10, 10))
#sns.heatmap(corr, vmax=.8, linewidths=0.01,
#            square=True, annot=True, cmap='Purples', linecolor="white")
#plt.title('Correlation between features')
roomOccTest = pd.read_csv("datatest.txt")
roomOccTest = roomOccTest.drop(["date"], axis=1)
#print(roomOccTest.head())
# pop() removes the 'Occupancy' target column in place and returns it,
# leaving only the feature columns in the frames.
y_train = roomOcc.pop('Occupancy').values
y_test = roomOccTest.pop('Occupancy').values
print(len(y_train))
print(len(y_test))
svmModel = svm.SVC(C=0.5)
#Set the number of training data
# NOTE(review): slicing to 17000 rows presumably caps training cost; if
# the file has fewer rows the slice is a no-op — confirm dataset size.
svmModel.fit(roomOcc[:17000], y_train[:17000])
predict = svmModel.predict(roomOccTest)
print(accuracy_score(y_test, predict))
# for gamma in np.logspace(-7,2,10): # print("gamma : %f, C : %f"%(gamma, C)) def kernel(x,y): term = np.dot(x, y.T) for i in range(term.shape[0]): for j in range(i+1): k = np.exp(- gamma * np.linalg.norm(x[i]-y[j], ord=2)**2) term[i,j] += k term[j,i] += k return term # m = svm_train(labels , data,'-s 0 -t 1 -c 5 -g 0.05 -q') # clf = svm.SVC(kernel='rbf', C=5, gamma=0.05) # clf.fit(data, labels) # print("accuracy : %f"%(np.mean(clf.predict(test['imgs'])==test['lbls']))) clf = svm.SVC(C=C, kernel=kernel) clf.fit(data, labels) # print("accuracy : %f"%(np.mean(clf.predict(make_kmat(lambda x, y: np.dot(x, y.T) + np.exp(- 0.05 * np.linalg.norm(x-y, ord=2)**2), test['imgs']))==test['lbls']))) # sv = clf.support_vectors_ acc = np.mean(clf.predict(test["imgs"])==test['lbls']) print("accuracy : %f"%(acc)) if acc > best_acc: best_acc = acc best_params = (gamma, C) print('new best : ' + str(best_params)) # sv = sparse_to_dense_array(m.get_SV(), 784) # sv = np.asarray(sv) mnist_2d = PCA(data, 2)
import pandas as pd
import svm

# Exhaustive grid search for SVC hyperparameters over tweet sentiment data.
# read data
dataframe = pd.read_fwf('tweets.txt')
X = dataframe[['tweet']]
y = dataframe[['sentiment']]

# Candidate values on a roughly logarithmic 1-3-10 spacing.
C_values = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]
gamma_values = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

best_score = 0
best_params = {'C': None, 'gamma': None}

# Fit and score one SVC per (C, gamma) combination, keeping the best.
for C in C_values:
    for gamma in gamma_values:
        svc = svm.SVC(C=C, gamma=gamma)
        svc.fit(X, y)
        score = svc.score(Xval, yval)
        if score <= best_score:
            continue
        # New best combination so far — record it.
        best_score = score
        best_params['C'] = C
        best_params['gamma'] = gamma

best_score, best_params
print(t_model_building_ending - t_model_building_start) ### SVM Model Statistics and Evaluation t_SVM_start = time() #0.5 is for 10 thousand #1 for 800 X_train, X_test, y_train, y_test = train_test_split(Model_Feature, Model_Predictor, test_size=0.2, random_state=1989) sys.exit() # svm classification clf = svm.SVC(kernel='linear', C=1.0).fit(X_train, y_train) y_predicted = clf.predict(X_test) cv = cross_validation.cross_val_score(clf, X_train, y_train, cv=10) # performance print("cross validation result(10 fold)") print(np.mean(cv)) print("Classification report for %s" % clf) print(metrics.classification_report(y_test, y_predicted)) print("Confusion matrix") print(metrics.confusion_matrix(y_test, y_predicted)) sys.exit() """ sys.exit()
import matplotlib.pyplot as plt
import svm
import numpy as np
from sklearn import datasets
"""
Plot using the iris dataset
"""
# NOTE(review): `svm` here is a local module exposing svm.SVC and
# svm.Kernel factory helpers (not sklearn.svm) — confirm against the
# project's svm.py.
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
# Collapse the 3-class iris target to a binary problem: class 0 -> -1,
# everything else -> +1.
y = np.array(list(map(lambda x: -1 if x == 0 else 1, iris.target)))
# One model per kernel type, all with the same C.
model_linear = svm.SVC(C=0.5, kernel=svm.Kernel.linear())
model_poly = svm.SVC(C=0.5, kernel=svm.Kernel.polynomial(3))
model_rbf = svm.SVC(C=0.5, kernel=svm.Kernel.rbf(2))
for model in [model_linear, model_poly, model_rbf]:
    model.fit(X, y)


def plot_svm(model, X, y, figname, title, axes=[0, 10, 0, 10]):
    """Plot the model's predictions and decision function over a grid.

    NOTE(review): this function's body is truncated at this chunk
    boundary — it continues past the last visible line.
    """
    x0_lin = np.linspace(axes[0], axes[1], 100)
    x1_lin = np.linspace(axes[2], axes[3], 100)
    x0, x1 = np.meshgrid(x0_lin, x1_lin)
    X_mesh = np.c_[x0.ravel(), x1.ravel()]  #convert mesh points into 2d for pred
    y_pred = model.predict(X_mesh).reshape(
        x0.shape)  #predict then convert back to meshgrid for contour plot
    y_decision = model.decision_function(X_mesh).reshape(x0.shape)
    # plt.figure(figsize=(10, 10))
    plt.plot(X[:, 0][y == -1], X[:, 1][y == -1], 'bo', label='Class: -1')