Ejemplo n.º 1
0
def SVM():
    """Train two SVM classifiers on the occupancy dataset and compare
    test accuracy with and without feature standardization.

    Relies on module-level helpers/imports: loadData, StandardScaler,
    svm, accuracy_score.
    """
    train_y = list()
    # loadData fills train_y in place and returns the feature rows
    # (capped at 8000 training samples).
    train_X = loadData('datatraining.txt', train_y, 8000)
    print(len(train_y))
    print(len(train_X))

    test_y = list()
    test_X = loadData('datatest.txt', test_y)

    # Standardize features using statistics from the training set only.
    sc = StandardScaler()
    sc.fit(train_X)

    train_X_std = sc.transform(train_X)
    test_X_std = sc.transform(test_X)

    model = svm.SVC(gamma=0.01)
    model.fit(train_X_std, train_y)

    y_pred = model.predict(test_X_std)

    print("Std accuracy: {0: .2f}%".format(
        accuracy_score(test_y, y_pred) * 100))
    print("Std accuracy: {0: .4f}".format(accuracy_score(test_y, y_pred)))

    # Second model trained on raw (non-standardized) features.
    model2 = svm.SVC(gamma=0.01)
    model2.fit(train_X, train_y)

    # BUG FIX: the original predicted with `model` (trained on
    # standardized data), so the "NonStd" numbers never reflected model2.
    y_pred_2 = model2.predict(test_X)

    print("NonStd accuracy: {0: .2f}%".format(
        accuracy_score(test_y, y_pred_2) * 100))
    print("NonStd accuracy: {0: .4f}".format(accuracy_score(test_y, y_pred_2)))
Ejemplo n.º 2
0
def SVM():
    """Fit an RBF SVM on standardized features from train.csv and print
    its accuracy on test.csv, then dump both label lists."""
    labels_train = []
    features_train = loadData('train.csv', labels_train, 1000)
    print(len(labels_train))
    print(len(features_train))

    labels_test = []
    features_test = loadData('test.csv', labels_test)

    # Scale both splits with statistics learned from the training set.
    scaler = StandardScaler()
    scaler.fit(features_train)
    features_train_std = scaler.transform(features_train)
    features_test_std = scaler.transform(features_test)

    classifier = svm.SVC(gamma=0.01)
    classifier.fit(features_train_std, labels_train)
    predictions = classifier.predict(features_test_std)

    print("accuracy: {0: .2f}%".format(accuracy_score(labels_test, predictions) * 100))

    print(labels_train)
    print(labels_test)
Ejemplo n.º 3
0
def mcsvmTrain(X, Y, C, kernelFunction, tol=1e-3):
    """Train a multiclass SVM using a one-vs-one strategy.

    Fits one binary svm.SVC per unordered pair of class labels and
    returns a model dict with keys 'uniqueLabels', 'labelCounts' and
    'classifiers'; each classifier dict holds the fitted estimator 'm'
    and the label indices 'idx1'/'idx2'.

    Note: `tol` is accepted for interface compatibility but unused here.
    """
    uniqueLabels = np.unique(Y)
    labelCounts = uniqueLabels.size
    model = {}
    model['uniqueLabels'] = uniqueLabels
    model['labelCounts'] = labelCounts
    classifiers = np.array([])
    # One-vs-one: k classes yield k*(k-1)/2 pairwise classifiers.
    # (BUG FIX: the original computed k*(k+1)/2, which over-counts.)
    nclassifiers = np.round(labelCounts * (labelCounts - 1) / 2)
    count = 0
    for i in np.arange(labelCounts):
        for j in np.arange(i + 1, labelCounts):
            label1 = uniqueLabels[i]
            label2 = uniqueLabels[j]
            count += 1
            # print() for Python 3 consistency with the rest of the file
            # (original used a Python 2 print statement).
            print('{0}. SVM classification: {1:d} and {2:d}'.format(
                count, label1, label2))
            # Binary subproblem: rows of label1 map to 1, label2 to 0.
            subX1 = X[Y == label1, :]
            subX2 = X[Y == label2, :]
            subX = np.append(subX1, subX2, axis=0)
            subY = np.append(np.ones(subX1.shape[0]),
                             np.zeros(subX2.shape[0]))
            # FIX: these two lines were tab-indented in the original,
            # which is a TabError under Python 3.
            m = svm.SVC(C, kernelFunction)
            m.fit(subX, subY)
            classifier = {}
            classifier['m'] = m
            classifier['idx1'] = i
            classifier['idx2'] = j
            classifiers = np.append(classifiers, classifier)
    model['classifiers'] = classifiers
    return model
Ejemplo n.º 4
0
def svm_baseline():
    """Fit a default SVC on the MNIST training split and report how many
    test digits it classifies correctly."""
    training_data, validation_data, test_data = mnist_loader.load_data()
    # Fit on (images, labels) from the training split.
    classifier = svm.SVC()
    classifier.fit(training_data[0], training_data[1])
    # Predict the test images and count exact label matches.
    predictions = [int(label) for label in classifier.predict(test_data[0])]
    num_correct = sum(
        int(predicted == actual)
        for predicted, actual in zip(predictions, test_data[1]))
    print("\nBaseline Classifier using a svm\n")
    print("%s of %s values correct." % (num_correct, len(test_data[1])))
Ejemplo n.º 5
0
def svm_classifier(train, query, train_cols):
    """Fit an SVC on `train[train_cols]` against the 'firmware_bool'
    label, report 30-fold cross-validation accuracy, and write
    predictions for `query` into its 'result' column.

    train_cols: feature column names (string columns excluded).
    NOTE: scales train and query independently and mutates both
    dataframes in place.
    """
    clf = svm.SVC()  # classifier (default kernel)
    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])
    # print() calls for Python 3 consistency with the other examples in
    # this file (the original used Python 2 print statements).
    print(clf.fit(train[train_cols], train['firmware_bool']))
    scores = cv.cross_val_score(clf,
                                train[train_cols],
                                train['firmware_bool'],
                                cv=30)
    print('Estimated score SVM: %0.5f (+/- %0.5f)' %
          (scores.mean(), scores.std() / 2))
    query['result'] = clf.predict(query[train_cols])
    print(query[['url', 'result']])
Ejemplo n.º 6
0
def main():
    """Tune a combined Levenshtein + spectrum kernel per dataset, then
    evaluate an SVM on the resulting Gram matrix.

    Relies on project-local modules (data, evaluation, optimize, svm)
    and module-level helpers (ag, levenshtein_distance_v2, np).
    """
    import os
    # The script may be launched from a subdirectory; hop up so the
    # data/ folder is reachable.
    if not os.path.exists('data'):
        os.chdir('..')
    import evaluation
    import optimize
    import svm

    for dataset in [0, 1, 2]:
        print('DATASET={}'.format(dataset))
        X = data.load(k=dataset)
        # Precomputed cumulative-spectrum kernel for this dataset.
        spec_k = data.precomputed_kernels(None, 'cum_spectrum_31')[0][dataset]

        def levenshtein_kernel_diff(params, I):
            # Differentiable kernel on the subset I: weighted Levenshtein
            # distances turned into a similarity, blended with the
            # spectrum kernel. params layout appears to be: [0:10]
            # per-operation weights, [10] scale, [11] spectrum mixing
            # factor — TODO confirm against levenshtein_distance_v2.
            factors = ag.exp(params)
            dists = levenshtein_distance_v2(X[I],
                                            X[I],
                                            weights=factors[:10],
                                            tqdm=False)
            scale = factors[10]
            return ag.exp(
                -dists / (dists.mean() + 1e-3) *
                scale) + factors[11] * spec_k[I][:, I].astype(np.float32)

        # Optimize kernel parameters (θ) and regularization (λ) on random
        # n-sample subsets with 2-fold validation.
        n = 512
        num_folds = 2
        θ = ag.zeros(12)
        λ = ag.zeros(1)

        θ, λ, stats = optimize.optimize(
            kernel=levenshtein_kernel_diff,
            clf=optimize.KernelRidge,
            Y=data.train_Ys[dataset],
            indices=lambda: np.random.permutation(len(X))[:n],
            folds=lambda p: data.k_folds_indices(p, num_folds),
            θ=θ,
            λ=λ,
            β=1e2,
            iters=50,
            verbose=False,
        )
        print(θ, λ)

        # Build the full Gram matrix with the tuned parameters and report
        # SVM evaluation several times (folds are randomized).
        K = levenshtein_kernel_diff(θ, np.arange(len(X))).data
        for _ in range(3):
            print(
                evaluation.evaluate(svm.SVC(C=10),
                                    K,
                                    data.train_Ys[dataset],
                                    folds=20))
Ejemplo n.º 7
0
    def __init__(self, string):
        """Classify tweet sentiments in the CSV named by `string` using an
        SVM trained on a fixed labelled training set.

        Sets self.pos / self.neg / self.neu to the counts of predictions
        '4' / '0' / '2', and self.worcloud / self.stringTweets to string
        dumps of the input sentiment column.
        """
        self.string1 = string
        # FIX: raw strings avoid invalid escape sequences (e.g. "\H") in
        # these Windows paths; the resulting path bytes are unchanged.
        df = pd.read_csv(
            r"E:\Hackathon\HackathonProject\Music\FirstPythonProject\static\TrainDataSet.csv",
            sep="\t")
        senti, twt = list(df["Sentiment"]), list(df["Tweet"])

        # A raw string cannot end in a backslash, so append it separately.
        path = (r"E:\Hackathon\HackathonProject\Music\FirstPythonProject\static"
                + "\\" + self.string1)

        df1 = pd.read_csv(path, header=None, names=["sentiment"])
        tweets, self.stringTweets = list(df1["sentiment"]), str(
            df1["sentiment"])
        tf = TfidfVectorizer(min_df=0,
                             max_df=1.0,
                             stop_words='english',
                             ngram_range=(1, 1))

        # 90/10 split of the labelled tweets; the actual "test" features
        # are the user-supplied tweets, not the held-out split.
        features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(
            twt, senti, train_size=0.90, random_state=42)
        features_train_transform = tf.fit_transform(features_train,
                                                    labels_train)
        features_test_transform = tf.transform(tweets)

        svm_ = svm.SVC(gamma=1, C=100)
        svm_.fit(features_train_transform, labels_train)
        predictionsvm = svm_.predict(features_test_transform)

        print(predictionsvm)
        resultList = list(predictionsvm)
        # Labels presumably encode '4' = positive, '0' = negative,
        # '2' = neutral — TODO confirm against the training CSV.
        self.pos, self.neg, self.neu, self.worcloud = resultList.count(
            '4'), resultList.count('0'), resultList.count('2'), str(
                df1["sentiment"])
Ejemplo n.º 8
0
def svm_classifier(train, query, train_cols):
    """Fit an SVC on `train[train_cols]` against the 'malicious' label,
    report 30-fold cross-validation accuracy, and write predictions for
    `query` into its 'result' column.

    NOTE: scales train and query independently and mutates both
    dataframes in place.
    """
    clf = svm.SVC()

    train[train_cols] = preprocessing.scale(train[train_cols])
    query[train_cols] = preprocessing.scale(query[train_cols])

    # print() calls for Python 3 consistency with the rest of the file
    # (the original used Python 2 print statements).
    print(clf.fit(train[train_cols], train['malicious']))
    scores = cv.cross_val_score(clf,
                                train[train_cols],
                                train['malicious'],
                                cv=30)
    print('Estimated score SVM: %0.5f (+/- %0.5f)' %
          (scores.mean(), scores.std() / 2))

    query['result'] = clf.predict(query[train_cols])

    print(query[['URL', 'result']])
Ejemplo n.º 9
0
- Use the scaler to transform `X`, with the `.transform()` method.

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
Xscale = scaler.transform(X)

#### 1. Fit a SVC (i.e., a linear SVM)

Fitting an SVM can be done using `sklearn.svm`'s `SVC` module. 

from sklearn import svm

Do the fitting here:

my_svc = svm.SVC(C=1, kernel="linear")
my_svc.fit(Xscale, y)

What's the accuracy? Try changing the `C` parameter.

sum(my_svc.predict(Xscale) == y) / len(y)

#### 2. Fit a radial-basis SVM

Try again, this time with radial SVM. What's the accuracy? Try changing the parameters. 

my_svc = svm.SVC(C=1, kernel="rbf", gamma=100)
my_svc.fit(Xscale, y)
sum(my_svc.predict(Xscale) == y) / len(y)

## Cross validation 
import pandas as pd
import svm
from random import randint

# Read data: one tweet per row with a sentiment label.
dataframe = pd.read_csv("tweets_all.csv")
X = dataframe[['tweet']]
y = dataframe[['sentiment']]

best_score = 0
best_params = {'C': None, 'gamma': None}

# Random search: try random hyperparameter values for a preset number of
# iterations and keep the best-scoring pair.
for i in range(10):
    # BUG FIX: the original passed randint(...) directly to SVC and then
    # referenced undefined names C/gamma when recording the best pair —
    # a NameError. Draw the values into variables first.
    # NOTE(review): randint(0, ...) can yield C=0 or gamma=0, which SVC
    # rejects/degenerates on — confirm the intended ranges.
    C = randint(0, 9)
    gamma = randint(0, 3)
    svc = svm.SVC(C=C, gamma=gamma)
    svc.fit(X, y)
    # Xval/yval: validation split assumed to be defined elsewhere.
    score = svc.score(Xval, yval)

    if score > best_score:
        best_score = score
        best_params['C'] = C
        best_params['gamma'] = gamma

best_score, best_params
Ejemplo n.º 11
0
import matplotlib.pyplot as plt
import numpy as np

import svm

# Square aspect ratio so the decision boundary is not distorted.
plt.axes().set_aspect("equal")

# Two 2-D Gaussian clusters of 100 points each; the second is shifted so
# the classes are (mostly) separable. Fixed seed for reproducibility.
np.random.seed(0)
cluster_a = np.random.randn(100, 2)
cluster_b = np.random.randn(100, 2) + np.array([2.5, 3])
y = np.array([1] * 100 + [-1] * 100)
X = np.r_[cluster_a, cluster_b]

model = svm.SVC()
model.fit(X, y)

xmin, xmax = X[:, 0].min(), X[:, 0].max()
ymin, ymax = X[:, 1].min(), X[:, 1].max()

plt.scatter(cluster_a[:, 0], cluster_a[:, 1], color="k", marker="*")
plt.scatter(cluster_b[:, 0], cluster_b[:, 1], color="k", marker="+")

# Evaluate the model on a 200x200 grid and draw the zero-level contour
# as the decision boundary.
grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, 200),
                             np.linspace(ymin, ymax, 200))
grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
Z = model.predict(grid_points).reshape(grid_x.shape)
plt.contour(grid_x, grid_y, Z, levels=[0], colors="k")

# Printed label translates as "number classified correctly".
print("正しく分類できた数:", (model.predict(X) == y).sum())
plt.show()
Ejemplo n.º 12
0
    # Widen pandas' console output so describe() prints on one row.
    # NOTE(review): `desired_width` is defined outside this excerpt.
    pd.set_option('display.width', desired_width)
    roomOcc = pd.read_csv("datatraining.txt")
    # Drop the timestamp column; only the sensor features are used.
    roomOcc = roomOcc.drop(["date"], axis=1)
    #print(roomOcc.head())
    #print(roomOcc.shape)
    d = roomOcc.describe()
    print(d)

    #corr = roomOcc.corr()
    #plt.figure(figsize=(10, 10))

    #sns.heatmap(corr, vmax=.8, linewidths=0.01,
    #            square=True, annot=True, cmap='Purples', linecolor="white")
    #plt.title('Correlation between features')

    roomOccTest = pd.read_csv("datatest.txt")
    roomOccTest = roomOccTest.drop(["date"], axis=1)
    #print(roomOccTest.head())

    # pop() removes the 'Occupancy' label column from each frame and
    # returns it as the target vector.
    y_train = roomOcc.pop('Occupancy').values
    y_test = roomOccTest.pop('Occupancy').values
    print(len(y_train))
    print(len(y_test))

    # Soft-margin SVC with regularization C=0.5, trained on at most the
    # first 17000 rows.
    svmModel = svm.SVC(C=0.5)
    #Set the number of training data
    svmModel.fit(roomOcc[:17000], y_train[:17000])
    predict = svmModel.predict(roomOccTest)

    print(accuracy_score(y_test, predict))
Ejemplo n.º 13
0
#     for gamma in np.logspace(-7,2,10):
#         print("gamma : %f, C : %f"%(gamma, C))
def kernel(x, y):
    """Custom SVM kernel: linear term plus a symmetric RBF term.

    Computes dot(x, y.T) and adds exp(-gamma * ||x[i]-y[j]||^2) for each
    pair. The symmetric fill assumes a square Gram matrix (i.e. x and y
    are the same sample set) — TODO confirm for the prediction path.

    `gamma` is read from the enclosing scope.
    """
    term = np.dot(x, y.T)
    for i in range(term.shape[0]):
        for j in range(i + 1):
            k = np.exp(-gamma * np.linalg.norm(x[i] - y[j], ord=2)**2)
            term[i, j] += k
            # BUG FIX: the original unconditionally executed
            # `term[j, i] += k`, which for i == j added the diagonal RBF
            # contribution twice (exp(0) = 1 added two times).
            if i != j:
                term[j, i] += k
    return term
# m = svm_train(labels , data,'-s 0 -t 1 -c 5 -g 0.05 -q')
# clf = svm.SVC(kernel='rbf', C=5, gamma=0.05)
# clf.fit(data, labels)
# print("accuracy : %f"%(np.mean(clf.predict(test['imgs'])==test['lbls'])))

# Fit an SVC with the custom `kernel` above. C and gamma come from an
# enclosing hyperparameter loop (see the commented-out loops above) —
# both are free names here.
clf = svm.SVC(C=C, kernel=kernel)
clf.fit(data, labels)
# print("accuracy : %f"%(np.mean(clf.predict(make_kmat(lambda x, y: np.dot(x, y.T) + np.exp(- 0.05 * np.linalg.norm(x-y, ord=2)**2), test['imgs']))==test['lbls'])))
# sv = clf.support_vectors_
# Held-out accuracy on the test images.
acc = np.mean(clf.predict(test["imgs"])==test['lbls'])
print("accuracy : %f"%(acc))
# Track the best (gamma, C) pair seen so far.
if acc > best_acc: 
    best_acc = acc
    best_params = (gamma, C)
    print('new best : ' + str(best_params))


# sv = sparse_to_dense_array(m.get_SV(), 784)
# sv = np.asarray(sv)

# Project the data to 2-D. NOTE(review): called as PCA(data, 2), so PCA
# is presumably a local helper function, not sklearn's class — confirm.
mnist_2d = PCA(data, 2)
import pandas as pd
import svm

# Read data: fixed-width file of tweets with sentiment labels.
dataframe = pd.read_fwf('tweets.txt')
X = dataframe[['tweet']]
y = dataframe[['sentiment']]

# Exhaustive grid search over a log-spaced C/gamma grid, keeping the
# pair with the highest validation score.
C_values = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]
gamma_values = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

best_score = 0
best_params = {'C': None, 'gamma': None}

for C in C_values:
    for gamma in gamma_values:
        candidate = svm.SVC(C=C, gamma=gamma)
        candidate.fit(X, y)
        # Xval/yval: validation split defined elsewhere in the notebook.
        current = candidate.score(Xval, yval)
        if current > best_score:
            best_score = current
            best_params['C'] = C
            best_params['gamma'] = gamma

best_score, best_params
Ejemplo n.º 15
0
# Timing for the model-building phase (the t_* variables are set earlier
# in the script, outside this excerpt).
print(t_model_building_ending - t_model_building_start)

### SVM Model Statistics and Evaluation
t_SVM_start = time()

#0.5 is for 10 thousand
#1 for 800

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(Model_Feature,
                                                    Model_Predictor,
                                                    test_size=0.2,
                                                    random_state=1989)

# NOTE(review): this sys.exit() makes everything below unreachable —
# presumably a debugging stop; confirm before relying on the SVM output.
sys.exit()
# svm classification
clf = svm.SVC(kernel='linear', C=1.0).fit(X_train, y_train)
y_predicted = clf.predict(X_test)
cv = cross_validation.cross_val_score(clf, X_train, y_train, cv=10)
# performance
print("cross validation result(10 fold)")
print(np.mean(cv))
print("Classification report for %s" % clf)
print(metrics.classification_report(y_test, y_predicted))
print("Confusion matrix")
print(metrics.confusion_matrix(y_test, y_predicted))

sys.exit()
"""
sys.exit()

Ejemplo n.º 16
0
import matplotlib.pyplot as plt
import svm
import numpy as np
from sklearn import datasets
"""
Plot using the iris dataset
"""

# Load iris, restricted to the first two features so the models can be
# visualized in 2-D.
iris = datasets.load_iris()
X = iris.data[:, :2]
# Relabel for a binary SVM: class 0 -> -1, everything else -> +1.
y = np.array([-1 if label == 0 else 1 for label in iris.target])

# One classifier per kernel type, all sharing the same soft-margin C.
model_linear = svm.SVC(C=0.5, kernel=svm.Kernel.linear())
model_poly = svm.SVC(C=0.5, kernel=svm.Kernel.polynomial(3))
model_rbf = svm.SVC(C=0.5, kernel=svm.Kernel.rbf(2))
for classifier in (model_linear, model_poly, model_rbf):
    classifier.fit(X, y)

def plot_svm(model, X, y, figname, title, axes=[0, 10, 0, 10]):
    x0_lin = np.linspace(axes[0], axes[1], 100)
    x1_lin = np.linspace(axes[2], axes[3], 100)
    x0, x1 = np.meshgrid(x0_lin, x1_lin)
    X_mesh = np.c_[x0.ravel(),
                   x1.ravel()]  #convert mesh points into 2d for pred
    y_pred = model.predict(X_mesh).reshape(
        x0.shape)  #predict then convert back to meshgrid for contour plot
    y_decision = model.decision_function(X_mesh).reshape(x0.shape)  #

    plt.figure(figsize=(10, 10))
    plt.plot(X[:, 0][y == -1], X[:, 1][y == -1], 'bo', label='Class: -1')