#!/usr/bin/env python
# uses GridSearchCV to optimize SVM
import numpy as np
import sklearn.svm as svm
import sklearn.grid_search as gs
import lib.loader as ld
import sklearn.feature_extraction.text as tfidf

# training data
trainx, trainy = ld.loadtrain('data/trainingdata.txt')
trainx2 = tfidf.TfidfTransformer().fit_transform(trainx)
parameters = {'C': np.arange(1, 101, 5), 'penalty': ['l2', 'l1']}
mdl = svm.LinearSVC(dual=False)
clf = gs.GridSearchCV(mdl, parameters, n_jobs=-1, cv=5)
clf.fit(trainx2, trainy)

# print results
print(clf.best_score_)  # best score
print(clf.best_params_)  # best params
#!/usr/bin/env python
# uses GridSearchCV to optimize SVM
import numpy as np
import sklearn.svm as svm
import sklearn.grid_search as gs
import lib.loader as ld
import sklearn.feature_extraction.text as tfidf

# training data
trainx, trainy = ld.loadtrain('data/trainingdata.txt')
trainx2 = tfidf.TfidfTransformer().fit_transform(trainx)
parameters = {'C': np.arange(1, 101, 5),
        'penalty': ['l2', 'l1']}
mdl = svm.LinearSVC(dual=False)
clf = gs.GridSearchCV(mdl, parameters, n_jobs=-1, cv=5)
clf.fit(trainx2, trainy)

# print results
print(clf.best_score_)              # best score
print(clf.best_params_)             # best params