Beispiel #1
0
import utility
import numpy
from sklearn.datasets import fetch_20newsgroups
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# Load the two-class train/test document splits from the project helper.
docs_train, docs_test = utility.custom_2class_classifier()

# Baseline linear SVM.  probability=True is required so that predict_proba
# is available for the ROC curve further down.
svm = SVC(kernel='linear', probability=True, random_state=40)
pipeline_svm = utility.pipeline_setup(svm)  # pipeline object reused by every SVM variant
pipeline_svm_fitted = pipeline_svm.fit(docs_train.data, docs_train.target)
# Baseline evaluation (disabled):
# svm_predict = pipeline_svm_fitted.predict(docs_test.data)
# utility.print_stats(docs_test.target, svm_predict, 'SVM Normal')
# utility.draw_roc_curve(docs_test.target, pipeline_svm_fitted.predict_proba(docs_test.data)[:, 1])

# Soft margin SVM ->
# The softness of the margin is governed by the penalty parameter C.  The
# previous grid tuned `gamma`, which is a kernel coefficient that a linear
# kernel ignores, so every candidate was the same model.  The grid now covers
# each power of ten from 10^-3 to 10^3 instead of only the two endpoints.
# NOTE(review): GridSearchCV is imported from sklearn.grid_search at the top
# of this script; that module was removed in scikit-learn 0.20 in favour of
# sklearn.model_selection.
params = {
    'learning_algo__C': [10.0 ** exponent for exponent in range(-3, 4)]
}
svm_soft_margin = GridSearchCV(pipeline_svm, params, cv=5)
svm_soft_margin_fitted = svm_soft_margin.fit(docs_train.data, docs_train.target)
svm_soft_margin_predict = svm_soft_margin_fitted.predict(docs_test.data)
utility.print_stats(docs_test.target, svm_soft_margin_predict, 'Soft Margin SVM')
# Column 1 of predict_proba is used as the score for the ROC curve.
utility.draw_roc_curve(docs_test.target, svm_soft_margin_fitted.predict_proba(docs_test.data)[:, 1])

# Parameters of the pipeline that won the cross-validated search.
best_params = svm_soft_margin.best_estimator_.get_params()
for param_name in sorted(params.keys()):
Beispiel #2
0
import utility
import numpy
from sklearn.datasets import fetch_20newsgroups
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# Load the two-class train/test document splits from the project helper.
docs_train, docs_test = utility.custom_2class_classifier()

# Baseline linear SVM.  probability=True is required for predict_proba.
svm = SVC(kernel='linear', probability=True, random_state=40)
pipeline_svm = utility.pipeline_setup(
    svm)  # pipeline object reused by every SVM variant
pipeline_svm_fitted = pipeline_svm.fit(docs_train.data, docs_train.target)
# Baseline evaluation (disabled):
# svm_predict = pipeline_svm_fitted.predict(docs_test.data)
# utility.print_stats(docs_test.target, svm_predict, 'SVM Normal')
# utility.draw_roc_curve(docs_test.target, pipeline_svm_fitted.predict_proba(docs_test.data)[:, 1])

# Soft margin SVM ->
# Margin softness is controlled by the penalty parameter C.  `gamma` (tuned
# here previously) is a kernel coefficient that has no effect on a linear
# kernel, so that search compared identical models.  Search every power of
# ten from 10^-3 to 10^3 rather than just the two endpoints.
params = {
    'learning_algo__C': [10.0 ** exponent for exponent in range(-3, 4)]
}
svm_soft_margin = GridSearchCV(pipeline_svm, params, cv=5)
svm_soft_margin_fitted = svm_soft_margin.fit(docs_train.data,
                                             docs_train.target)
svm_soft_margin_predict = svm_soft_margin_fitted.predict(docs_test.data)
utility.print_stats(docs_test.target, svm_soft_margin_predict,
                    'Soft Margin SVM')
utility.draw_roc_curve(
from sklearn.datasets import fetch_20newsgroups
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
import utility

# The four newsgroups that make up the multiclass problem.
categories = [
    'comp.sys.ibm.pc.hardware',
    'comp.sys.mac.hardware',
    'misc.forsale',
    'soc.religion.christian',
]

# Both splits are fetched with the same options; only `subset` differs.
# Optional extra that is currently off: remove=('headers','footers','quotes')
_fetch_kwargs = dict(categories=categories, shuffle=True, random_state=42)
docs_train = fetch_20newsgroups(subset='train', **_fetch_kwargs)
docs_test = fetch_20newsgroups(subset='test', **_fetch_kwargs)

# Gaussian naive Bayes passed through the project's pipeline_setup helper.
model = utility.pipeline_setup(GaussianNB())
model.fit(docs_train.data, docs_train.target)

# Predict on the held-out split and report against the true labels.
predicted = model.predict(docs_test.data)
expected = docs_test.target

utility.print_stats(expected, predicted, 'Naive Bayes Multiclass')
Beispiel #4
0
import pandas  # local import: this fragment's own import block is not visible here

# Collect the news/price table of every company into a single frame.
# NOTE(review): assumes utility.get_news_prices returns a pandas DataFrame
# (the code below relies on goog.message.apply) - confirm.
# The earlier goog.append(...) calls threw their result away: DataFrame.append
# returns a new frame instead of extending the receiver, so only the Google
# rows were ever used.  (The method was also removed in pandas 2.0.)
company_names = ['google', 'microsoft', 'apple', 'yahoo', 'adobe', 'ford']
goog = pandas.concat(
    [utility.get_news_prices(company) for company in company_names])

# Select model of computation:
# model = neural_network.MLPRegressor([len(verbnet.classids()), 200, 8], 'relu', 'adam', 0.0001, 200, 'constant', 0.001, 0.5, 200,
#                                      True, None, 0.0001, False, False, 0.9, True, False, 0.1, 0.9, 0.999, 1e-08)

# model = RandomForestRegressor(n_estimators=50, max_features=30, max_depth=9, n_jobs=1)
model = SVC(kernel='linear', probability=True, random_state=40)
# model = linear_model.LinearRegression()

model = utility.pipeline_setup(model)

# model_fitted = model.fit(goog['message'], goog['Threshold Change'])

# Select columns:
# A full stop is appended to each message before it is handed to
# utility.get_feature_vector; element [0][0] of the result is kept.
x = goog.message.apply(lambda sentence: utility.get_feature_vector(sentence + ".")[0][0])
# x.to_csv('data/google_msg_id.csv')
# x = pandas.read_csv('data/google_msg_id.csv')
# print x
# x = goog['message']
#x = x.apply(lambda i: utility.one_hot(i))
# array = numpy.zeros((len(x), len(verbnet.classids())))
# for results in range(len(x)):
#     for i in x[results]:
#         # print i
#         array[results][i] = 1
Beispiel #5
0
from sklearn.datasets import fetch_20newsgroups
import matplotlib.pyplot as pyplot
from sklearn.naive_bayes import GaussianNB
import utility

# Two-class train/test splits supplied by the project helper.
docs_train, docs_test = utility.custom_2class_classifier()

# Gaussian naive Bayes passed through the project's pipeline_setup helper.
model = utility.pipeline_setup(GaussianNB())
model_fitted = model.fit(docs_train.data, docs_train.target)

# Predict on the test split and report against the true labels.
expected = docs_test.target
predicted = model_fitted.predict(docs_test.data)
utility.print_stats(expected, predicted, 'Naive Bayes Basic')

# Column 1 of predict_proba is used as the score for the ROC curve.
positive_class_scores = model_fitted.predict_proba(docs_test.data)[:, 1]
utility.draw_roc_curve(expected, positive_class_scores)
Beispiel #6
0
]
# Fetch the train and test splits for the selected newsgroup categories.
docs_train = fetch_20newsgroups(subset='train',
                                categories=categories,
                                shuffle=True,
                                random_state=42)
docs_test = fetch_20newsgroups(subset='test',
                               categories=categories,
                               shuffle=True,
                               random_state=42)

# --- One-vs-rest multiclass SVM ---
svm_basic = SVC(kernel='linear',
                class_weight='balanced',
                probability=True,
                random_state=40)
svm_onerest = OneVsRestClassifier(svm_basic)
pipeline_svm_onerest = utility.pipeline_setup(svm_onerest)
pipeline_svm_fitted = pipeline_svm_onerest.fit(docs_train.data,
                                               docs_train.target)
svm_predict = pipeline_svm_fitted.predict(docs_test.data)
# This is the one-vs-rest model; it was previously reported as 'SVM OneVSOne'.
utility.print_stats(docs_test.target, svm_predict, 'SVM OneVSRest')

# --- One-vs-one multiclass SVM ---
# class_weight='balanced' weights each class inversely to its frequency; it
# does not change the number of samples per class.
svm_weighted = SVC(
    kernel='linear',
    class_weight='balanced',
    probability=True,
    random_state=40
)
svm_oneone = OneVsOneClassifier(svm_weighted)
pipeline_svm_oneone = utility.pipeline_setup(svm_oneone)
# pipeline_svm_fitted is rebound here to the one-vs-one model.
pipeline_svm_fitted = pipeline_svm_oneone.fit(docs_train.data,
                                              docs_train.target)
Beispiel #7
0
import utility
from sklearn.datasets import fetch_20newsgroups
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn import metrics

# The four newsgroups that make up the multiclass problem.
categories = ['comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'misc.forsale', 'soc.religion.christian']
docs_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
docs_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True, random_state=42)

# --- One-vs-rest multiclass SVM ---
svm_basic = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=40)
svm_onerest = OneVsRestClassifier(svm_basic)
pipeline_svm_onerest = utility.pipeline_setup(svm_onerest)
pipeline_svm_fitted = pipeline_svm_onerest.fit(docs_train.data, docs_train.target)
svm_predict = pipeline_svm_fitted.predict(docs_test.data)
# The two report labels were swapped: this is the one-vs-rest model.
utility.print_stats(docs_test.target, svm_predict, 'SVM OneVSRest')


# --- One-vs-one multiclass SVM ---
# class_weight='balanced' weights each class inversely to its frequency; it
# does not change the number of samples per class.
svm_weighted = SVC(kernel='linear', class_weight='balanced', probability=True, random_state=40)
svm_oneone = OneVsOneClassifier(svm_weighted)
pipeline_svm_oneone = utility.pipeline_setup(svm_oneone)
# pipeline_svm_fitted and svm_predict are rebound to the one-vs-one model.
pipeline_svm_fitted = pipeline_svm_oneone.fit(docs_train.data, docs_train.target)
svm_predict = pipeline_svm_fitted.predict(docs_test.data)
utility.print_stats(docs_test.target, svm_predict, 'SVM OneVSOne')