예제 #1
0
def classify():
    """Train a Perceptron at increasing iteration counts, one report per run.

    Runs 200 experiments with ``n_iter`` set to 1, 50, 100, ..., 9950. Each
    run fits on the module-level ``data``/``label``, predicts on ``test``,
    and writes the classification report followed by the elapsed wall-clock
    time to ``./report/report_<iter_times>.txt``.

    The ``./report`` directory must already exist; ``open`` does not create
    it.
    """
    for i in range(200):
        start = time.time()
        # i == 0 would mean zero training passes, so the first run uses one.
        iter_times = i * 50 if i else 1
        print('iter_times{0}'.format(iter_times))
        classifier = Perceptron(n_iter=iter_times, eta0=0.001)
        # Only the fitted model is needed: the transformed matrix returned by
        # fit_transform() was discarded, and newer scikit-learn releases no
        # longer provide fit_transform() on Perceptron at all.
        classifier.fit(data, label)
        predictions = classifier.predict(test)
        reportname = './report/report_{0}.txt'.format(iter_times)
        # The context manager closes the file even if scoring or writing
        # raises, which the manual open()/close() pair did not guarantee.
        with open(reportname, 'w') as report:
            r = classification_report(testLabel, predictions)
            # NOTE(review): the ROC values are computed but never used or
            # written anywhere in this function -- confirm whether they
            # should be reported or the call dropped.
            fpr, tpr, thresholds = roc_curve(testLabel, predictions,
                                             pos_label=2)
            report.write(r)
            end = time.time()
            report.write('time{0}'.format(str(end - start)))
예제 #2
0
from sklearn.linear_model import Perceptron

# Three-class subset of the 20 Newsgroups corpus. Headers, footers and quoted
# replies are removed via the ``remove`` argument of fetch_20newsgroups.
categories = ['rec.sport.hockey', 'rec.sport.baseball', 'rec.autos']
newsgroups_train = fetch_20newsgroups(subset='train',
                                      categories=categories,
                                      remove=('headers', 'footers', 'quotes'))
newsgroups_test = fetch_20newsgroups(subset='test',
                                     categories=categories,
                                     remove=('headers', 'footers', 'quotes'))

# Fit the TF-IDF vocabulary on the training documents only, then reuse it to
# vectorize the test documents.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)

classifier = Perceptron(n_iter=100, eta0=0.1)
# fit() replaces fit_transform(): the transformed output was discarded, and
# newer scikit-learn releases do not offer fit_transform() on Perceptron.
classifier.fit(X_train, newsgroups_train.target)
predictions = classifier.predict(X_test)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(classification_report(newsgroups_test.target, predictions))

################# Example #################
"""

"""
"""
sudo apt-get remove libopenblas-base
openblas (required for video contextualization)
is incompatible with scipy.
"""
import numpy as np
import matplotlib
matplotlib.use('Qt4Agg')
예제 #3
0
# coding=utf-8
from sklearn.metrics import f1_score, classification_report
from sklearn.linear_model import Perceptron
import numpy as np
import time, os
from sklearn.metrics import roc_curve
from matplotlib import pyplot as plt
import numpy as np

# Load comma-separated training/test samples and labels from ./data.
data = np.loadtxt('./data/TrainSamples.csv', delimiter=",")
print(data)
label = np.loadtxt('./data/TrainLabels.csv', delimiter=",")
print(label)
test = np.loadtxt('./data/TestSamples1.csv', delimiter=',')
testLabel = np.loadtxt('./data/TestLabels1.csv', delimiter=',')
print('iter_times{0}'.format(str(1000)))

# Time the whole fit / predict / report sequence.
start = time.time()
classifier = Perceptron(n_iter=1000, eta0=0.001)
# fit() replaces fit_transform(): the transformed output was discarded, and
# newer scikit-learn releases do not offer fit_transform() on Perceptron.
classifier.fit(data, label)
predictions = classifier.predict(test)
reportname = 'Perceptron.txt'
# The context manager closes the report even if scoring or writing raises,
# which the manual open()/close() pair did not guarantee.
with open(reportname, 'w') as report:
    r = classification_report(testLabel, predictions)
    # NOTE(review): the ROC values are not used anywhere in the visible
    # code -- confirm whether they are needed.
    fpr, tpr, thresholds = roc_curve(testLabel, predictions, pos_label=2)
    report.write(r)
    end = time.time()
    report.write('time{0}'.format(str(end - start)))
예제 #4
0
#df = df._get_numeric_data()

#msk = np.random.rand(len(df)) < 0.8
# Split the 'Survived' label column off the training frame; the remaining
# columns are used as features.
train = df
train_target = train['Survived']
train = train.drop('Survived', axis=1)
#print train
# The test frame is used as-is; splitting a label column off it is disabled.
test = df_test
#test_target = test['Survived']
#test = test.drop('Survived',axis=1)

print(len(train))
print(len(test))
#print newsgroups_train.filenames

#print newsgroups_test.filenames.shape

#print vectorizer
#print X_train

classifier = Perceptron(n_iter=5000, eta0=0.3)
# fit() replaces fit_transform(): the transformed output was discarded, and
# newer scikit-learn releases do not offer fit_transform() on Perceptron.
classifier.fit(train, train_target)
predictions = classifier.predict(test)
#print predictions
#print classification_report(test_target, predictions)
print(predictions)
# Write the predicted labels to disk.
np.savetxt('test_final.csv', predictions)

#cm = confusion_matrix(test_target,predictions)
#print cm
#print classifier.score(train,train_target)
# NOTE(review): no data is plotted in this block before show(), and the axis
# labels/title do not relate to the data used above -- confirm whether these
# plotting calls are leftovers.
plt.xlabel('Proportion of the day spent sleeping')
plt.ylabel('Proportion of the day spent being grumpy')
plt.title('Kittens and Adult Cats')
plt.show()

#Perceptron 
# Three-class subset of the 20 Newsgroups corpus. Headers, footers and quoted
# replies are removed via the ``remove`` argument of fetch_20newsgroups.
categories = ['rec.sport.hockey', 'rec.sport.baseball', 'rec.autos']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories, remove=('headers', 'footers', 'quotes'))
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories, remove=('headers', 'footers', 'quotes'))

# Fit the TF-IDF vocabulary on the training documents only, then reuse it to
# vectorize the test documents.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)

classifier = Perceptron(n_iter=100, eta0=0.1)
# fit() replaces fit_transform(): the transformed output was discarded, and
# newer scikit-learn releases do not offer fit_transform() on Perceptron.
classifier.fit(X_train, newsgroups_train.target)
predictions = classifier.predict(X_test)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(classification_report(newsgroups_test.target, predictions))
"""
Output seen
             precision    recall  f1-score   support

          0       0.89      0.87      0.88       396
          1       0.87      0.78      0.82       397
          2       0.79      0.88      0.83       399

avg / total       0.85      0.85      0.85      1192
"""

#plot the output
import matplotlib
예제 #6
0
#df = df._get_numeric_data()

#msk = np.random.rand(len(df)) < 0.8
# Split the 'Survived' label column off the training frame; the remaining
# columns are used as features.
train = df
train_target = train['Survived']
train = train.drop('Survived', axis=1)
#print train
# The test frame is used as-is; splitting a label column off it is disabled.
test = df_test
#test_target = test['Survived']
#test = test.drop('Survived',axis=1)

print(len(train))
print(len(test))
#print newsgroups_train.filenames

#print newsgroups_test.filenames.shape

#print vectorizer
#print X_train

classifier = Perceptron(n_iter=5000, eta0=0.3)
# fit() replaces fit_transform(): the transformed output was discarded, and
# newer scikit-learn releases do not offer fit_transform() on Perceptron.
classifier.fit(train, train_target)
predictions = classifier.predict(test)
#print predictions
#print classification_report(test_target, predictions)
print(predictions)
# Write the predicted labels to disk.
np.savetxt('test_final.csv', predictions)

#cm = confusion_matrix(test_target,predictions)
#print cm
#print classifier.score(train,train_target)