def classify():
    """Train a Perceptron at 200 iteration settings and write a report for each.

    For ``i`` in ``0..199`` the iteration count is ``1`` when ``i == 0`` and
    ``i * 50`` otherwise.  Each run fits on the module-level ``data`` /
    ``label``, predicts ``test``, and writes the classification report
    followed by the elapsed wall-clock time to
    ``./report/report_<iter_times>.txt``.

    Relies on these names being defined at module level: ``time``,
    ``Perceptron``, ``classification_report``, ``roc_curve``, ``data``,
    ``label``, ``test`` and ``testLabel``.
    """
    for i in range(0, 200):
        start = time.time()
        iter_times = i * 50 if i else 1
        # Single-argument print(...) behaves identically under the Python 2
        # print statement and the Python 3 print function.
        print('iter_times{0}'.format(str(iter_times)))

        # NOTE(review): `n_iter` is the older scikit-learn spelling of this
        # parameter (later renamed `max_iter`) -- confirm the target version.
        classifier = Perceptron(n_iter=iter_times, eta0=0.001)
        # The return value of fit_transform() was discarded, so only the
        # fitting side effect is needed; fit() provides exactly that.
        classifier.fit(data, label)
        predictions = classifier.predict(test)

        r = classification_report(testLabel, predictions)
        # NOTE(review): the ROC values are computed but never used here.
        fpr, tpr, thresholds = roc_curve(testLabel, predictions, pos_label=2)

        # Open the file only once there is something to write, and let the
        # context manager close it even if a write fails.
        reportname = './report/report_{0}.txt'.format(iter_times)
        with open(reportname, 'w') as report:
            report.write(r)
            end = time.time()
            report.write('time{0}'.format(str(end - start)))
from sklearn.linear_model import Perceptron

# Train a Perceptron on three 20-newsgroups categories and print a
# classification report for the held-out test subset.
# `fetch_20newsgroups`, `TfidfVectorizer` and `classification_report` must
# already be imported by the surrounding file.
categories = ['rec.sport.hockey', 'rec.sport.baseball', 'rec.autos']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories,
                                      remove=('headers', 'footers', 'quotes'))
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories,
                                     remove=('headers', 'footers', 'quotes'))

# Fit the vectorizer on the training documents only, then reuse it unchanged
# for the test documents.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)

# NOTE(review): `n_iter` is the older scikit-learn spelling of this parameter
# (later renamed `max_iter`) -- confirm the target version.
classifier = Perceptron(n_iter=100, eta0=0.1)
# The return value of fit_transform() was discarded; fit() alone trains the
# classifier.
classifier.fit(X_train, newsgroups_train.target)
predictions = classifier.predict(X_test)
# Single-argument print(...) works under both Python 2 and Python 3.
print(classification_report(newsgroups_test.target, predictions))

################# Example #################
"""
"""

"""
sudo apt-get remove libopenblas-base
openblas (required for video contextualization) is incompatible with scipy.
"""

import numpy as np
import matplotlib
matplotlib.use('Qt4Agg')
# coding=utf-8
import os
import time

import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import Perceptron
from sklearn.metrics import f1_score, classification_report
from sklearn.metrics import roc_curve

# Load comma-separated training and test sets from ./data.
data = np.loadtxt('./data/TrainSamples.csv', delimiter=",")
# Single-argument print(...) works under both Python 2 and Python 3.
print(data)
label = np.loadtxt('./data/TrainLabels.csv', delimiter=",")
print(label)
test = np.loadtxt('./data/TestSamples1.csv', delimiter=',')
testLabel = np.loadtxt('./data/TestLabels1.csv', delimiter=',')

print('iter_times{0}'.format(str(1000)))
start = time.time()

# NOTE(review): `n_iter` is the older scikit-learn spelling of this parameter
# (later renamed `max_iter`) -- confirm the target version.
classifier = Perceptron(n_iter=1000, eta0=0.001)
# The return value of fit_transform() was discarded; fit() alone trains the
# classifier.
classifier.fit(data, label)
predictions = classifier.predict(test)

r = classification_report(testLabel, predictions)
# NOTE(review): the ROC values are computed but never used in this script.
fpr, tpr, thresholds = roc_curve(testLabel, predictions, pos_label=2)

# Write the report followed by the elapsed wall-clock time; the context
# manager closes the file even if a write fails.
reportname = 'Perceptron.txt'
with open(reportname, 'w') as report:
    report.write(r)
    end = time.time()
    report.write('time{0}'.format(str(end - start)))
# Fit a Perceptron on `df` (target column 'Survived') and save predictions
# for `df_test` to test_final.csv.  `df`, `df_test`, `np` and `Perceptron`
# must already be defined by the surrounding file.

#df = df._get_numeric_data()
#msk = np.random.rand(len(df)) < 0.8
train = df
train_target = train['Survived']
train = train.drop('Survived', axis=1)
#print train

test = df_test
#test_target = test['Survived']
#test = test.drop('Survived',axis=1)

# Single-argument print(...) works under both Python 2 and Python 3.
print(len(train))
print(len(test))
#print newsgroups_train.filenames
#print newsgroups_test.filenames.shape
#print vectorizer
#print X_train

# NOTE(review): `n_iter` is the older scikit-learn spelling of this parameter
# (later renamed `max_iter`) -- confirm the target version.
classifier = Perceptron(n_iter=5000, eta0=0.3)
# The return value of fit_transform() was discarded; fit() alone trains the
# classifier.
classifier.fit(train, train_target)
predictions = classifier.predict(test)
#print predictions
#print classification_report(test_target, predictions)
print(predictions)
np.savetxt('test_final.csv', predictions)
#cm = confusion_matrix(test_target,predictions)
#print cm
#print classifier.score(train,train_target)
# Label and display the current matplotlib figure.
plt.xlabel('Proportion of the day spent sleeping')
plt.ylabel('Proportion of the day spent being grumpy')
plt.title('Kittens and Adult Cats')
plt.show()

#Perceptron
# `fetch_20newsgroups`, `TfidfVectorizer`, `Perceptron` and
# `classification_report` must already be imported by the surrounding file.
categories = ['rec.sport.hockey', 'rec.sport.baseball', 'rec.autos']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories,
                                      remove=('headers', 'footers', 'quotes'))
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories,
                                     remove=('headers', 'footers', 'quotes'))

# Fit the vectorizer on the training documents only, then reuse it unchanged
# for the test documents.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)

# NOTE(review): `n_iter` is the older scikit-learn spelling of this parameter
# (later renamed `max_iter`) -- confirm the target version.
classifier = Perceptron(n_iter=100, eta0=0.1)
# The return value of fit_transform() was discarded; fit() alone trains the
# classifier.
classifier.fit(X_train, newsgroups_train.target)
predictions = classifier.predict(X_test)
# Single-argument print(...) works under both Python 2 and Python 3.
print(classification_report(newsgroups_test.target, predictions))

"""
Output seen

             precision    recall  f1-score   support

          0       0.89      0.87      0.88       396
          1       0.87      0.78      0.82       397
          2       0.79      0.88      0.83       399

avg / total       0.85      0.85      0.85      1192
"""

#plot the output
import matplotlib
# Train a Perceptron on `df`, using its 'Survived' column as the target, and
# write the predictions for `df_test` to test_final.csv.  `df`, `df_test`,
# `np` and `Perceptron` must already be defined by the surrounding file.

#df = df._get_numeric_data()
#msk = np.random.rand(len(df)) < 0.8
train = df
train_target = train['Survived']
train = train.drop('Survived', axis=1)
#print train

test = df_test
#test_target = test['Survived']
#test = test.drop('Survived',axis=1)

# Single-argument print(...) works under both Python 2 and Python 3.
print(len(train))
print(len(test))
#print newsgroups_train.filenames
#print newsgroups_test.filenames.shape
#print vectorizer
#print X_train

# NOTE(review): `n_iter` is the older scikit-learn spelling of this parameter
# (later renamed `max_iter`) -- confirm the target version.
classifier = Perceptron(n_iter=5000, eta0=0.3)
# fit_transform()'s return value was never used, so call fit() directly.
classifier.fit(train, train_target)
predictions = classifier.predict(test)
#print predictions
#print classification_report(test_target, predictions)
print(predictions)
np.savetxt('test_final.csv', predictions)
#cm = confusion_matrix(test_target,predictions)
#print cm
#print classifier.score(train,train_target)