예제 #1
0
def SVM_Classifier(x_train, y_train, x_test, y_test):
    """Fit an SVM on the training split and evaluate it on the test split.

    The classifier is ``svm.SVC(C=2, probability=True)``.  Accuracy and the
    ROC curve are both computed on ``(x_test, y_test)`` so that the printed
    "Test Accuracy" and the saved plot describe held-out performance rather
    than performance on the data the model was fitted to.

    Args:
        x_train: Training feature matrix passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        x_test: Held-out feature matrix used for scoring and the ROC curve.
        y_test: Held-out labels; column 1 of ``predict_proba`` is used as the
            score, so this assumes a binary problem -- TODO confirm.

    Side effects:
        Prints timing (through ``tic``/``toc``), accuracy and two debug type
        lines; saves the ROC figure to ``fdir + 'image/svm_roc'``; opens a
        plot window via ``plt.show()``.
    """
    # Time the training step.
    tic()
    clf = svm.SVC(C=2, probability=True)
    clf.fit(x_train, y_train)
    toc()

    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print('Test Accuracy: %.2f' % clf.score(x_test, y_test))

    # Time the probability prediction; column 1 is used as the ROC score.
    tic()
    pred_probas = clf.predict_proba(x_test)[:, 1]
    toc()

    # Debug output: container types handed to roc_curve.
    print(type(y_test))
    print(type(pred_probas))

    fpr, tpr, _ = metrics.roc_curve(y_test, pred_probas)
    roc_auc = metrics.auc(fpr, tpr)

    plt.plot(fpr, tpr, label='area = %.2f' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.legend(loc='lower right')
    # Save before show(): the figure is written while it is still populated.
    plt.savefig(fdir + 'image/svm_roc')
    plt.show()
    plt.close()
예제 #2
0
def pca_reducing(size, x, n_keep=200):
    """Plot the PCA explained-variance curve of ``x`` and return a reduced ``x``.

    Two separate PCA fits are performed:

    1. A PCA with ``size`` components is fitted (timed with ``tic``/``toc``)
       only to draw the explained-variance curve, which is saved to
       ``fdir + 'image/train_pca'``.
    2. A fresh PCA with ``n_keep`` components is fitted and used to transform
       ``x``; this is the value returned.

    Args:
        size: Number of components for the diagnostic fit/plot.
        x: Data matrix passed to ``PCA.fit`` / ``PCA.fit_transform``.
        n_keep: Number of components kept in the returned projection.
            Defaults to 200, the value that was previously hard-coded, so
            existing two-argument callers get the same result.

    Returns:
        The result of ``PCA(n_components=n_keep).fit_transform(x)``.
    """
    # Diagnostic fit: only its explained_variance_ is used, for the plot.
    tic()
    pca = PCA(n_components=size)
    pca.fit(x)
    toc()

    # Draw the explained-variance curve and save it (the figure is not shown).
    plt.figure(1, figsize=(4, 3))
    plt.clf()
    plt.axes([.2, .2, .7, .7])
    plt.plot(pca.explained_variance_, linewidth=2)
    plt.axis('tight')
    plt.xlabel('n_components')
    plt.ylabel('explained_variance_')
    plt.savefig(fdir + 'image/train_pca')
    plt.close()

    # Final projection: keep ``n_keep`` dimensions (chosen from the plot).
    x_pca = PCA(n_components=n_keep).fit_transform(x)

    return x_pca
예제 #3
0
# coding=utf-8
"""
Created on Mon Aug 13 11:10:39 2018

@author: 95647
"""
import urllib
from urllib.parse import quote
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import pandas as pd
import string
from mytictoc import tic, toc

# Script setup: download one Douban book-tag page and collect its
# per-book <li class="subject-item"> elements for the loop that follows.

# tic() comes from mytictoc; presumably starts a wall-clock timer -- the
# matching toc() is not in the visible part of the file.
tic()
# Send a browser-like User-Agent with the request.
# NOTE(review): presumably needed because the site rejects the default
# urllib User-Agent -- confirm.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
# Tag listing URL; the path contains non-ASCII characters (the tag name).
url = "https://book.douban.com/tag/历史"
# Percent-encode the URL: every printable ASCII character is declared safe,
# so only the non-ASCII tag name is escaped and the URL structure
# ("://", "/") is left intact.
s = quote(url,safe=string.printable)
req = urllib.request.Request(s, headers=headers)
# NOTE(review): the response object is not closed in the visible code.
html = urlopen(req)
# Debug aid: dump the raw page (would consume the response body if enabled).
# print(html.read().decode("utf-8"))
# Parse the response with the lxml parser.
bsObj = BeautifulSoup(html,'lxml')
# One <li class="subject-item"> element per listed book.
items = bsObj.findAll("li",class_="subject-item")
# Accumulator, empty at this point; presumably filled with one entry per
# book by the loop below (loop body is not fully visible here).
book_info = []
for item in items:
    info = []
    titles = item.find("a",title = re.compile(".*")).contents
    if len(titles)> 1 :
        bookname = str(titles[0].strip()) + str(titles[1].text.strip())
    else: