Example #1
0
#!/usr/bin/env python 
# -*- coding:utf-8 -*-
__author__="luheng"
import numpy as np
import pandas as pd
import time
import sys
sys.path.append("..")
import pre_load.trymydata as newdata
# Load the data set once and unpack it. The original called mydata() twice,
# repeating the whole load just to read the second element.
dataset = newdata.mydata()
x = dataset[0]
y = dataset[1]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(x.shape)

# Variance-based filter (original note: meant for Bernoulli-distributed
# features): only columns whose variance is above the threshold are kept.
# NOTE(review): a Bernoulli feature has variance p*(1-p) <= 0.25, so
# threshold=100 would discard every such column -- confirm this value.
from sklearn.feature_selection import VarianceThreshold
sel = VarianceThreshold(threshold=100)
x_new = sel.fit_transform(x)
print(x_new.shape)

# Keep the k=6 features that score highest against the target y under the
# chi-squared statistic.
# NOTE(review): chi2 expects non-negative feature values -- verify that x
# satisfies this.
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
sel2 = SelectKBest(chi2, k=6)
x_new2 = sel2.fit_transform(x, y)
print(x_new2)
Example #2
0
#!/usr/bin/env python 
# -*- coding:utf-8 -*-
__author__="luheng"
import numpy as np
import pandas as pd
import time
from sklearn import svm,grid_search
import time
import sys
sys.path.append("..")
import pre_load.trymydata as data
# The clock starts before the data is loaded, so the reported duration covers
# both loading and the grid search.
begin = time.time()
# Not referenced anywhere below in this script; kept as-is.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
data2use = data.mydata()
x = data2use[0]
y = data2use[1]
# Third element of the loaded data; not used by the search below.
test = data2use[2]

# Grid search over the SVM kernel and the C parameter.
# NOTE(review): sklearn.grid_search was removed in scikit-learn 0.20; on
# current releases GridSearchCV lives in sklearn.model_selection, which
# requires changing the import at the top of this script.
sv = svm.SVC()
parameters = {"kernel": ("rbf", "linear"), "C": [1, 2]}
clf = grid_search.GridSearchCV(sv, parameters)
clf.fit(x, y)
end = time.time()

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(clf.best_params_)
print(clf.best_estimator_)
print(clf.best_score_)
print("花费时间%.2fs" % (end - begin))