#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Feature-selection demo: applies a variance threshold and a chi-squared
# k-best selection to the data returned by pre_load.trymydata.mydata() and
# prints the results of each step.
__author__ = "luheng"

import sys
import time

import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import chi2

# The project package lives one directory up; it must be on sys.path before
# the import below.
sys.path.append("..")
import pre_load.trymydata as newdata

# Load once and index the result, so x and y are guaranteed to come from the
# same call (the original invoked mydata() separately for each).
dataset = newdata.mydata()
x = dataset[0]
y = dataset[1]
print(x.shape)

# Variance-based filtering: drop low-variance features (threshold=100).
# The original note says this kind of threshold is meant for
# Bernoulli-distributed (boolean) features.
sel = VarianceThreshold(threshold=100)
x_new = sel.fit_transform(x)
print(x_new.shape)

# Keep the k=6 attributes most useful for predicting y, scored with chi2.
# NOTE(review): scikit-learn documents chi2 as requiring non-negative
# feature values -- confirm that mydata() guarantees this.
sel2 = SelectKBest(chi2, k=6)
x_new2 = sel2.fit_transform(x, y)
print(x_new2)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# SVM grid-search demo: fits an SVC over a small kernel/C grid on the data
# returned by pre_load.trymydata.mydata(), then prints the best parameters,
# best estimator, best score and the elapsed wall-clock time.
__author__ = "luheng"

import sys
import time

import numpy as np
import pandas as pd
from sklearn import svm

# sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in 0.20;
# prefer the model_selection location and fall back for old installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV

# The project package lives one directory up; it must be on sys.path before
# the import below.
sys.path.append("..")
import pre_load.trymydata as data

begin = time.time()

# Not referenced below; presumably the feature columns behind x -- verify
# against pre_load.trymydata.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]

data2use = data.mydata()
x = data2use[0]
y = data2use[1]
test = data2use[2]  # third element of mydata(); unused by the search below

# Search over both kernels and two values of C with GridSearchCV's default
# cross-validation settings.
sv = svm.SVC()
parameters = {"kernel": ("rbf", "linear"), "C": [1, 2]}
clf = GridSearchCV(sv, parameters)
clf.fit(x, y)

end = time.time()

print(clf.best_params_)
print(clf.best_estimator_)
print(clf.best_score_)
print("花费时间%.2fs" % (end - begin))