コード例 #1
0
ファイル: svm_model.py プロジェクト: dheepanr/OscarsData
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 29 19:23:04 2016

@author: dheepan.ramanan
"""

from sklearn import svm
from sklearn.feature_selection import SelectPercentile
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import f_classif
from themes_df import oscar_features, labels
from sklearn.cross_validation import cross_val_score
from sklearn import metrics

# Feature matrix: everything in oscar_features except the "index" and "year"
# columns (presumably a pandas DataFrame -- confirm in themes_df).
# `axis` is passed by keyword: the positional form `drop(labels, 1)` is not
# accepted by pandas 2.x.
X = oscar_features.drop(["index", "year"], axis=1)
feature_names = X.columns.values
y = labels

anova_filter = SelectPercentile(f_classif)
clf = svm.SVC(probability=True)
accuracy = []
percentile_range = range(1, 100, 2)

# Sweep the percentage of features kept by the ANOVA filter and, for each
# setting, fit an ANOVA -> SVC pipeline and score it on the same data it was
# fit on.
for p in percentile_range:

    anova_filter = SelectPercentile(f_classif, percentile=p)
    anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
    # SelectPercentile's parameter is named `percentile`; the previous
    # `anova__p` is not a valid parameter and set_params() rejects it.
    anova_svm.set_params(anova__percentile=p, svc__C=1).fit(X, y)
    prediction = anova_svm.predict(X)
    score = anova_svm.score(X, y)
コード例 #2
0
ファイル: randomforrest.py プロジェクト: dheepanr/OscarsData
# -*- coding: utf-8 -*-
"""
Created on Mon Feb  8 11:15:02 2016

@author: dheepan.ramanan
"""
from __future__ import division
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from themes_df import oscar_features, labels
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Drop the bookkeeping columns and some of the confounding rating variables.
# `axis` is passed by keyword: the positional form `drop(labels, 1)` is not
# accepted by pandas 2.x.
X = oscar_features.drop(
	["index", "year", "releaseyear", "totalfloat", "reviewagg"], axis=1)
feature_names = X.columns.values
y = labels

scores = []
# tree test
percentile = range(5, 150, 1)
per_oob_error = []
features_scores = {}
# Loop-invariant; kept as a float so the max_features ratio below is a true
# division regardless of the interpreter's division semantics.
n_features = float(len(X.columns))
# Test for feature inclusion: refit the forest with an increasing fraction of
# features considered per split, scoring each fit on the data it was fit on.
# NOTE(review): p runs up to 149, so the fraction exceeds 1.0 whenever X has
# fewer than 149 columns -- confirm this is intended / accepted by sklearn.
for p in percentile:
	clf = RandomForestClassifier(n_estimators=125, max_features=p / n_features,
	                             oob_score=True, n_jobs=-1)
	clf.fit(X, y)
	prediction = clf.predict(X)
	score = clf.score(X, y)
	scores.append(score)
コード例 #3
0
ファイル: svm_model.py プロジェクト: dheepanr/OscarsData
"""
Created on Fri Jan 29 19:23:04 2016

@author: dheepan.ramanan
"""


from sklearn import svm
from sklearn.feature_selection import SelectPercentile
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import f_classif
from themes_df import oscar_features, labels
from sklearn.cross_validation import cross_val_score
from sklearn import metrics


# Feature matrix: everything in oscar_features except the "index" and "year"
# columns (presumably a pandas DataFrame -- confirm in themes_df).
# `axis` is passed by keyword: the positional form `drop(labels, 1)` is not
# accepted by pandas 2.x.
X = oscar_features.drop(["index", "year"], axis=1)
feature_names = X.columns.values
y = labels

anova_filter = SelectPercentile(f_classif)
clf = svm.SVC(probability=True)
accuracy = []
percentile_range = range(1, 100, 2)

# Sweep the percentage of features kept by the ANOVA filter and, for each
# setting, fit an ANOVA -> SVC pipeline and score it on the same data it was
# fit on.
for p in percentile_range:

	anova_filter = SelectPercentile(f_classif, percentile=p)
	anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
	# SelectPercentile's parameter is named `percentile`; the previous
	# `anova__p` is not a valid parameter and set_params() rejects it.
	anova_svm.set_params(anova__percentile=p, svc__C=1).fit(X, y)
	prediction = anova_svm.predict(X)
	score = anova_svm.score(X, y)