# -*- coding: utf-8 -*-
"""
Created on Fri Jan 29 19:23:04 2016

@author: dheepan.ramanan

Sweep the percentile of an ANOVA (f_classif) feature filter that feeds an
SVC, fitting and scoring the pipeline once per percentile on the
`oscar_features` / `labels` data imported from `themes_df`.
"""
from sklearn import svm
from sklearn.feature_selection import SelectPercentile
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import f_classif
from themes_df import oscar_features, labels
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import cross_val_score
from sklearn import metrics

# Every column except "index" and "year" is used as a feature.
# `axis` is passed by keyword because newer pandas rejects it positionally.
X = oscar_features.drop(["index", "year"], axis=1)
feature_names = X.columns.values
y = labels

anova_filter = SelectPercentile(f_classif)
clf = svm.SVC(probability=True)
accuracy = []
percentile_range = range(1, 100, 2)

for p in percentile_range:
    anova_filter = SelectPercentile(f_classif, percentile=p)
    anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
    # The selector's parameter is named `percentile`; addressing it as
    # `anova__p` makes set_params raise "Invalid parameter".
    anova_svm.set_params(anova__percentile=p, svc__C=1).fit(X, y)
    prediction = anova_svm.predict(X)
    # NOTE: scored on the same X/y the pipeline was just fitted on.
    score = anova_svm.score(X, y)
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 8 11:15:02 2016

@author: dheepan.ramanan

Sweep the `max_features` fraction of a random forest fitted on the
`oscar_features` / `labels` data imported from `themes_df`, recording the
score of each fit in `scores`.
"""
from __future__ import division

from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from themes_df import oscar_features, labels
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# lets drop some of the confounding rating variables
# (`axis` is passed by keyword because newer pandas rejects it positionally)
X = oscar_features.drop(
    ["index", "year", "releaseyear", "totalfloat", "reviewagg"], axis=1)
feature_names = X.columns.values
y = labels
scores = []

# tree test
percentile = range(5, 150, 1)
per_oob_error = []
features_scores = {}

# Loop-invariant: number of candidate feature columns.
n_features = len(X.columns)

# test for feature inclusion
for p in percentile:
    # A float `max_features` is a fraction of the columns. Clamp it to 1.0 so
    # values of p above the column count use every feature instead of being
    # rejected by scikit-learn; fractions <= 1.0 are passed through unchanged.
    clf = RandomForestClassifier(n_estimators=125,
                                 max_features=min(1.0, p / n_features),
                                 oob_score=True,
                                 n_jobs=-1)
    clf.fit(X, y)
    prediction = clf.predict(X)
    # NOTE: scored on the same X/y the forest was just fitted on.
    score = clf.score(X, y)
    scores.append(score)
"""
Created on Fri Jan 29 19:23:04 2016

@author: dheepan.ramanan

Sweep the percentile of an ANOVA (f_classif) feature filter that feeds an
SVC, fitting and scoring the pipeline once per percentile on the
`oscar_features` / `labels` data imported from `themes_df`.
"""
from sklearn import svm
from sklearn.feature_selection import SelectPercentile
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import f_classif
from themes_df import oscar_features, labels
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import cross_val_score
from sklearn import metrics

# Every column except "index" and "year" is used as a feature.
# `axis` is passed by keyword because newer pandas rejects it positionally.
X = oscar_features.drop(["index", "year"], axis=1)
feature_names = X.columns.values
y = labels

anova_filter = SelectPercentile(f_classif)
clf = svm.SVC(probability=True)
accuracy = []
percentile_range = range(1, 100, 2)

for p in percentile_range:
    anova_filter = SelectPercentile(f_classif, percentile=p)
    anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
    # The selector's parameter is named `percentile`; addressing it as
    # `anova__p` makes set_params raise "Invalid parameter".
    anova_svm.set_params(anova__percentile=p, svc__C=1).fit(X, y)
    prediction = anova_svm.predict(X)
    # NOTE: scored on the same X/y the pipeline was just fitted on.
    score = anova_svm.score(X, y)