"""Fit a random-forest baseline on the crime training data.

Loads the pre-processed train/test frames from the project's `auxiliary`
module, strips the columns that are not model inputs, and trains a
25-tree RandomForestClassifier with column 0 (Category) as the target.
"""
__author__ = 'kunal'

import csv

from sklearn.ensemble import RandomForestClassifier as rfc

import auxiliary

# Training frame: keep Category (target) plus the encoded feature columns;
# Dates/Descript/Resolution/Address are raw text fields, not features.
trainDf = auxiliary.initialise_train(False)
trainDf = trainDf.drop(['Dates', 'Descript', 'Resolution', 'Address'], axis=1)

# Test frame: hold the Id column aside (needed for a submission file),
# then drop it and the raw text columns.
testDf = auxiliary.initialise_test(False)
ids = testDf['Id'].values
testDf = testDf.drop(['Id', 'Dates', 'Address'], axis=1)

# Show which attributes actually feed the model.
print(list(trainDf.columns.values))
print(list(testDf.columns.values))

# Back to plain numpy arrays for scikit-learn.
trainData = trainDf.values
testData = testDf.values

print('Training...')
# Column 0 holds the target; every remaining column is a feature.
forest = rfc(n_estimators=25).fit(trainData[:, 1:], trainData[:, 0])
"""Feature-selection experiments on the crime training data.

Prepares train/validation/test frames and sets up for Recursive Feature
Elimination (RFE) / univariate (SelectKBest) feature ranking.

Fix: the original script called `warnings.simplefilter`,
`train_test_split`, and `auxiliary.*` without importing any of them,
which raises NameError at runtime — the missing imports are added here.
"""
import warnings

import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import RFE, SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
# NOTE(review): on sklearn < 0.18 this lived in sklearn.cross_validation.
from sklearn.model_selection import train_test_split

import auxiliary

# Silence the deprecation chatter from the old sklearn APIs used below.
warnings.simplefilter('ignore', DeprecationWarning)
warnings.simplefilter('ignore', UserWarning)
warnings.simplefilter('ignore', Warning)

# Training frame: drop raw text columns that are not model inputs.
trainDf = auxiliary.initialise_train(True)
trainDf = trainDf.drop(['Descript', 'Resolution', 'Address', 'Dates'], axis=1)
# NOTE(review): despite the names, X and y are the 75%/25% train/validation
# DataFrame splits, not features/labels. Names kept for compatibility with
# any downstream code not visible here.
X, y = train_test_split(trainDf, train_size=.75)

# Test frame: keep Id aside for the submission file, then drop non-features.
testDf = auxiliary.initialise_test(True)
ids = testDf['Id'].values
testDf = testDf.drop(['Id', 'Address', 'Dates'], axis=1)

# Attributes used in the model
print(list(trainDf.columns.values))
print(list(testDf.columns.values))

# Back to plain numpy arrays for scikit-learn.
trainData = trainDf.values
testData = testDf.values

# Feature Selection:
# The Recursive Feature Elimination (RFE) method is a feature selection approach. It works by recursively removing
# attributes and building a model on those attributes that remain. It uses the model accuracy to identify which
# attributes (and combination of attributes) contribute the most to predicting the target attribute.
"""Fit a logistic-regression baseline on the crime training data.

Mirrors the random-forest script, but additionally drops DayOfWeek and
trains sklearn's LogisticRegression with column 0 (Category) as target.
"""
import csv

from sklearn.linear_model import LogisticRegression as lr

import auxiliary
import expolatory_graphs

# Training frame: keep Category (target) plus feature columns; raw text
# fields and DayOfWeek are excluded from this model.
trainDf = auxiliary.initialise_train(False)
trainDf = trainDf.drop(
    ['Dates', 'Descript', 'DayOfWeek', 'Resolution', 'Address'], axis=1)

# Test frame: hold Id aside for the submission file, then drop non-features.
testDf = auxiliary.initialise_test(False)
ids = testDf['Id'].values
testDf = testDf.drop(['Id', 'Dates', 'Address', 'DayOfWeek'], axis=1)

# Show which attributes actually feed the model.
print(list(trainDf.columns.values))
print(list(testDf.columns.values))

# Back to plain numpy arrays for scikit-learn.
trainData = trainDf.values
testData = testDf.values

print('Training...')
# Column 0 holds the target; every remaining column is a feature.
logit = lr().fit(trainData[:, 1:], trainData[:, 0])