Example #1
__author__ = 'kunal'

import csv
from sklearn.ensemble import RandomForestClassifier as rfc
import auxiliary

trainDf = auxiliary.initialise_train(False)
# auxiliary.computeMean(Category)
# Full column set: Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y,Year,Week,Hour
# Keep everything except the columns that are not used as model features
trainDf = trainDf.drop(['Dates', 'Descript', 'Resolution', 'Address'], axis=1)

# Test data
testDf = auxiliary.initialise_test(False)
ids = testDf['Id'].values
# Id,Dates,DayOfWeek,PdDistrict,Address,X,Y,Year,Week,Hour
testDf = testDf.drop(['Id', 'Dates', 'Address'], axis=1)

# Random Forest Algorithm
print(list(trainDf.columns.values))
print(list(testDf.columns.values))
# print(list(trainDf.X.values))

# back to numpy format
trainData = trainDf.values
testData = testDf.values

print('Training...')
forest = rfc(n_estimators=25)
# first column is the Category target; the remaining columns are features
forest = forest.fit(trainData[:, 1:], trainData[:, 0])
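
Not part of the original snippet, but a natural next step: a hedged sketch of scoring the test rows with the fitted forest and pairing the predictions with the Id values collected earlier. The file name and the Id/Category submission layout are assumptions.

print('Predicting...')
predictions = forest.predict(testData)

# write one (Id, predicted Category) row per test record
with open('randomforest_submission.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['Id', 'Category'])
    writer.writerows(zip(ids, predictions))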
Example #2
import warnings

from sklearn.feature_selection import f_classif, SelectKBest
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
import auxiliary

warnings.simplefilter('ignore', DeprecationWarning)
warnings.simplefilter('ignore', UserWarning)
warnings.simplefilter('ignore', Warning)

trainDf = auxiliary.initialise_train(True)

trainDf = trainDf.drop(['Descript', 'Resolution', 'Address', 'Dates'], axis=1)
# hold out 25% of the training rows for validation
trainSplit, validSplit = train_test_split(trainDf, train_size=.75)

# Test data
testDf = auxiliary.initialise_test(True)
ids = testDf['Id'].values
testDf = testDf.drop(['Id', 'Address', 'Dates'], axis=1)

# Attributes used in the model
print(list(trainDf.columns.values))
print(list(testDf.columns.values))

# back to numpy format
trainData = trainDf.values
testData = testDf.values

# Feature Selection:
# The Recursive Feature Elimination (RFE) method is a feature selection approach. It works by recursively removing
# attributes and building a model on the attributes that remain, using model accuracy to identify which
# attributes (and combinations of attributes) contribute the most to predicting the target attribute. A minimal
# sketch of this is shown below.
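
A minimal sketch of RFE on the data prepared above, assuming (as in the other examples) that the first column of trainData holds the Category target; the choice of 5 features to keep is purely illustrative:

# recursively eliminate features, scoring with a logistic-regression model
rfe = RFE(estimator=LogisticRegression(), n_features_to_select=5)
rfe = rfe.fit(trainData[:, 1:], trainData[:, 0])
print(rfe.support_)   # boolean mask over the feature columns
print(rfe.ranking_)   # rank 1 marks a selected attribute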
Example #3
import csv
from sklearn.linear_model import LogisticRegression as lr
import auxiliary
import expolatory_graphs

trainDf = auxiliary.initialise_train(False)

# Full column set: Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y,Year,Week,Hour
# Keep everything except the columns that are not used as model features
trainDf = trainDf.drop(
    ['Dates', 'Descript', 'DayOfWeek', 'Resolution', 'Address'], axis=1)

# Test data
testDf = auxiliary.initialise_test(False)
ids = testDf['Id'].values
# Id,Dates,DayOfWeek,PdDistrict,Address,X,Y,Year,Week,Hour
testDf = testDf.drop(['Id', 'Dates', 'Address', 'DayOfWeek'], axis=1)

# Logistic Regression
print(list(trainDf.columns.values))
print(list(testDf.columns.values))
# print(list(trainDf.X.values))

# back to numpy format
trainData = trainDf.values
testData = testDf.values

print('Training...')
logit = lr()
# first column is the Category target; the remaining columns are features
logit = logit.fit(trainData[:, 1:], trainData[:, 0])
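
As a hedged continuation: logistic regression can also emit per-class probabilities via predict_proba, with logit.classes_ supplying the column order. The one-probability-column-per-Category layout and the file name are assumptions, not taken from the original script.

print('Predicting...')
probabilities = logit.predict_proba(testData)

# write one row per test Id: the Id followed by a probability per class
with open('logit_submission.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['Id'] + list(logit.classes_))
    for row_id, row in zip(ids, probabilities):
        writer.writerow([row_id] + list(row))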