# Trainer.py
'''
Created on 2 Aug 2013
@author: apuigdom
'''
from features import getStats
import pickle
import pandas as pd
import numpy as np
from sklearn.ensemble.forest import RandomForestRegressor
from Predictor2 import Predictor2
class Trainer:
    """Fit one random-forest model per device run found in ``train.csv``.

    For every run listed in ``indexesTrain.npy`` the trainer samples
    fixed-length row windows inside the run (target 1) and outside it
    (target 0), turns them into features with ``features.getStats`` and
    pickles the fitted model to ``models/models<i>.mod``.
    """

    def __init__(self):
        """Read ``train.csv`` from the working directory into ``self.train``."""
        print("Reading the training data...")
        self.train = pd.read_csv('train.csv')

    def _iterDeviceChanges(self, train):
        """Yield ``(position, device)`` for each row that starts a new run.

        A run is a stretch of consecutive rows with the same
        ``train['Device']`` value.  The first row always starts a run,
        whatever its device id is (a previous version compared against a
        hard-coded ``0`` and therefore missed a leading run of device 0).
        """
        previous = None
        # Fetch the column once and walk it in row order instead of doing
        # a label lookup on the whole frame for every row.
        for position, device in enumerate(train['Device']):
            if position == 0 or device != previous:
                yield position, device
            previous = device

    def getSeparationIndexes(self, train):
        """Return the row positions at which a new device run begins.

        :param train: mapping/frame with a ``'Device'`` column.
        :returns: ``numpy`` array of start positions, in row order.  Each
            position is also printed as it is found.
        """
        indexList = []
        for position, _device in self._iterDeviceChanges(train):
            indexList.append(position)
            print(position)
        return np.array(indexList)

    def getSeparationDevices(self, train):
        """Return the device id of every device run, in row order.

        :param train: mapping/frame with a ``'Device'`` column.
        :returns: ``numpy`` array with one device id per run (ids repeat
            if a device shows up in several separate runs).  Each id is
            also printed as it is found.
        """
        deviceList = []
        for _position, device in self._iterDeviceChanges(train):
            deviceList.append(device)
            print(device)
        return np.array(deviceList)

    def getTrainData(self, splittedData, i):
        """Concatenate every entry of ``splittedData`` except entry ``i``.

        :param splittedData: sequence of array-likes; it is passed to
            ``np.delete`` so it must be convertible to an array
            (presumably equally shaped chunks -- confirm with callers).
        :param i: position (along axis 0) of the entry to leave out.
        :returns: the remaining entries joined along axis 0.
        """
        remaining = np.delete(splittedData, i, 0)
        remaining = tuple(tuple(chunk) for chunk in remaining)
        return np.concatenate(remaining)

    def getAttributes(self, train, indexes, goal, samples=5000):
        """Return the feature set of ``goal`` and its 1/0 target vector.

        :param train: indexable collection of per-goal feature sets.
        :param indexes: unused; kept so existing callers keep working.
        :param goal: which entry of ``train`` to return.
        :param samples: number of positive and of negative examples.
        :returns: ``(train[goal], targets)`` where ``targets`` holds
            ``samples`` ones followed by ``samples`` zeros, matching the
            order in which ``getMainFeatures`` stacks its windows.
        """
        targetVect = [1] * samples + [0] * samples
        return (train[goal], np.array(targetVect))

    def translateArray(self, toChooseList, indexes, goal, size, indexRange):
        """Map compacted window starts to real row positions, in place.

        ``toChooseList`` is expected to hold positions drawn from a range
        from which the goal run and the last ``size`` rows of every other
        run have been removed.  Walking the run boundaries from left to
        right, each removed stretch is re-inserted by shifting every value
        at or beyond it.

        :param toChooseList: integer ``numpy`` array, modified in place.
        :param indexes: start position of every run.
        :param goal: index (into ``indexes``) of the run to skip entirely.
        :param size: window length; the tail of that many rows is skipped
            at the end of each non-goal run.
        :param indexRange: number of rows in the goal run.
        :returns: ``None``.
        """
        # ``runEnd`` is the start of run ``i + 1``, i.e. the end of run ``i``.
        # The last run has no following boundary, so nothing after it
        # needs shifting.
        for i, runEnd in enumerate(indexes[1:]):
            if i == goal:
                # Jump over the whole goal run.
                toChooseList[toChooseList >= indexes[i]] += indexRange
            else:
                # Jump over the tail where a window would leave run ``i``.
                toChooseList[toChooseList >= runEnd - size] += size

    def getMainFeatures(self, train, indexes, goal, samples=5000):
        """Build the feature set used to train the model for run ``goal``.

        Draws ``samples`` windows of 300 rows starting inside run ``goal``
        and ``samples`` windows starting inside the other runs, stacks
        them (goal windows first) and passes the stack to ``getStats``.

        :param train: 2-D ``numpy`` array of training rows.
        :param indexes: start position of every run in ``train``.
        :param goal: index (into ``indexes``) of the run to model.
        :param samples: number of windows drawn for each of the two groups.
        :returns: whatever ``getStats`` returns for the stacked windows.

        Note: reseeds the global ``numpy`` random generator with ``goal``
        so the sampled windows are reproducible per run.
        """
        size = 300
        np.random.seed(goal)
        beginning = indexes[goal]
        isLastRun = goal == len(indexes) - 1
        end = len(train) if isLastRun else indexes[goal + 1]

        print(str(samples) + " sequences of " + str(goal))
        toChooseGoal = np.random.randint(beginning, end - size, size=samples)

        print(str(samples) + " sequences of other stuff than " + str(goal))
        # Valid starts outside the goal run: all rows, minus the goal run,
        # minus ``size`` rows for each of the other runs.
        toChooseNotGoalRange = (len(train) - size * (len(indexes) - 1)
                                - (end - beginning))
        toChooseNotGoal = np.random.randint(0, toChooseNotGoalRange,
                                            size=samples)
        self.translateArray(toChooseNotGoal, indexes, goal, size,
                            end - beginning)

        finalToChooseList = np.concatenate((toChooseGoal, toChooseNotGoal))
        mapArray = np.array([train[start:start + size, :]
                             for start in finalToChooseList])
        print("Calculating stats for " + str(goal))
        mainFeatures = getStats(mapArray)
        print("Done with " + str(goal))
        return mainFeatures

    def run(self):
        """Train and pickle one model per run listed in ``indexesTrain.npy``.

        Side effects: replaces ``self.train`` by its underlying array and
        writes ``models/models<i>.mod`` for every run ``i``.
        """
        import os  # local import: only needed to prepare the output folder

        print("Reading device separations...")
        indexes = np.load("indexesTrain.npy")
        # Make sure the output folder exists before the expensive work
        # starts, so a missing folder cannot waste a full training pass.
        if not os.path.isdir("models"):
            os.makedirs("models")
        # Guarded so that calling run() a second time does not fail on an
        # array that has no ``values`` attribute.
        self.train = getattr(self.train, "values", self.train)
        print("Getting attributes...")
        trainFeatures = [self.getMainFeatures(self.train, indexes, i)
                         for i in range(len(indexes))]
        for i in range(len(indexes)):
            trainVect, targetVect = self.getAttributes(trainFeatures,
                                                       indexes, i)
            classifier = RandomForestRegressor(n_estimators=500, verbose=2,
                                               n_jobs=4, random_state=1)
            classifier.fit(trainVect, targetVect)
            # Pickle data is binary: write in "wb" and close the file
            # deterministically.
            with open("models/models" + str(i) + ".mod", "wb") as modelFile:
                pickle.dump(classifier, modelFile)
# Script entry point: train the per-device models first, then run the
# predictor.
if __name__ == "__main__":
    Trainer().run()
    Predictor2().run()