def getMainFeatures(self, train, indexes, goal, samples=5000):
    """Sample fixed-length windows of `train` and return their stats.

    Half of the windows start inside the segment belonging to `goal`
    (rows ``indexes[goal]`` up to the next entry of `indexes`, or up to
    ``len(train)`` for the last entry); the other half come from start
    offsets that are handed to ``self.translateArray`` before use.

    Args:
        train: data indexed as ``train[a:b, :]``; presumably a 2-D NumPy
            array -- confirm against the caller.
        indexes: sequence of segment start offsets into `train`, indexed
            by `goal`.
        goal: position in `indexes` of the segment of interest; also used
            as the seed of NumPy's global random generator.
        samples: number of windows drawn for each of the two halves.

    Returns:
        Whatever ``getStats`` returns for the stacked windows.
    """
    window = 300

    # Seeds the *global* NumPy generator, so the draws below are
    # reproducible per goal (and later np.random users are affected too).
    np.random.seed(goal)

    seg_start = indexes[goal]
    if goal == len(indexes) - 1:
        seg_stop = len(train)
    else:
        seg_stop = indexes[goal + 1]
    seg_len = seg_stop - seg_start

    print(str(samples) + " sequences of " + str(goal))
    # Upper bound is exclusive: start offsets fall in
    # [seg_start, seg_stop - window).
    goal_starts = np.random.randint(seg_start, seg_stop - window, size=samples)

    print(str(samples) + " sequences of other stuff than " + str(goal))
    other_range = len(train) - window * (len(indexes) - 1) - seg_stop + seg_start
    other_starts = np.random.randint(0, other_range, size=samples)
    # The return value is ignored, so this call only matters if
    # translateArray rewrites `other_starts` in place.
    # NOTE(review): presumably it remaps the compact offsets to positions
    # outside the goal segment -- confirm in translateArray.
    self.translateArray(other_starts, indexes, goal, window, seg_len)

    all_starts = np.concatenate((goal_starts, other_starts))
    windows = []
    for start in all_starts:
        windows.append(train[start:start + window, :])
    stacked = np.array(windows)

    print("Calculating stats for " + str(goal))
    features = getStats(stacked)
    print("Done with " + str(goal))
    return features
def getTestStats(self):
    """Compute, save and return stats for every sequence in 'test.csv'.

    Reads 'test.csv' from the current working directory, cuts its rows
    into consecutive runs that share the same value in column index 4,
    passes the runs to ``getStats`` and stores the result with
    ``np.save("testStats", ...)``.

    A run boundary is any row whose column-4 value differs from the row
    before it. The earlier implementation used 0 as a "nothing seen yet"
    marker, which mishandled a run whose id really was 0; comparing
    adjacent rows has no such special value.

    NOTE(review): SOURCE defines getTestStats a second time right after
    this definition; if both live in the same scope the later one replaces
    this one. Consider keeping a single copy.

    Returns:
        Whatever ``getStats`` returns for the array of runs.
    """
    print("Reading csv")
    rows = pd.read_csv('test.csv').values

    # Column 4 holds the id that groups rows into sequences.
    ids = rows[:, 4]
    # Positions where the id differs from the previous row, i.e. the first
    # row of every run after the first one.
    run_starts = np.concatenate(([0], np.flatnonzero(ids[1:] != ids[:-1]) + 1))
    run_stops = np.append(run_starts[1:], len(rows))

    runs = []
    # Progress numbers are 0-based (assumed from the original, whose
    # collapsed formatting left the counter's placement ambiguous).
    for number, (lo, hi) in enumerate(zip(run_starts, run_stops)):
        print("Sequence " + str(number))
        runs.append(rows[lo:hi])

    print("Calculating stats")
    stats = getStats(np.array(runs))
    np.save("testStats", stats)
    return stats
def getTestStats(self):
    """Compute, save and return stats for every sequence in 'test.csv'.

    Reads 'test.csv' from the current working directory, cuts its rows
    into consecutive runs that share the same value in column index 4,
    passes the runs to ``getStats`` and stores the result with
    ``np.save("testStats", ...)``.

    A run boundary is any row whose column-4 value differs from the row
    before it. The earlier implementation used 0 as a "nothing seen yet"
    marker, which mishandled a run whose id really was 0; comparing
    adjacent rows has no such special value.

    NOTE(review): SOURCE contains an identical getTestStats definition
    immediately before this one; if both live in the same scope this later
    one is the definition that takes effect. Consider deleting the
    earlier copy.

    Returns:
        Whatever ``getStats`` returns for the array of runs.
    """
    print("Reading csv")
    rows = pd.read_csv('test.csv').values

    # Column 4 holds the id that groups rows into sequences.
    ids = rows[:, 4]
    # Positions where the id differs from the previous row, i.e. the first
    # row of every run after the first one.
    run_starts = np.concatenate(([0], np.flatnonzero(ids[1:] != ids[:-1]) + 1))
    run_stops = np.append(run_starts[1:], len(rows))

    runs = []
    # Progress numbers are 0-based (assumed from the original, whose
    # collapsed formatting left the counter's placement ambiguous).
    for number, (lo, hi) in enumerate(zip(run_starts, run_stops)):
        print("Sequence " + str(number))
        runs.append(rows[lo:hi])

    print("Calculating stats")
    stats = getStats(np.array(runs))
    np.save("testStats", stats)
    return stats