def loadData(self):
    """Load the training and test sets from CSV and pass them to the solver.

    Reads ``self.trainFile`` and ``self.testFile`` with ``readCsvData`` and
    forwards both results, in that order, to ``self.solver.loadData``.
    """
    # The training file is read first, then the test file.
    trainingSet = readCsvData(self.trainFile)
    testSet = readCsvData(self.testFile)

    # Hand both data sets over to the solver.
    self.solver.loadData(trainingSet, testSet)
def crossValidate(self, splitAt=0.7):
    """Estimate the solver's accuracy by holding out part of the training set.

    The training file is loaded and cut at ``splitAt``: the leading part is
    used as the training set and the remainder as the validation set. The
    validation rows are handed to the solver without their first column
    (the known answer), and column 1 of the solver's result is then
    compared against that first column.

    Args:
        splitAt: Fraction of the rows used for training (default 0.7).
            The split index is rounded down.

    Returns:
        A tuple ``(rightPercent, rightSol)``. ``rightPercent`` is the
        fraction of validation rows whose predicted value equals the known
        answer (0.0 when the validation set is empty); ``rightSol`` is the
        result of the element-wise comparison.

    Side effects:
        Stores the solver result in ``self.sol`` and the held-out rows in
        ``self.validationData``.
    """
    # Load the training set from the csv file.
    data = readCsvData(self.trainFile)

    # Define where to split the data set; int() rounds down.
    splitter = int(splitAt * len(data))
    trainData = data[:splitter]
    validationData = data[splitter:]

    # Run the solving algorithm on the validation rows with the known
    # answer (first column) stripped off.
    self.solver.loadData(trainData, validationData[:, 1:])
    self.sol = self.solver.solve()
    self.validationData = validationData

    # Check the solution against the known answers.
    rightSol = validationData[:, 0] == self.sol[:, 1]

    # np.bincount(rightSol)[1] raised IndexError whenever no prediction was
    # correct (the result then has no index 1), and the following division
    # truncated to 0 under Python 2. The mean of the boolean comparison is
    # the same fraction without either problem.
    if len(rightSol) == 0:
        # Nothing was held out, so there is nothing to score.
        rightPercent = 0.0
    else:
        rightPercent = np.mean(rightSol)

    return rightPercent, rightSol