def run(self, _model, _files):
    """Cross-validate ``_model`` on every ordered pair of files in ``_files``.

    For each pair (training file at index ``y``, test file at index ``x``)
    both files are wrapped in a ``CSV``, randomized with ``self.seed`` and
    split into 10 folds inside ``results/<self.id>/tmp/``.  A
    ``CrossValidation`` is then run on the two data set ids.  For every
    name in the run's result header, the column mean and the column
    standard deviation of the run's data are stored in cell ``[y][x]`` of
    the matrices ``<name>`` and ``<name>_std`` respectively.  Once all
    pairs are processed, every matrix is saved as
    ``results/<self.id>/<name>.csv``.

    Args:
        _model: model handed to ``Configuration`` and ``CrossValidation``.
        _files: sequence of data files; each one is used both as training
            and as test input.

    Returns:
        None.  The result matrices are only written to disk.
    """
    n_files = len(_files)
    folds = 10
    result_folder = "results/" + self.id + "/"
    matrices = {}

    for y, training in enumerate(_files):
        for x, test in enumerate(_files):
            cfg = Configuration(training, _model, folds)
            cfg.resultFolder = result_folder
            cfg.tmpFolder = result_folder + "tmp/"

            csvA = CSV(training)
            csvA.randomize(self.seed)
            csvA.createFolds(folds, cfg.tmpFolder)

            csvB = CSV(test)
            csvB.randomize(self.seed)
            csvB.createFolds(folds, cfg.tmpFolder)

            cv = CrossValidation(cfg)
            cv.model = _model
            cv.folds = folds
            cv.id = str(y) + "_" + str(x)
            r = cv.run(csvA.id, csvB.id)

            # Layout: [mean of each header column..., std of each column...]
            # TODO: aggregate only when there is more than one result row
            results = np.hstack([r.data.mean(0), r.data.std(0)])

            # Create the result matrices lazily, once the header is known.
            if not matrices:
                for key in r.header:
                    for name in (key, key + "_std"):
                        matrices[name] = ResultMatrix(
                            [FileHandler().generateId(path) for path in _files],
                            np.zeros((n_files, n_files)),
                        )

            # The std values live in the second half of ``results``; reading
            # ``results[i]`` for them would store the mean a second time.
            n_metrics = len(r.header)
            for i, key in enumerate(r.header):
                matrices[key].data[y][x] = results[i]
                matrices[key + "_std"].data[y][x] = results[n_metrics + i]

    for key, matrix in matrices.items():
        matrix.save(result_folder + key + ".csv")
def run(self, _models, _folds, _type):
    """Cross-validate each model in ``_models`` on ``self.training``.

    When ``self.genDataSets`` is set, the training data is randomized with
    ``self.seed`` and fold files are written to ``<self.resultFolder>tmp/``
    (``createFolds`` for ``Type.REGRESSION``, ``stratify`` for
    ``Type.CLASSIFICATION``; any other ``_type`` writes no folds).  Every
    model is then run through a ``CrossValidation``; the column means and
    standard deviations of its result data are appended to one result
    matrix, which is saved as ``<self.resultFolder>results.csv``.  If
    ``self.clear`` is set, the tmp folder is cleared afterwards.

    Args:
        _models: sequence of models; ``modelType`` is set on each of them.
        _folds: fold count passed to the fold creation and ``Configuration``.
        _type: a ``Type`` member selecting how folds are created.

    Returns:
        Tuple ``(header, data)`` taken from the aggregated result matrix.
    """
    tmp_folder = self.resultFolder + "tmp/"

    # The data set id is needed by every cross-validation run, so the data
    # set is created regardless of ``genDataSets``; previously it was only
    # bound inside the ``if`` and a falsy flag ended in UnboundLocalError.
    # NOTE(review): assumes ``CSV.load`` alone is enough to populate
    # ``dataset.id`` and that existing fold files are reused - confirm.
    dataset = CSV()
    dataset.load(self.training)

    if self.genDataSets:
        dataset.randomize(self.seed)
        if _type == Type.REGRESSION:
            dataset.createFolds(_folds, tmp_folder)
        elif _type == Type.CLASSIFICATION:
            dataset.stratify(_folds, tmp_folder)

    R = ResultMatrix()
    for i, model in enumerate(_models):
        model.modelType = _type

        config = Configuration(self.training, model, _folds)
        config.resultFolder = self.resultFolder
        config.tmpFolder = tmp_folder

        cv = CrossValidation(config, str(i))
        r = cv.run(dataset.id, dataset.id)

        # Layout: [mean of each header column..., std of each column...]
        # TODO: aggregate only when there is more than one result row
        results = np.hstack([r.data.mean(0), r.data.std(0)])
        R.add(r.header + [name + "_std" for name in r.header], results)

        if self.verbose:
            # Print the column header only once, before the first model.
            if i == 0:
                r.printHeader()
            r.printAggregated()

    FileHandler().saveMatrix(R.header, R.data, self.resultFolder + "results.csv")

    if self.clear:
        FileHandler().clearFolder(tmp_folder)

    return R.header, R.data