Beispiel #1
0
    def run(self, _model, _files):
        """Cross-validate ``_model`` on every (training, test) pair of ``_files``.

        For each ordered pair of files a ``CrossValidation`` run is performed
        with 10 folds. The column-wise mean and standard deviation of the
        returned result data are stored in one square matrix per result-header
        entry (``<key>`` for the mean, ``<key>_std`` for the standard
        deviation), indexed as ``[training index][test index]``. All matrices
        are finally saved as ``results/<self.id>/<key>.csv``.

        :param _model: model handed to ``Configuration`` and ``CrossValidation``.
        :param _files: sequence of data file paths; each one is used both as
            training and as test file.
        """
        R = {}
        l = len(_files)
        folds = 10
        resultFolder = "results/" + self.id + "/"

        for y, training in enumerate(_files):
            for x, test in enumerate(_files):
                cfg = Configuration(training, _model, folds)
                cfg.resultFolder = resultFolder
                cfg.tmpFolder = cfg.resultFolder + "tmp/"

                csvA = CSV(training)
                csvA.randomize(self.seed)
                csvA.createFolds(folds, cfg.tmpFolder)

                csvB = CSV(test)
                csvB.randomize(self.seed)
                csvB.createFolds(folds, cfg.tmpFolder)

                cv = CrossValidation(cfg)
                cv.model = _model
                cv.folds = folds

                cv.id = str(y) + "_" + str(x)
                r = cv.run(csvA.id, csvB.id)

                # Keep mean and std apart so they cannot be mixed up when the
                # matrices are filled below.
                # TODO: mean only if size>1 !
                means = r.data.mean(0)
                stds = r.data.std(0)

                # init the result matrices (once, as soon as the header is known)
                if not R:
                    ids = [FileHandler().generateId(file) for file in _files]
                    for key in r.header:
                        # each matrix gets its own copy of the id list
                        R[key] = ResultMatrix(list(ids), np.zeros((l, l)))
                        R[key + "_std"] = ResultMatrix(list(ids),
                                                       np.zeros((l, l)))

                # update the result matrices
                for i, key in enumerate(r.header):
                    R[key].data[y][x] = means[i]
                    # the "_std" matrix must receive the standard deviation,
                    # not the mean
                    R[key + "_std"].data[y][x] = stds[i]

        for key, matrix in R.items():
            matrix.save(resultFolder + key + ".csv")
	def run(self, _training, _model, _batchSize, _resultFile):
		"""Evaluate ``_model`` on growing prefixes of the training data.

		The training file is loaded, randomized with the fixed argument 1000
		and stripped of its indices. For every full batch of ``_batchSize``
		rows a subset containing the first ``(i + 1) * _batchSize`` rows is
		written to ``<self.resultFolder>subset_<i>.csv`` and passed to
		``Experiment(...).regression([_model], 10)``; each returned
		(header, data) pair is added to a ``ResultMatrix`` that is finally
		saved to ``_resultFile``.

		:param _training: path of the training data file.
		:param _model: model evaluated on every subset.
		:param _batchSize: number of rows by which each subset grows.
		:param _resultFile: path the collected results are saved to.
		"""
		source = CSV(_training)
		source.randomize(1000)
		source.removeIndices()

		collected = ResultMatrix()
		# Only complete batches are counted; a trailing partial batch is skipped.
		batchCount = int(len(source.data) / _batchSize)
		for batchNo in range(batchCount):
			rowLimit = (batchNo + 1) * _batchSize

			subset = CSV()
			subset.header = source.header
			subset.data = source.data[0:rowLimit]

			subsetPath = self.resultFolder + "subset_" + str(batchNo) + ".csv"
			subset.save(subsetPath)

			# second argument is presumably the fold count - confirm in Experiment
			header, data = Experiment(subsetPath).regression([_model], 10)
			collected.add(header, data)

		collected.save(_resultFile)
Beispiel #3
0
    def run(self, _models, _folds, _type):
        """Cross-validate every model in ``_models`` on ``self.training``.

        If ``self.genDataSets`` is set, the training data is randomized with
        ``self.seed`` and the fold files are written to
        ``<self.resultFolder>tmp/`` (``createFolds`` for regression,
        ``stratify`` for classification). Each model is then run through
        ``CrossValidation``; the column-wise mean and standard deviation of
        its result data are appended to a ``ResultMatrix`` which is saved as
        ``<self.resultFolder>results.csv``.

        :param _models: sequence of models; ``modelType`` of each is set to
            ``_type``.
        :param _folds: number of folds passed to the fold generation and to
            ``Configuration``.
        :param _type: ``Type.REGRESSION`` or ``Type.CLASSIFICATION``.
        :return: tuple ``(header, data)`` of the aggregated result matrix.
        """
        tmpFolder = self.resultFolder + "tmp/"

        # The CSV is needed for its id in the cross-validation below, so it is
        # created and loaded even when no data sets are generated. Previously
        # it was only bound inside the genDataSets branch, so the method
        # failed with a NameError whenever genDataSets was false.
        # NOTE(review): assumes csv.id is available after load() and is not
        # changed by randomize() - confirm against the CSV class.
        csv = CSV()
        csv.load(self.training)

        if self.genDataSets:
            csv.randomize(self.seed)

            if _type == Type.REGRESSION:
                csv.createFolds(_folds, tmpFolder)
            elif _type == Type.CLASSIFICATION:
                csv.stratify(_folds, tmpFolder)

        R = ResultMatrix()
        for i, model in enumerate(_models):
            model.modelType = _type
            config = Configuration(self.training, model, _folds)
            config.resultFolder = self.resultFolder
            config.tmpFolder = tmpFolder

            cv = CrossValidation(config, str(i))
            r = cv.run(csv.id, csv.id)
            # TODO: mean only if size>1 !
            results = np.hstack([r.data.mean(0), r.data.std(0)])
            R.add(r.header + [x + "_std" for x in r.header], results)

            if self.verbose:
                # print the header only once, before the first model's results
                if i == 0:
                    r.printHeader()
                r.printAggregated()

        FileHandler().saveMatrix(R.header, R.data,
                                 self.resultFolder + "results.csv")

        if self.clear:
            FileHandler().clearFolder(tmpFolder)

        return R.header, R.data