Esempio n. 1
0
	def __init__(self, _id="exp"):
		"""Remember the experiment id and set up its result folders.

		_id -- experiment name; results are placed in "results/<_id>/".
		"""
		self.id = _id
		self.resultFolder = "results/" + self.id + "/"

		# top-down: results/, results/<id>/, results/<id>/tmp/
		for folder in ("results", self.resultFolder, self.resultFolder + "tmp/"):
			FileHandler().createFolder(folder)
Esempio n. 2
0
    def __init__(self, _id="multi_0"):
        """Store the experiment id and seed, then set up the result folders.

        _id -- experiment name, used as the sub-folder name below "results".
        """
        self.id = _id
        self.seed = 1000

        base = "results/" + self.id
        for folder in ("results", base, base + "/tmp"):
            FileHandler().createFolder(folder)

        # NOTE(review): clearFolder receives the bare id, not "results/<id>";
        # confirm that this is the folder that is meant to be cleared.
        FileHandler().clearFolder(self.id)
Esempio n. 3
0
    def __init__(self, _training, _id="exp_0", **kwargs):
        """Configure the experiment and create its result folders.

        _training -- training data set of the experiment (stored as-is)
        _id       -- experiment name; results go to "results/<_id>/"
        kwargs    -- 'verbose' (default True) is stored in self.verbose
        """
        self.id = _id
        self.training = _training
        self.seed = 1000
        self.genDataSets = True
        self.clear = False
        self.verbose = kwargs.get('verbose', True)
        self.resultFolder = "results/" + self.id + "/"

        # top-down: results/, results/<id>/, results/<id>/tmp/
        for folder in ("results", self.resultFolder,
                       self.resultFolder + "tmp/"):
            FileHandler().createFolder(folder)
Esempio n. 4
0
    def run(self, _file, _callType, _numAttributes, _name="ml_test"):
        """Wrap the code in _file into an MSP430 program and return its size.

        The content of _file is prefixed with the msp430 header include; a
        printf prototype and the dummy main generated from _callType and
        _numAttributes are appended. The result is written next to _file with
        the extension ".msp430", compiled and linked (using _name for the link
        step). The value of Compiler().computeSize for the link result is
        returned as float.
        """
        import os

        # Replace only the extension: cutting the whole path at its first "."
        # yields an empty name for paths such as "../tmp/code.c" or "./code.c".
        target = os.path.splitext(_file)[0] + ".msp430"

        code = "#include <msp430.h>\n\n" + "\n".join(
            FileHandler().read(_file)) + "\n\n"
        code += "void printf(char *, ...);\n"
        code += self.generateDummyMain(_callType, _numAttributes)

        FileHandler().write(code, target)

        self.compile(target)
        result = self.link(target, _name)
        return float(Compiler().computeSize(result))
Esempio n. 5
0
    def run(self, _file, _callType, _numAttributes):
        """Wrap the code in _file into an ESP32 program and compile it.

        The content of _file is prefixed with the stdio/stdlib includes and
        the dummy main generated from _callType and _numAttributes is
        appended. The program is written twice: next to _file with the
        extension ".esp32" and to "main/hello_world_main.c" below
        Settings().espProject. Returns the result of self.compile().
        """
        import os

        code = "#include <stdio.h>\n#include <stdlib.h>\n\n"
        code += "\n".join(FileHandler().read(_file))
        code += self.generateDummyMain(_callType, _numAttributes)

        # Keep a copy next to the input. Replace only the extension: cutting
        # the whole path at its first "." yields an empty name for paths such
        # as "../tmp/code.c" or "./code.c".
        target = os.path.splitext(_file)[0] + ".esp32"
        FileHandler().write(code, target)

        # overwrite the main source file of the ESP project
        # (presumably the file that self.compile() builds - TODO confirm)
        FileHandler().write(code,
                            Settings().espProject + "main/hello_world_main.c")

        return self.compile()
Esempio n. 6
0
    def run(self, _model, _files):
        """Cross-validate _model for every (training, test) pair of _files.

        For each pair both files are passed through CSV.randomize (with
        self.seed) and CSV.createFolds (10 folds, written to
        "results/<id>/tmp/"), then CrossValidation.run is called with the ids
        of the two data sets. For every key of the result header two
        len(_files) x len(_files) matrices are filled (row = index of the
        training file, column = index of the test file): "<key>" holds the
        column means and "<key>_std" the column standard deviations of the
        cross validation result. Every matrix is finally saved as
        "results/<id>/<key>.csv".
        """
        R = {}
        l = len(_files)
        folds = 10
        for y in range(l):
            training = _files[y]
            for x in range(l):
                cfg = Configuration(training, _model, folds)
                cfg.resultFolder = "results/" + self.id + "/"
                cfg.tmpFolder = cfg.resultFolder + "tmp/"

                test = _files[x]

                csvA = CSV(training)
                csvA.randomize(self.seed)
                csvA.createFolds(folds, cfg.tmpFolder)

                csvB = CSV(test)
                csvB.randomize(self.seed)
                csvB.createFolds(folds, cfg.tmpFolder)

                cv = CrossValidation(cfg)
                cv.model = _model
                cv.folds = folds

                cv.id = str(y) + "_" + str(x)
                r = cv.run(csvA.id, csvB.id)

                # layout: [mean of every column ..., std of every column ...]
                results = np.hstack([r.data.mean(0),
                                     r.data.std(0)
                                     ])  # TODO: mean only if size>1 !
                numKeys = len(r.header)

                # init the result matrices
                if not R:
                    for key in r.header:
                        R[key] = ResultMatrix([
                            FileHandler().generateId(file) for file in _files
                        ], np.zeros((l, l)))
                        R[key + "_std"] = ResultMatrix([
                            FileHandler().generateId(file) for file in _files
                        ], np.zeros((l, l)))

                # update the result matrices
                for i, key in enumerate(r.header):
                    R[key].data[y][x] = results[i]
                    # the std values follow the means in the stacked vector;
                    # using results[i] here would store the mean a second time
                    R[key + "_std"].data[y][x] = results[numKeys + i]

        for key, matrix in R.items():
            matrix.save("results/" + self.id + "/" + key + ".csv")
Esempio n. 7
0
	def crossValidation(self, _model, _training, _attributes, _folder, _discretization=None, **kwargs):
		"""Validate _model on the pre-generated fold files found in _folder.

		_model          -- model handed to CodeGenerator().export
		_training       -- path of the training csv; its file name (without
		                   ".csv") is the stem of the fold file names
		_attributes     -- attribute list; the type of _attributes[0] selects
		                   regression ("NUMERIC") or classification (anything else)
		_folder         -- folder holding "training_<stem>_<i>.csv" and
		                   "test_<stem>_<i>.csv"; code and prediction files are
		                   written there as well
		_discretization -- stored in self.discretization and passed to the export
		kwargs          -- 'folds': number of folds (default 10). The historic
		                   key 'xlabel' is still honoured when 'folds' is absent.

		Returns the ResultMatrix with one added entry per fold and the
		ConfusionMatrix (only merged into for classification).
		"""
		# The fold count used to be read from 'xlabel' only, which looks like a
		# copy-paste slip; 'folds' takes precedence, 'xlabel' stays as fallback.
		folds = kwargs.get('folds', kwargs.get('xlabel', 10))
		self.discretization = _discretization
		if _attributes[0].type=="NUMERIC":
			self.modelType=Type.REGRESSION
		else:
			self.modelType=Type.CLASSIFICATION

		R = ResultMatrix()
		C = ConfusionMatrix(_attributes[0].type.strip("{").strip("}").split(","))
		fileId = FileHandler().getFileName(_training).replace(".csv", "")

		# every fold overwrites the same code file
		codeFile = _folder + "code.cpp"

		for i in range(folds):
			foldId = fileId + "_" + str(i) + ".csv"
			training = _folder + "training_" + foldId
			test = _folder + "test_" + foldId
			predictions = _folder + "predictions_" + str(i) + ".csv"

			# export the model code
			CodeGenerator().export(training, _model, codeFile, self.discretization)

			# apply the validation
			if self.modelType==Type.REGRESSION:
				keys, results, conf = self.regression(codeFile, _attributes, test, predictions)
				R.add(keys, results)
			elif self.modelType==Type.CLASSIFICATION:
				keys, results, conf = self.classification(codeFile, _attributes, test, predictions)
				R.add(keys, results)
				C.merge(conf)

		return R, C
Esempio n. 8
0
 def save(self, _file):
     """Write four comma-separated lines to _file: header, min, max, widths."""
     rows = [self.header]
     for values in (self.min, self.max, self.widths):
         rows.append([str(value) for value in values])
     content = "\n".join(",".join(row) for row in rows)
     FileHandler().write(content, _file)
Esempio n. 9
0
def initExperiment(_args):
	"""Run the experiment selected by the parsed command line arguments.

	_args is read for the attributes name, classification, correlation,
	regression, gen_code and visualize. Exactly one of the three modes is
	executed, checked in this order:

	classification -- 10-fold classification experiment on that data set
	correlation    -- correlation matrix of that data set, written to
	                  "results/<name>/result.csv"
	regression     -- 10-fold regression experiment on that data set

	With gen_code set, the model code is exported in addition (classification
	and regression only); with visualize set, the results are plotted.

	Only the parameter _args is used; the function no longer depends on a
	module-level variable called "args" being in scope.
	"""
	FileHandler().createFolder("results")

	resultFolder = "results/" + _args.name + "/"
	FileHandler().createFolder(resultFolder)
	resultFile = resultFolder + "result.csv"

	if _args.classification:
		e = Experiment(_args.classification, _args.name)
		models = initModels(_args, Type.CLASSIFICATION)
		e.classification(models, 10)

		if _args.gen_code:
			exportCode(_args, resultFolder, _args.classification, models)

		if _args.visualize:
			# one cross validation result file per model
			files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
			xTicks = [model.modelName for model in models]
			ResultVisualizer().boxplots(files, _args.visualize, xTicks,  ylabel=_args.visualize)

	elif _args.correlation:
		csv = CSV()
		csv.load(_args.correlation)
		csv.computeCorrelationMatrix(resultFile)

		if _args.visualize:
			ResultVisualizer().colorMap(resultFile)

	elif _args.regression:
		e = Experiment(_args.regression, _args.name)
		models = initModels(_args, Type.REGRESSION)
		e.regression(models, 10)

		if _args.gen_code:
			exportCode(_args, resultFolder, _args.regression, models)

		if _args.visualize:
			# one cross validation result file per model
			files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
			xTicks = [model.modelName for model in models]
			ResultVisualizer().boxplots(files, _args.visualize, xTicks,  ylabel=_args.visualize)

	print("[LIMITS]: results written to src/" + resultFolder)
Esempio n. 10
0
	def exportCode(self, _data, _csv, _attributes, _fileOut, _fileIn="", **kwargs):
		"""Initialise the model from _data and write its generated code to _fileOut.

		A "{" in the type of _attributes[0] selects classification code (the
		class names are taken from that type string); otherwise regression code
		is generated. kwargs may carry 'discretization' (default None), which is
		forwarded to initModel. _csv and _fileIn are not used here.
		"""
		targetType = _attributes[0].type
		self.initModel(_data, _attributes, kwargs.get("discretization", None))

		if "{" in targetType:
			labels = targetType.strip("{").strip("}").split(",")
			source = self.model.generateClassificationCode(_attributes, labels)
		else:
			source = self.model.generateRegressionCode(_attributes)

		FileHandler().write(source, _fileOut)
Esempio n. 11
0
	def build(self, _codeFile, _test):
		"""Generate and compile a C++ program that predicts every row of _test.

		The program consists of the standard includes, the content of
		_codeFile and a main() that streams one prediction call per data row
		of _test (first line skipped, first column of each row dropped),
		separated by "," and printed as a single line. It is written to
		self.tempCodeFile and compiled into self.tempExecutable.
		"""
		parts = ["#include <stdio.h>\n#include <stdlib.h>\n#include <sstream>\n#include <iostream>\n\n"]
		parts.append("\n".join(FileHandler().read(_codeFile)) + "\n")
		parts.append("\nint main(int _argc, char* argv[])\n{\n")

		rows = FileHandler().read(_test)
		parts.append("\tstd::stringstream stream;\n")

		last = len(rows) - 1
		for idx, row in enumerate(rows[1:], start=1):
			call = self.buildEmbeddedPredictionCall(row.split(",")[1:])
			# no separator after the final prediction
			separator = "" if idx == last else " << \",\""
			parts.append("\tstream << " + call + separator + ";\n")

		parts.append("\n\tstd::cout << stream.str() << std::endl;\n\n")
		parts.append("\treturn 0;\n")
		parts.append("}")

		FileHandler().write("".join(parts), self.tempCodeFile)
		Compiler().run(self.tempCodeFile, self.tempExecutable)
Esempio n. 12
0
    def run(self, _models, _folds, _type):
        """Cross-validate every model and collect the aggregated results.

        _models -- list of models; each gets modelType set to _type
        _folds  -- number of folds
        _type   -- Type.REGRESSION or Type.CLASSIFICATION

        With self.genDataSets set, the training data is randomized with
        self.seed and the fold files are (re)generated in
        "<resultFolder>tmp/" (createFolds for regression, stratify for
        classification). For every model the column means and standard
        deviations of its cross validation result are added as one row of the
        result matrix, which is saved to "<resultFolder>results.csv". With
        self.clear set, the tmp folder is cleared afterwards.

        Returns the tuple (header, data) of the result matrix.
        """
        # The id of the data set is needed below even when existing fold
        # files are reused, so the training data is loaded in either case
        # (previously `csv` was unbound when genDataSets was False).
        csv = CSV()
        csv.load(self.training)

        if self.genDataSets:
            csv.randomize(self.seed)

            if _type == Type.REGRESSION:
                csv.createFolds(_folds, self.resultFolder + "tmp/")
            elif _type == Type.CLASSIFICATION:
                csv.stratify(_folds, self.resultFolder + "tmp/")

        R = ResultMatrix()
        for i, model in enumerate(_models):
            model.modelType = _type
            config = Configuration(self.training, model, _folds)
            config.resultFolder = self.resultFolder
            config.tmpFolder = self.resultFolder + "tmp/"

            cv = CrossValidation(config, str(i))
            r = cv.run(csv.id, csv.id)
            results = np.hstack([r.data.mean(0),
                                 r.data.std(0)])  # TODO: mean only if size>1 !
            R.add(r.header + [x + "_std" for x in r.header], results)

            if self.verbose:
                # print the column titles once, before the first model
                if i == 0:
                    r.printHeader()
                r.printAggregated()

        FileHandler().saveMatrix(R.header, R.data,
                                 self.resultFolder + "results.csv")

        if self.clear:
            FileHandler().clearFolder(self.resultFolder + "tmp/")

        return R.header, R.data
Esempio n. 13
0
	def export(self, _training, _model, _out, _discretize=False):
		"""Train _model with WEKA on _training and export its code to _out.

		The training csv is converted to a uniquely named ARFF file in "tmp/",
		WEKA is trained on it and the raw model output it leaves in "tmp/" is
		handed to the model interface for code export. With _discretize set,
		the result of csv.discretizeData() is forwarded as 'discretization'.
		Both temporary files are deleted at the end.
		"""
		workDir = "tmp/"
		FileHandler().createFolder("tmp")

		# unique suffix for the temporary files of this export
		suffix = "_" + str(uuid.uuid1())
		arffFile = workDir + "train" + suffix + ".arff"
		rawFile = workDir + "raw" + suffix + ".txt"

		csv = CSV(_training)
		csv.convertToARFF(arffFile, False)
		discretization = csv.discretizeData() if _discretize else None
		attributes = csv.findAttributes(0)

		weka = WEKA()
		weka.folder = workDir
		weka.train(_model, arffFile, suffix)
		data = "\n".join(FileHandler().read(rawFile))

		FileHandler().checkFolder(_out)
		weka.modelInterface.exportCode(data, csv, attributes, _out, _training, discretization=discretization)

		FileHandler().deleteFiles([arffFile, rawFile])
Esempio n. 14
0
 def exportCode(self,
                _data,
                _csv,
                _attributes,
                _fileOut,
                _fileIn="",
                **kwargs):
     """Write the regression code of the model graph to _fileOut.

     A "{" in the type of _attributes[0] marks a classification target; in
     that case only an error message is printed and nothing is written.
     Otherwise the graph built from _data is turned into code and the
     function "tree_0" is renamed to "predict".
     """
     if "{" in _attributes[0].type:
         print("[ERROR] M5 does not support classification")
         return

     graph = self.generateGraph(_data, _attributes)
     source = graph.generateGraphCode().replace("tree_0(", "predict(")
     FileHandler().write(source, _fileOut)
Esempio n. 15
0
    def exportCode(self,
                   _data,
                   _csv,
                   _attributes,
                   _fileOut,
                   _fileIn="",
                   **kwargs):
        """Initialise the model and write its generated code to _fileOut.

        For Type.REGRESSION the minimum and the range (max - min) of column 0
        of _csv are passed to the regression code generator. For any other
        model type classification code is generated, with the class names
        taken from the type string of _attributes[0].
        """
        code = ""
        if self.modelType == Type.REGRESSION:
            self.initModel(_data, _csv, _attributes, _fileIn)

            # builtin float instead of the alias np.float, which was removed
            # in NumPy 1.24; the resulting dtype (float64) is the same
            y = np.array(_csv.getColumn(0)).astype(float)
            yMin = min(y)
            yRange = max(y) - yMin

            code = self.model.generateRegressionCode(_attributes, yMin, yRange)
        else:  # classification
            classes = _attributes[0].type.strip("{").strip("}").split(",")
            self.initModel(_data, _csv, _attributes, _fileIn)
            code = self.model.generateClassificationCode(_attributes, classes)

        FileHandler().write(code, _fileOut)
Esempio n. 16
0
	def run(self, _training, _models, _platforms):
		"""Evaluate every model on _training and export its code.

		Per model: a 10-fold regression experiment is run and its result added
		to a result matrix, then the model is trained with WEKA on the complete
		training data and its code is exported to "recommend.c". The loop over
		_platforms has no body yet. The collected results and the model names
		are printed at the end.
		"""
		R = ResultMatrix()
		names = []
		for model in _models:
			# run the cross validation to compute the model performance
			names.append(model.toString())
			header, result = Experiment(_training).regression([model], 10)
			R.add(header, result)

			# train with the global training data and export code
			arffFile = "tmp/recommend.arff"

			csv = CSV()
			csv.load(_training)
			csv.convertToARFF(arffFile, False)
			attributes = csv.findAttributes(0)
			lAtt = len(attributes) - 1  # not used yet
			WEKA().train(model, arffFile, "0")

			data = "\n".join(FileHandler().read("tmp/raw0.txt"))
			model.exportCode(data, csv, attributes, "recommend.c")

			# compile platform-specific code (not implemented yet)
			for platform in _platforms:
				pass

		print(R.header, R.data)
		print(names)
Esempio n. 17
0
	def save(self, _file):
		"""Write the comma-joined header and the data lines to _file."""
		headerLine = ",".join(self.header)
		body = "\n".join(self.data)
		FileHandler().write(headerLine + "\n" + body, _file)
Esempio n. 18
0
	def exportFoldData(self, _folds, _folder):
		"""Write the index mapping and the training/test files of all folds.

		_folds  -- list of subsets, one per fold
		_folder -- target folder (used as a plain string prefix)

		Files written, with <id> = self.id and <i> = fold number:
		indices_<id>.csv, training_<id>_<i>.csv / test_<id>_<i>.csv and the
		matching .arff files. Fold <i> uses subset <i> as test data and all
		other subsets as training data.
		"""
		writer = FileHandler()
		numFolds = len(_folds)
		trainBase = _folder + "training_" + self.id + "_"
		testBase = _folder + "test_" + self.id + "_"

		# generate the index mapping
		indices = []
		for subset in _folds:
			indices.extend(subset.removeIndices())
		writer.write("\n".join(indices), _folder + "indices_" + self.id + ".csv")

		# generate the fold data
		headerLine = ",".join(self.header) + "\n"
		for i in range(numFolds):
			trainFile = trainBase + str(i) + ".csv"
			testFile = testBase + str(i) + ".csv"

			writer.write(headerLine, trainFile)
			writer.write(headerLine, testFile)

			for j, subset in enumerate(_folds):
				# subset i is the test data of fold i, the rest is training data
				target = testFile if i == j else trainFile
				writer.append("\n".join(subset.data) + "\n", target)

		# generate ARFF files
		arff = ARFF(self.id)
		attributes = self.findAttributes(1)
		for i in range(numFolds):
			trainSet = CSV()
			trainSet.load(trainBase + str(i) + ".csv")
			testSet = CSV()
			testSet.load(testBase + str(i) + ".csv")

			writer.write(arff.serialize(attributes, trainSet.data), trainBase + str(i) + ".arff")
			writer.write(arff.serialize(attributes, testSet.data), testBase + str(i) + ".arff")
Esempio n. 19
0
 def save(self, _file):
     """Store header and data in _file via FileHandler().saveMatrix."""
     header, data = self.header, self.data
     FileHandler().saveMatrix(header, data, _file)
Esempio n. 20
0
from plot.ResultVisualizer import ResultVisualizer

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = ANN()

# perform a 10-fold cross validation
e = Experiment(training, "example_ann_feature_importance")
e.regression([model], 10)

# collect one input-layer ranking per fold in the result matrix M
M = ResultMatrix()
csv = CSV(training)
attributes = csv.findAttributes(0)

for i in range(10):
    # per-fold files are read from the experiment's tmp folder;
    # note that `training` is rebound to the training file of fold i here
    training = e.tmp() + "training_mnoA_" + str(i) + ".csv"
    data = "\n".join(FileHandler().read(e.tmp() + "raw0_" + str(i) + ".txt"))

    # initialise the model from the raw output of fold i, then add its
    # ranking, labelled with all csv columns except the first
    ANN_WEKA(model).initModel(data, csv, attributes, training)
    M.add(csv.header[1:], model.computeInputLayerRanking())
M.normalizeRows()
M.sortByMean()
M.save(e.path("ann_features.csv"))

# plot the saved ranking as a bar chart and store it as <experiment id>.png
ResultVisualizer().barchart(e.path("ann_features.csv"),
                            xlabel="Feature",
                            ylabel="Relative Feature Importance",
                            savePNG=e.path(e.id + ".png"))
Esempio n. 21
0
from models.randomforest.RandomForest import RandomForest
from weka.models.RandomForest import RandomForest as RandomForest_WEKA
from experiment.Experiment import Experiment
from data.CSV import CSV
from code.CodeGenerator import CodeGenerator
from data.FileHandler import FileHandler


# define the training data set and set up the model
training = "../examples/vehicleClassification.csv"
model = RandomForest()
model.config.depth = 7


# perform a 10-fold cross validation
e = Experiment(training, "example_rf")
e.classification([model], 10)


# initialise the model from the raw output file "raw0_0.txt" in the
# experiment's tmp folder and export it via exportEps
csv = CSV()
csv.load(training)
attributes = csv.findAttributes(0)

data = "\n".join(FileHandler().read(e.tmp() + "raw0_0.txt"))

RandomForest_WEKA(model).initModel(data, attributes)
# NOTE(review): the depth is configured through model.config.depth above but
# read from model.depth here - confirm that both refer to the same value.
model.exportEps(model.depth+1, 10, 10, len(attributes)-1)
Esempio n. 22
0
	def load(self, _file):
		"""Read _file; keep its first line as header and the rest as data.

		Also stores the path in self.file and the id generated from it by
		FileHandler in self.id.
		"""
		rows = FileHandler().read(_file)
		headerLine, body = rows[0], rows[1:]
		self.header = headerLine.split(",")
		self.data = body
		self.file = _file
		self.id = FileHandler().generateId(_file)
Esempio n. 23
0
    def generateCode(self, _file):
        """Generate the C code of the network and write it to _file.

        The generated source consists of, in this order: the math.h include,
        the class name array (classification only), the weight matrices
        (omitted when self.useUnrolling is set), the threshold vectors, the
        helper functions and a predict() function that takes one float per
        entry of self.inputLayer. predict() returns "float", or "const char*"
        for classification.
        """
        csv = CSV(self.training)
        attributes = csv.findAttributes(0)
        normed = self.normalize(csv, attributes)
        resultType = "float"

        code = "#include <math.h>\n"
        if self.modelType == Type.CLASSIFICATION:
            code += ""
            # class names come from the type string of the first attribute
            # and are emitted as quoted C string literals
            classes = attributes[0].type.strip("{").strip("}").split(",")
            classes = ["\"" + key + "\"" for key in classes]

            code += CodeGenerator().generateArray("const char*", "classes",
                                                  classes) + "\n\n"
            resultType = "const char*"
        else:
            code += "\n"

        # weight matrices (w0, w1, ...; the last layer is named w_out)
        if not self.useUnrolling:
            for i in range(0, len(self.layers)):
                W = self.layers[i][0]
                name = "w" + str(i)
                if i == len(self.layers) - 1:
                    name = "w_out"

                code += "const " + CodeGenerator().generateMatrix(
                    "float", name, W) + "\n"
            code += "\n"

        # threshold vectors (th0, th1, ...; the last layer is named th_out)
        for i in range(0, len(self.layers)):
            matrix = self.layers[i]  # not used below
            T = self.layers[i][1]
            name = "th" + str(i)
            if i == len(self.layers) - 1:
                name = "th_out"

            code += "const " + CodeGenerator().generateArray("float", name,
                                                             T) + "\n"
        code += "\n"

        # generate the required ann-specific methods
        code += self.sigmoid() + "\n\n"
        code += self.activate() + "\n\n"
        if not self.useUnrolling:
            code += self.mult() + "\n\n"

        if self.modelType == Type.CLASSIFICATION:
            code += CodeGenerator().findMax("float") + "\n\n"

        # generate the callable method; parameter names are the input layer
        # keys prefixed with "_"
        header = ["_" + key for key in self.inputLayer]
        code += resultType + " predict(" + ", ".join(
            ["float " + x for x in header]) + ")\n{\n"

        # input layer: every parameter name is replaced in place by its
        # normalisation expression, built from normed[i + 1]
        # (entry 0 is skipped - presumably the target attribute; TODO confirm)
        for i in range(0, len(header)):
            header[i] = self.norm(header[i], normed[i + 1][0],
                                  normed[i + 1][1])
        code += "\t" + CodeGenerator().generateArray("float", "in",
                                                     header) + "\n\n"

        # activate the layers
        if self.useUnrolling:
            code += self.activateLayersWithUnrolling(normed)
        else:
            code += self.activateLayers(header, normed)

        code += "}\n"

        #code += CodeGenerator().generateDummyMain(len(attributes)-1)

        FileHandler().write(code, _file)
Esempio n. 24
0
	def convertToARFF(self, _file, _removeIndices=True):
		"""Serialize the data as ARFF and write it to _file.

		With _removeIndices set (default), self.removeIndices() is called
		before the attributes are determined.
		"""
		if _removeIndices:
			self.removeIndices()

		attributes = self.findAttributes(0)
		content = ARFF(self.id).serialize(attributes, self.data)
		FileHandler().write(content, _file)