def generateRegressionCode(self, _attributes):
    """Generate a C predict() that averages the outputs of all regression trees.

    Emits the code of every tree, then a predict() function that sums the
    per-tree predictions and divides by the number of trees.  With an active
    discretization the sum stays integer; otherwise it is a float.

    :param _attributes: attribute list; element 0 is the label attribute
    :return: the generated C code as a string
    """
    code = ""
    for g in self.trees:
        # FIX: removed unused local 'root = g.root'
        code += g.generateGraphCode() + "\n\n"

    # mean
    code += CodeGenerator().generateFunctionHeader(
        "predict", CSV().createAttributeDict(_attributes, self.discretization)) + "\n{\n"
    if self.discretization:
        code += "\tint sum = 0;\n"
    else:
        code += "\tfloat sum = 0;\n"

    # call every tree with the feature attributes (label excluded via [1:])
    for i in range(0, len(self.trees)):
        code += "\tsum += " + CodeGenerator().generateFunctionCall(
            "tree_" + str(i),
            CSV().createAttributeDict(_attributes[1:], self.discretization)) + ";\n"

    if self.discretization:
        code += "\n\treturn sum / " + str(len(self.trees)) + ";\n"
        # TODO: this would be required to undo the discretization, however we skip it here as we want a fully discretized model - it is assumed to dediscretization is done at the application level
        #code += "\n\treturn (sum / " + str(len(self.trees)) + ") * " + str((self.discretization.widths[0])) + " + " + str((self.discretization.min[0])) + ";\n"
    else:
        code += "\n\treturn sum / " + str(len(self.trees)) + ".0;\n"
    code += "}"
    return code
def initModel(self, _data, _csv, _attributes, _fileIn=""):
    """Initialize the SVM model from WEKA's raw text output.

    For regression a single weight vector and offset are parsed; for
    classification one SVM per class pair is parsed (one-vs-one).

    :param _data: raw WEKA output text
    :param _csv: training data as a CSV object
    :param _attributes: attribute list; element 0 is the label attribute
    :param _fileIn: unused here; kept for interface compatibility
    """
    self.model.clear()
    if self.modelType == Type.REGRESSION:
        lines = self.extractLines(_data, "weights (not support vectors):",
                                  "Number of kernel evaluations:")
        weights, offset = self.parseWeights(lines)
        self.model.weights = [weights]
        self.model.offsets = [offset]
        self.model.features = list(CSV().createAttributeDict(_attributes[1:]).keys())
        self.model.normedValues = self.model.normalize(_csv, self.model.features)
        # label range/minimum, needed to denormalize the prediction later
        x = np.array(_csv.getColumn(0))
        # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float is the documented replacement.
        y = x.astype(float)
        yRange = max(y) - min(y)
        yMin = min(y)
        # NOTE(review): the generated code is currently discarded here —
        # presumably exported elsewhere; verify against callers.
        code = self.model.generateRegressionCode(_attributes, yMin, yRange)
    else:  # classification
        # NOTE(review): 'classes' is parsed but unused; parseSVMs fills model.classes
        classes = _attributes[0].type.strip("{").strip("}").split(",")
        self.model.weights, self.model.classes, self.model.offsets = self.parseSVMs(_data)
        self.model.features = list(CSV().createAttributeDict(_attributes[1:]).keys())
        self.model.normedValues = self.model.normalize(_csv, self.model.features)
def initModel(self, _data, _csv, _attributes, _fileIn=""):
    """Initialize the ANN model from WEKA's raw text output.

    Parses the per-node weights, groups them into layers, and converts each
    layer into a weight matrix plus threshold vector.

    :param _data: raw WEKA output text
    :param _csv: training data as a CSV object (unused here; interface compat)
    :param _attributes: attribute list; element 0 is the label attribute
    :param _fileIn: path of the training CSV file (re-loaded for the header)
    """
    self.model.clear()
    N = []  # parsed nodes
    O = []  # output-layer nodes
    L = []  # hidden layers
    csv = CSV()
    csv.load(_fileIn)
    self.model.inputLayerKeys = csv.header[1:]
    self.model.training = _fileIn
    # a nominal label type contains "{...}" -> classification; else regression
    if not "{" in _attributes[0].type:
        N, O = self.parseNodes(_data, 1)  # single output node for regression
        L = self.parseLayers(self.model.config.hiddenLayers, N, O)
        self.model.modelType = Type.REGRESSION
        self.model.outputLayerKeys.append(csv.header[0])
    else:
        classes = self.extractClasses(_attributes)
        N, O = self.parseNodes(_data, len(classes))  # one output node per class
        L = self.parseLayers(self.model.config.hiddenLayers, N, O)
        self.model.modelType = Type.CLASSIFICATION
        self.model.outputLayerKeys = classes
    # hidden layers: one weight matrix + threshold vector each
    for i in range(len(L)):
        W, T = self.generateWeightMatrix(L[i])
        self.model.weights.append(W)
        self.model.thresholds.append(T)
    # output layer appended last
    W, T = self.generateWeightMatrix(O)
    self.model.weights.append(W)
    self.model.thresholds.append(T)
    self.model.L = L
def colorMap(self, _file, **kwargs):
    """Render the numeric matrix in _file (e.g. a correlation matrix) as an
    annotated heat map with the CSV header as tick labels.

    :param _file: CSV file whose numeric body is a square matrix
    :param kwargs: plot options (e.g. cmap, title) forwarded to PlotTool
    """
    csv = CSV(_file)
    M = csv.getNumericData()
    # midpoint of the value range decides black vs. white cell labels
    center = (np.min(M) + np.max(M)) / 2
    pt = PlotTool(kwargs)
    # FIX: dropped unused bindings 'im' and 'text'
    pt.ax.imshow(M, cmap=kwargs.get("cmap", "coolwarm"))
    for i in range(len(M)):
        for j in range(len(M)):
            v = M[i, j]
            color = "w" if v > center else "k"
            pt.ax.text(j, i, format(v, '.2f'),
                       ha="center", va="center", color=color)
    pt.ax.set_xticks(np.arange(len(M)))
    pt.ax.set_yticks(np.arange(len(M)))
    pt.ax.set_xticklabels(csv.header)
    pt.ax.set_yticklabels(csv.header)
    pt.ax.set_title(kwargs.get("title", ""))
    plt.setp(pt.ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")
    pt.finalize(kwargs)
def scatter(self, _files, _keyX, _keyY, **kwargs):
    """Overlay a scatter plot of column _keyY against column _keyX for every file.

    :param _files: CSV file paths to plot
    :param _keyX: column key for the x values
    :param _keyY: column key for the y values
    :param kwargs: plot options forwarded to PlotTool
    """
    tool = PlotTool(kwargs)
    for path in _files:
        data = CSV(path)
        xs = data.getNumericColumnWithKey(_keyX)
        ys = data.getNumericColumnWithKey(_keyY)
        plt.scatter(xs, ys, marker="*", c="blue")
    tool.finalize(kwargs)
def boxplots(self, _files, _key, _xTickLabels, **kwargs):
    """Draw one box per file for the values of column _key.

    :param _files: CSV file paths, one box each
    :param _key: column key whose values feed each box
    :param _xTickLabels: labels placed under the boxes
    :param kwargs: plot options forwarded to PlotTool
    """
    columns = [CSV(path).getNumericColumnWithKey(_key) for path in _files]
    tool = PlotTool(kwargs)
    tool.boxplot(columns, _xTickLabels)
    tool.finalize(kwargs)
def errorbars(self, _files, _key, **kwargs):
    """Plot column _key of each file with symmetric error bars taken from the
    companion column "<_key>_std".

    :param _files: CSV file paths to overlay
    :param _key: column key of the mean values
    :param kwargs: plot options forwarded to PlotTool
    """
    tool = PlotTool(kwargs)
    for path in _files:
        data = CSV(path)
        means = data.getNumericColumnWithKey(_key)
        stds = data.getNumericColumnWithKey(_key + "_std")
        # x positions 100, 200, ... — assumes each row represents a step of
        # 100 (e.g. training-set size); TODO confirm against the callers
        positions = np.arange(1, len(data.data) + 1) * 100
        plt.errorbar(positions, means, yerr=stds, capsize=7)
    tool.finalize(kwargs)
def readAsMatrix(self, _file, _key, _sX, _sY):
    """Reshape column _key of a CSV file into a (_sY, _sX) matrix.

    The column is consumed column-major: entry (y, x) comes from row
    x * _sY + y.  Values of -1 mark missing measurements and are stored
    as NaN.

    :param _file: CSV file path
    :param _key: column key to read
    :param _sX: number of matrix columns
    :param _sY: number of matrix rows
    :return: numpy array of shape (_sY, _sX)
    """
    csv = CSV(_file)
    M = np.zeros((_sY, _sX))
    Y = csv.getNumericColumnWithKey(_key)
    for x in range(_sX):
        for y in range(_sY):
            v = Y[x * _sY + y]
            if v == -1:
                # FIX: assigning None into a float ndarray raises TypeError;
                # NaN is the correct missing-value marker for numpy.
                v = np.nan
            M[y][x] = v
    return M
def computeMemorySize(_training, _model, _resultFolder, _discretization):
    """Export the (discretized) model as C++ and report its memory footprint
    on the three supported microcontroller targets.

    :param _training: training CSV file (used to count the features)
    :param _model: trained model to export
    :param _resultFolder: unused here; kept for interface compatibility
    :param _discretization: discretization handed to the code generator
    :return: list of memory figures in order [Arduino, MSP430, ESP32]
    """
    csv = CSV(_training)
    numFeatures = len(csv.findAttributes(0)) - 1  # minus the label column
    codeFile = "example_rf_sweet_spot.cpp"
    CodeGenerator().export(_training, _model, codeFile, _discretization)
    targets = [Arduino(), MSP430(), ESP32()]
    return [target.run(codeFile, "unsigned char", numFeatures) for target in targets]
def generateClassificationCode(self, _attributes, _classes):
    """Emit C code for one-vs-one SVM classification.

    One weight vector per pairwise SVM is generated; predict() evaluates
    every SVM, lets each vote for one of its two classes, and returns the
    class with the most wins.

    :param _attributes: attribute list; element 0 is the label attribute
    :param _classes: class names in label order
    :return: the generated C code as a string
    """
    code = ""
    code += CodeGenerator().generateArray("const char*", "classes",
                                          ["\"" + c + "\"" for c in _classes]) + "\n"

    # compute the weight vectors, one per pairwise SVM
    for idx in range(0, len(self.model.weights)):
        vec = self.model.getWeights(self.model.weights[idx], self.model.features)
        code += CodeGenerator().generateArray("const float", "w" + str(idx), vec) + "\n"

    code += "\n" + self.generateSVMCode() + "\n\n"
    code += CodeGenerator().findMax("int") + "\n\n"
    code += CodeGenerator().generateFunctionHeader("predict", CSV().createAttributeDict(_attributes)) + "\n{\n"

    # the normalized feature values
    code += "\t" + CodeGenerator().generateArray("float", "v", self.model.normedValues) + "\n\n"

    # one-vs-one voting: each SVM increments the win counter of its winner
    code += "\t" + CodeGenerator().generateArray("int", "wins", ["0"] * len(_classes)) + "\n"
    for idx in range(0, len(self.model.weights)):
        first = str(_classes.index(self.model.classes[idx][0]))
        second = str(_classes.index(self.model.classes[idx][1]))
        code += "\tsvm(v, w" + str(idx) + ", " + str(self.model.offsets[idx]) + ", " + str(len(self.model.features)) + ")<0 ? wins[" + first + "]++ : wins[" + second + "]++;\n"
    code += "\n\tunsigned int index = findMax(wins, " + str(len(_classes)) + ");\n\n"
    code += "\treturn classes[index];\n"
    code += "}\n\n"
    return code
def plotSweetSpot(_example, _file, _layers, _nodes):
    """Plot model accuracy and per-platform memory usage of the ANN size sweep.

    Top-left: accuracy with error bars, one curve per hidden-layer count.
    The other three panels show the memory footprint (kB) on MSP430,
    Atmega and ESP32 via the module-level plot() helper.

    :param _example: experiment object providing the output path
    :param _file: result CSV produced by the sweep
    :param _layers: number of hidden-layer configurations (matrix columns)
    :param _nodes: number of node configurations (matrix rows)
    """
    # FIX: removed unused 'csv = CSV(_file)' — the data is read via readAsMatrix
    fig, axs = plt.subplots(2, 2)
    M = ResultVisualizer().readAsMatrix(_file, "accuracy", _layers, _nodes)
    S = ResultVisualizer().readAsMatrix(_file, "accuracy_std", _layers, _nodes)
    for i in range(_layers):
        y = M[:, i]
        yStd = S[:, i]
        x = range(len(y))
        ax = axs[0, 0]
        ax.errorbar(x, y, yerr=yStd, capsize=7)
        ax.set_title('Model Performance')
        ax.set(xlabel='#Nodes on Hidden Layer', ylabel='Accuracy')
    # memory footprints, converted from bytes to kB
    plot(ResultVisualizer().readAsMatrix(_file, "msp", _layers, _nodes) / 1000,
         axs[0, 1], "MSP430", _nodes)
    plot(ResultVisualizer().readAsMatrix(_file, "arduino", _layers, _nodes) / 1000,
         axs[1, 0], "Atmega", _nodes)
    plot(ResultVisualizer().readAsMatrix(_file, "esp", _layers, _nodes) / 1000,
         axs[1, 1], "ESP32", _nodes)
    fig.tight_layout()
    fig.set_size_inches(8, 5)
    fig.savefig(_example.path("example_ann_sweet_spot.png"), format="png")
    plt.show()
def regression(self, _codeFile, _attributes, _test, _resultFile=""):  # att->train,
    """Run the compiled regression model on _test and score it.

    Builds the executable from _codeFile, runs it on the test data, and
    compares its comma-separated predictions against the label column.

    :param _codeFile: generated C/C++ source to compile
    :param _attributes: attribute list (unused here; interface compat)
    :param _test: test CSV file; column 0 holds the true labels
    :param _resultFile: optional path for the raw label/prediction dump
    :return: (metric names, metric values, empty ConfusionMatrix)
    """
    self.modelType = Type.REGRESSION
    self.build(_codeFile, _test)
    L = np.array([])  # true labels
    P = np.array([])  # model predictions
    labels = CSV(_test).getColumn(0)
    predictions = self.execute(self.tempExecutable).split(",")
    for i in range(len(predictions)):
        prediction = float(predictions[i])
        if self.discretization:
            # map the discretized prediction back to the original value range
            prediction = self.discretization.dediscretize(self.discretization.header[0], prediction)
        L = np.append(L, float(labels[i]))
        P = np.append(P, prediction)
    mae = self.computeMAE(L, P)
    rmse = self.computeRMSE(L, P)
    r2 = self.computeR2(L, P)
    #
    if _resultFile:
        raw = ResultMatrix()
        raw.add(["label", "prediction"], L)
        raw.add(["label", "prediction"], P)
        # transpose so each saved row is one (label, prediction) pair
        raw.data = raw.data.transpose()
        raw.save(_resultFile)
    return ["r2", "mae", "rmse"], np.array([r2, mae, rmse]), ConfusionMatrix()
def barchart(self, _file, **kwargs):
    """Bar chart of the column means of _file with std-deviation error bars.

    :param _file: CSV file whose columns are averaged
    :param kwargs: plot options forwarded to PlotTool
    """
    M = CSV(_file).toMatrix()
    Y = np.mean(M.data, axis=0)
    S = np.std(M.data, axis=0)
    # FIX: pass kwargs to PlotTool so title/labels/save options take effect,
    # consistent with the other plot helpers (colorMap, scatter, boxplots)
    pt = PlotTool(kwargs)
    pt.barchart(Y, S, M.header)
    pt.finalize(kwargs)
def computeMemorySize(_training, _model, _regression):
    """Export the model as C++ and measure its memory footprint on three MCUs.

    :param _training: training CSV file (used to count the features)
    :param _model: trained model to export
    :param _regression: True for a regression model (C return type float),
                        False for classification (const char*)
    :return: list of memory figures in order [Arduino, MSP430, ESP32]
    """
    csv = CSV(_training)
    lAtt = len(csv.findAttributes(0)) - 1  # minus the label column
    codeFile = "example_rf_sweet_spot.cpp"
    CodeGenerator().export(_training, _model, codeFile)
    # FIX: idiomatic truth test instead of '== True'
    resultType = "float" if _regression else "const char*"
    mem = []
    platforms = [Arduino(), MSP430(), ESP32()]
    for platform in platforms:
        mem.append(platform.run(codeFile, resultType, lAtt))
    return mem
def generateClassificationCode(self, _attributes, _classes):
    """Generate C code where every tree votes and the majority class wins.

    Each tree's code is rewritten to return a class index instead of a
    class-name string; predict() tallies the votes and returns the name of
    the winning class.

    :param _attributes: attribute list; element 0 is the label attribute
    :param _classes: class names in label order
    :return: the generated C code as a string
    """
    code = ""
    classes = ["\"" + x + "\"" for x in _classes]
    code += CodeGenerator().generateArray("const char*", "classes", classes) + "\n\n"

    # emit each tree, rewritten to return int indices
    for g in self.trees:
        # FIX: removed unused local 'root = g.root'
        treeCode = g.generateGraphCode() + "\n\n"
        # FIX: hoisted the loop-invariant signature rewrite out of the class loop
        treeCode = treeCode.replace("const char* tree", "int tree")
        for i in range(0, len(classes)):
            treeCode = treeCode.replace("return " + classes[i], "return " + str(i))
        code += treeCode

    code += CodeGenerator().findMax("int") + "\n\n"

    # majority decision
    code += CodeGenerator().generateFunctionHeader(
        "predict", CSV().createAttributeDict(_attributes, self.discretization)) + "\n{\n"
    code += "\t" + CodeGenerator().generateArray(
        "int", "wins", ["0"] * len(_classes)) + "\n"
    for i in range(0, len(self.trees)):
        code += "\twins[" + CodeGenerator().generateFunctionCall(
            "tree_" + str(i),
            CSV().createAttributeDict(_attributes[1:], self.discretization)) + "]++;\n"
    code += "\tunsigned int index = findMax(wins, " + str(
        len(_classes)) + ");\n\n"
    code += "\treturn classes[index];\n"
    code += "}"
    return code
def regressionRF(_training, _trees, _depth, _file, _resultFolder, _discretization):
    """Sweep random-forest size (#trees x depth), recording accuracy and memory.

    For every (numTrees, depth) combination a 10-fold cross validation is run,
    the generated code is evaluated, and the per-platform memory footprint is
    appended; all rows are saved to _file.

    :param _training: training CSV file
    :param _trees: maximum number of trees (inclusive)
    :param _depth: maximum tree depth (inclusive)
    :param _file: output CSV for the result matrix
    :param _resultFolder: forwarded to computeMemorySize
    :param _discretization: discretization used for evaluation and export
    """
    # FIX: used the _training parameter (was the undefined global 'training')
    csv = CSV(_training)
    attributes = csv.findAttributes(0)
    R = ResultMatrix()
    for numTrees in range(1, _trees + 1):
        for depth in range(1, _depth + 1):
            rf = RandomForest()
            rf.config.trees = numTrees
            rf.config.depth = depth

            # perform a cross validation to generate the training/test files
            e = Experiment(_training, "example_rf_sweet_spot_disc", verbose=False)
            e.regression([rf], 10)
            #
            r, c = CodeEvaluator().crossValidation(rf, _training, attributes, e.tmp(), _discretization)
            result = np.hstack([r.data.mean(0), r.data.std(0)])
            header = r.header + [x + "_std" for x in r.header]

            mem = computeMemorySize(_training, rf, _resultFolder, _discretization)
            header += ["arduino", "msp", "esp"]
            result = np.hstack([result, mem])

            print(["#trees=" + str(numTrees) + "/" + str(_trees) + " depth=" +
                   str(depth) + "/" + str(_depth) + ' mem=', mem], flush=True)
            R.add(header, result)
    R.save(_file)
def init(self, _lines, _attributes):
    """Build the decision tree from WEKA's indented text representation.

    Each input line's depth is given by its number of '|' separators.  A
    deeper line becomes a left ("if") child; an equal or shallower line
    becomes a right ("else") child of the appropriate ancestor.

    :param _lines: text lines of the WEKA tree dump
    :param _attributes: attribute list used to build the attribute dict
    :return: the root node of the constructed tree
    """
    self.attributes = CSV().createAttributeDict(_attributes, self.discretization)
    lines = _lines
    self.root = Node(0, 0, "", "")
    lastNode = self.root
    depth = 0  # depth of the previously processed line
    for line in lines:
        if len(line)==0:
            continue
        # tree depth is encoded by the number of '|' separators
        d = len(line.split("|"))
        condition = line.split("|")[-1].split(" : ")[0].strip(" ")
        result = ""
        # a line containing " : " and "(" carries a leaf prediction
        if " : " in line and "(" in line:
            result = line.split("|")[-1].strip(" ").split(" : ")[1].split("(")[0]
            self.leaves += 1
            # TODO: IF THE RESULT IS NOMIMAL, ADD ""
        self.depth = max(d, self.depth)
        if d>depth:
            # deeper line: left child -> "if" branch of the last node
            node = Node(lastNode, d, condition, result)
            self.nodes += 1
            lastNode.leftChild = node
            lastNode = node
        elif d<depth:
            # shallower line: climb up to the matching ancestor, attach "else"
            delta = depth - d
            parent = lastNode.parent
            for i in range(0, delta):
                parent = parent.parent
            node = Node(parent, d, "else", result)
            self.nodes += 1
            parent.rightChild = node
            lastNode = node
        else:
            # same depth: right child -> "else" branch of the shared parent
            # NOTE(review): self.nodes is not incremented in this branch,
            # unlike the other two — verify whether that is intentional
            parent = lastNode.parent
            node = Node(parent, d, "else", result)
            parent.rightChild = node
            lastNode = node
        depth = d
    return self.root
def classification(self, _codeFile, _attributes, _test, _resultFile=""):  # att->train,
    """Run the compiled classifier on _test and score it against the labels.

    :param _codeFile: generated C/C++ source to compile
    :param _attributes: attribute list; element 0 holds the nominal classes
    :param _test: test CSV file; column 0 holds the true labels
    :param _resultFile: unused here; kept for interface compatibility
    :return: (metric names, metric values, confusion matrix)
    """
    self.modelType = Type.CLASSIFICATION
    self.build(_codeFile, _test)
    classNames = _attributes[0].type.strip("{").strip("}").split(",")
    confusion = ConfusionMatrix(classNames)
    trueLabels = CSV(_test).getColumn(0)
    predicted = self.execute(self.tempExecutable).split(",")
    for i, p in enumerate(predicted):
        confusion.update(p, trueLabels[i])
    accuracy, precision, recall, f_score = confusion.calc()
    metrics = np.array([accuracy, precision, recall, f_score])
    return ["accuracy", "precision", "recall", "f_score"], metrics, confusion
def run(self, _models, _folds, _type):
    """Cross-validate every model on the training data and aggregate results.

    Optionally (re)creates the fold files, runs one cross validation per
    model, collects mean/std of each metric, and saves everything to
    results.csv in the result folder.

    :param _models: models to evaluate
    :param _folds: number of cross-validation folds
    :param _type: Type.REGRESSION or Type.CLASSIFICATION
    :return: (result header, result data)
    """
    if self.genDataSets:
        csv = CSV()
        csv.load(self.training)
        csv.randomize(self.seed)
        if _type == Type.REGRESSION:
            csv.createFolds(_folds, self.resultFolder + "tmp/")
        elif _type == Type.CLASSIFICATION:
            # FIX: dropped the unused 'classes =' binding; stratify is
            # called for its side effect of writing the fold files
            csv.stratify(_folds, self.resultFolder + "tmp/")
    R = ResultMatrix()
    for i in range(0, len(_models)):
        model = _models[i]
        model.modelType = _type
        config = Configuration(self.training, model, _folds)
        config.resultFolder = self.resultFolder
        config.tmpFolder = self.resultFolder + "tmp/"
        cv = CrossValidation(config, str(i))
        # NOTE(review): 'csv' is only bound when genDataSets is set —
        # confirm run() is never called with genDataSets disabled
        r = cv.run(csv.id, csv.id)
        results = np.hstack([r.data.mean(0), r.data.std(0)])
        # TODO: mean only if size>1 !  (FIX: was the typo "TUDO")
        R.add(r.header + [x + "_std" for x in r.header], results)
        if self.verbose:
            if i == 0:
                r.printHeader()
            r.printAggregated()
    FileHandler().saveMatrix(R.header, R.data, self.resultFolder + "results.csv")
    if self.clear:
        FileHandler().clearFolder(self.resultFolder + "tmp/")
    return R.header, R.data
def generateRegressionCode(self, _attributes, _yMin, _yRange):
    """Emit a C predict() that evaluates the linear SVM and denormalizes
    the result back into the original label range.

    :param _attributes: attribute list; element 0 is the label attribute
    :param _yMin: minimum of the training labels (denormalization offset)
    :param _yRange: range of the training labels (denormalization scale)
    :return: the generated C code as a string
    """
    code = ""
    # compute the weight vectors
    for i in range(0, len(self.model.weights)):
        w = self.model.getWeights(self.model.weights[i], self.model.features)
        code += CodeGenerator().generateArray("const float", "w" + str(i), w) + "\n"
    code += "\n" + self.generateSVMCode() + "\n\n"
    code += CodeGenerator().generateFunctionHeader("predict", CSV().createAttributeDict(_attributes)) + "\n{\n"
    code += "\t" + CodeGenerator().generateArray("float", "v", self.model.normedValues) + "\n\n"
    # FIX: wrap the offset in str() — a numeric offset would raise a
    # TypeError on string concatenation (the classification path already
    # uses str() here)
    code += "\tfloat result = svm(v, w0, " + str(self.model.offsets[0]) + ", " + str(len(self.model.normedValues)) + ");\n"
    # denormalize the label
    code += "\treturn result * " + str(_yRange) + " " + self.add(_yMin) + ";\n"
    code += "}\n\n"
    return code
def initExperiment(_args):
    """Dispatch the requested experiment: classification, correlation matrix,
    or regression, based on the parsed command-line arguments.

    :param _args: parsed argparse namespace
    """
    FileHandler().createFolder("results")
    # FIX: consistently use the _args parameter; the function previously
    # mixed in the global 'args', which breaks when called independently
    resultFolder = "results/" + _args.name + "/"
    FileHandler().createFolder(resultFolder)
    resultFile = resultFolder + "result.csv"

    if _args.classification:
        e = Experiment(_args.classification, _args.name)
        models = initModels(_args, Type.CLASSIFICATION)
        e.classification(models, 10)
        if _args.gen_code:
            exportCode(_args, resultFolder, _args.classification, models)
        if _args.visualize:
            files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
            xTicks = [model.modelName for model in models]
            ResultVisualizer().boxplots(files, _args.visualize, xTicks, ylabel=_args.visualize)
    elif _args.correlation:
        csv = CSV()
        csv.load(_args.correlation)
        csv.computeCorrelationMatrix(resultFile)
        if _args.visualize:
            ResultVisualizer().colorMap(resultFile)
    elif _args.regression:
        e = Experiment(_args.regression, _args.name)
        models = initModels(_args, Type.REGRESSION)
        e.regression(models, 10)
        if _args.gen_code:
            exportCode(_args, resultFolder, _args.regression, models)
        if _args.visualize:
            files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
            xTicks = [model.modelName for model in models]
            ResultVisualizer().boxplots(files, _args.visualize, xTicks, ylabel=_args.visualize)

    print("[LIMITS]: results written to src/" + resultFolder)
def run(self, _training, _models, _platforms):
    """Cross-validate each candidate model, then train it on the full data
    set and export its C code.

    :param _training: training CSV file
    :param _models: candidate models to evaluate
    :param _platforms: target platforms (compilation not implemented yet)
    """
    R = ResultMatrix()
    M = []  # FIX: removed stray semicolon
    for model in _models:
        # run the cross validation to compute the model performance
        M.append(model.toString())
        e = Experiment(_training)
        header, result = e.regression([model], 10)
        R.add(header, result)

        # train with the global training data and export code
        training_arff = "tmp/recommend.arff"
        csv = CSV()
        csv.load(_training)
        csv.convertToARFF(training_arff, False)
        attributes = csv.findAttributes(0)
        lAtt = len(attributes) - 1
        WEKA().train(model, training_arff, "0")
        data = "\n".join(FileHandler().read("tmp/raw0.txt"))
        codeFile = "recommend.c"
        model.exportCode(data, csv, attributes, codeFile)

        # compile platform-specific code
        # FIX: replaced the bare "" statement with an explicit placeholder
        for platform in _platforms:
            pass
            #print(model.toString() + " : " + platform.toString())
    print(R.header, R.data)
    print(M)
def exportWeights(self, _features, _file):
    """Save the per-SVM weight vectors to a CSV file, one row per SVM.

    For one-vs-one classification the two class names prefix each row;
    features without a weight are written as 0.

    :param _features: ordered feature names forming the columns
    :param _file: output CSV path
    """
    out = CSV()
    pairwise = len(self.classes) > 0
    out.header = (['class0', 'class1'] + _features) if pairwise else _features
    for idx in range(len(self.weights)):
        W = self.weights[idx]
        row = [W[feature] if feature in W else 0 for feature in _features]
        values = [str(x) for x in row]
        if pairwise:
            out.data.append(','.join(self.classes[idx] + values))
        else:
            out.data.append(','.join(values))
    out.save(_file)
def export(self, _training, _model, _out, _discretize=False):
    """Train _model on _training via WEKA and export the result as C code.

    Converts the CSV to a temporary ARFF file, runs the WEKA training,
    hands the raw output to the model's code exporter, and cleans up the
    temporary files afterwards.

    :param _training: training CSV file
    :param _model: model to train and export
    :param _out: output path for the generated code
    :param _discretize: when True, discretize the data before training
    """
    FileHandler().createFolder("tmp")
    uid = "_" + str(uuid.uuid1())
    workDir = "tmp/"
    arffFile = "train" + uid + ".arff"

    csv = CSV(_training)
    csv.convertToARFF(workDir + arffFile, False)
    d = csv.discretizeData() if _discretize else None
    attributes = csv.findAttributes(0)

    weka = WEKA()
    weka.folder = workDir
    weka.train(_model, workDir + arffFile, uid)
    rawFile = workDir + "raw" + uid + ".txt"
    data = "\n".join(FileHandler().read(rawFile))

    FileHandler().checkFolder(_out)
    weka.modelInterface.exportCode(data, csv, attributes, _out, _training, discretization=d)
    FileHandler().deleteFiles([workDir + arffFile, rawFile])
def generateCode(self, _file):
    """Generate the complete C source of the trained ANN and write it to _file.

    Emits weight matrices and threshold vectors per layer, the ANN helper
    functions (sigmoid, activation, optional matrix multiply, findMax for
    classification), and a predict() entry point that normalizes its inputs
    and propagates them through the layers.

    :param _file: output path for the generated C code
    """
    csv = CSV(self.training)
    attributes = csv.findAttributes(0)
    normed = self.normalize(csv, attributes)
    resultType = "float"
    code = "#include <math.h>\n"
    if self.modelType == Type.CLASSIFICATION:
        code += ""
        # nominal label -> emit the class-name table, predict() returns const char*
        classes = attributes[0].type.strip("{").strip("}").split(",")
        classes = ["\"" + key + "\"" for key in classes]
        code += CodeGenerator().generateArray("const char*", "classes", classes) + "\n\n"
        resultType = "const char*"
    else:
        code += "\n"
    # weight matrices (skipped when loop unrolling inlines them)
    if not self.useUnrolling:
        for i in range(0, len(self.layers)):
            W = self.layers[i][0]
            name = "w" + str(i)
            if i == len(self.layers) - 1:
                name = "w_out"  # last layer is the output layer
            code += "const " + CodeGenerator().generateMatrix(
                "float", name, W) + "\n"
        code += "\n"
    # threshold vectors
    for i in range(0, len(self.layers)):
        matrix = self.layers[i]
        T = self.layers[i][1]
        name = "th" + str(i)
        if i == len(self.layers) - 1:
            name = "th_out"
        code += "const " + CodeGenerator().generateArray("float", name, T) + "\n"
    code += "\n"
    # generate the required ann-specific methods
    code += self.sigmoid() + "\n\n"
    code += self.activate() + "\n\n"
    if not self.useUnrolling:
        code += self.mult() + "\n\n"
    if self.modelType == Type.CLASSIFICATION:
        code += CodeGenerator().findMax("float") + "\n\n"
    # generate the callable method
    header = ["_" + key for key in self.inputLayer]
    code += resultType + " predict(" + ", ".join(
        ["float " + x for x in header]) + ")\n{\n"
    # input layer: wrap each parameter in its normalization expression
    for i in range(0, len(header)):
        header[i] = self.norm(header[i], normed[i + 1][0], normed[i + 1][1])
    code += "\t" + CodeGenerator().generateArray("float", "in", header) + "\n\n"
    # activate the layers
    if self.useUnrolling:
        code += self.activateLayersWithUnrolling(normed)
    else:
        code += self.activateLayers(header, normed)
    code += "}\n"
    #code += CodeGenerator().generateDummyMain(len(attributes)-1)
    FileHandler().write(code, _file)
from models.randomforest.RandomForest import RandomForest
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from code.CodeEvaluator import CodeEvaluator
from data.CSV import CSV

# define the training data set and set up the model
# FIX: removed the dead assignment 'training = "../examples/mnoA.csv"',
# which was immediately overwritten by the line below
training = "../examples/vehicleClassification.csv"
csv = CSV(training)
attributes = csv.findAttributes(0)
d = csv.discretizeData()

model = RandomForest()
model.config.trees = 10
model.config.depth = 5

# perform a 10-fold cross validation
e = Experiment(training, "example_rf_disc")
e.classification([model], 10)

# export the C++ code
CodeGenerator().export(training, model, e.path("rf.cpp"), d)

# evaluate the generated (discretized) code against the original model
ce = CodeEvaluator()
R, C = ce.crossValidation(model, training, attributes, e.tmp(), d)
R.printAggregated()
from models.randomforest.RandomForest import RandomForest
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from data.ResultMatrix import ResultMatrix
import numpy as np
import matplotlib.pyplot as plt
from plot.PlotTool import PlotTool
from plot.ResultVisualizer import ResultVisualizer

# Example: rank feature importance of a random forest (MDI) and plot it.

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = RandomForest()
model.config.trees = 10
model.config.depth = 5

# perform a 10-fold cross validation
e = Experiment(training, "example_rf_mdi")
e.regression([model], 10)

# aggregate the per-fold feature importances and sort them by mean
M = CSV(e.path("features_0.csv")).toMatrix()
M.normalizeRows()
M.sortByMean()
M.save(e.path("rf_features.csv"))

# visualize the relative importance of each feature
ResultVisualizer().barchart(e.path("rf_features.csv"), xlabel="Feature", ylabel="Relative Feature Importance", savePNG=e.path(e.id+".png"))
from data.FileHandler import FileHandler from data.ResultMatrix import ResultMatrix import numpy as np from plot.ResultVisualizer import ResultVisualizer # define the training data set and set up the model training = "../examples/mnoA.csv" model = ANN() # perform a 10-fold cross validation e = Experiment(training, "example_ann_feature_importance") e.regression([model], 10) # M = ResultMatrix() csv = CSV(training) attributes = csv.findAttributes(0) for i in range(10): training = e.tmp() + "training_mnoA_" + str(i) + ".csv" data = "\n".join(FileHandler().read(e.tmp() + "raw0_" + str(i) + ".txt")) ANN_WEKA(model).initModel(data, csv, attributes, training) M.add(csv.header[1:], model.computeInputLayerRanking()) M.normalizeRows() M.sortByMean() M.save(e.path("ann_features.csv")) # ResultVisualizer().barchart(e.path("ann_features.csv"), xlabel="Feature",
from data.CSV import CSV
from plot.ResultVisualizer import ResultVisualizer

# Example: compute the feature correlation matrix of a data set and
# render it as a color map.

# define the training data set
training = "../examples/mnoA.csv"

# compute and export the correlation matrix
csv = CSV()
csv.load(training)
resultFolder = "results/example_correlation/"
resultFile = resultFolder + "corr.csv"
csv.computeCorrelationMatrix(resultFile)

# visualize the matrix and save it as a PNG
ResultVisualizer().colorMap(resultFile, savePNG=resultFolder + 'example_correlation.png')

# all results are written to results/example_correlation/
from models.m5.M5 import M5
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from code.Arduino import Arduino

# Example: train an M5 regression tree, export it as C++, and measure its
# memory footprint inside a dummy Arduino project.

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = M5()

# perform a 10-fold cross validation
e = Experiment(training, "example_arduino")
e.regression([model], 10)

# export the raw C++ code
codeFile = e.path("arduino.cpp")
CodeGenerator().export(training, model, codeFile)

# create a dummy Arduino project which executes the model;
# the feature count excludes the label column
csv = CSV()
csv.load(training)
attributes = csv.findAttributes(0)
mem = Arduino().run(codeFile, "float", len(attributes) - 1)
print(mem)

# all results are written to results/example_arduino/