def __init__(self, _id="exp"):
    """Create the experiment and make sure its result folders exist on disk."""
    self.id = _id
    self.resultFolder = "results/" + self.id + "/"
    # results/ -> results/<id>/ -> results/<id>/tmp/
    for folder in ["results", self.resultFolder, self.resultFolder + "tmp/"]:
        FileHandler().createFolder(folder)
def __init__(self, _id="multi_0"):
    """Initialize a multi-experiment: fixed seed plus a fresh result folder tree."""
    self.id = _id
    self.seed = 1000
    fh = FileHandler()
    fh.createFolder("results")
    fh.createFolder("results/" + self.id)
    fh.createFolder("results/" + self.id + "/tmp")
    # NOTE(review): clearFolder receives the bare id, not "results/" + id — confirm intended
    fh.clearFolder(self.id)
def __init__(self, _training, _id="exp_0", **kwargs):
    """Store the experiment configuration and prepare its result folder tree.

    Keyword args:
        verbose: print per-model results while running (default True).
    """
    self.training = _training
    self.genDataSets = True
    self.seed = 1000
    self.id = _id
    self.verbose = kwargs.get('verbose', True)
    self.resultFolder = "results/" + self.id + "/"
    self.clear = False
    # results/ -> results/<id>/ -> results/<id>/tmp/
    for folder in ("results", self.resultFolder, self.resultFolder + "tmp/"):
        FileHandler().createFolder(folder)
def run(self, _file, _callType, _numAttributes, _name="ml_test"):
    """Wrap _file in an MSP430 test harness, compile and link it, and
    return the resulting binary size as a float."""
    target = _file.split(".")[0] + ".msp430"
    body = "\n".join(FileHandler().read(_file))
    src = "#include <msp430.h>\n\n" + body + "\n\n"
    src += "void printf(char *, ...);\n"
    src += self.generateDummyMain(_callType, _numAttributes)
    FileHandler().write(src, target)
    self.compile(target)
    linked = self.link(target, _name)
    return float(Compiler().computeSize(linked))
def run(self, _file, _callType, _numAttributes):
    """Wrap _file in a host test harness, write it next to the input, and compile.

    Returns whatever self.compile() reports.
    """
    code = "#include <stdio.h>\n#include <stdlib.h>\n\n"
    code += "\n".join(FileHandler().read(_file))
    code += self.generateDummyMain(_callType, _numAttributes)
    # BUG FIX: 'file' was referenced below while its defining assignment was
    # commented out, raising a NameError at runtime — restore the target name.
    file = _file.split(".")[0] + ".esp32"
    FileHandler().write(code, file)
    # FileHandler().write(code, Settings().espProject + "main/hello_world_main.c")
    return self.compile()
def run(self, _model, _files):
    """Cross-train/test _model over every (training, test) pair in _files.

    For each pair a k-fold cross validation is run; one l x l mean matrix and
    one l x l std matrix per metric are accumulated and finally written to
    results/<id>/<metric>.csv.
    """
    R = {}
    l = len(_files)
    folds = 10
    for y in range(l):
        training = _files[y]
        for x in range(l):
            cfg = Configuration(training, _model, folds)
            cfg.resultFolder = "results/" + self.id + "/"
            cfg.tmpFolder = cfg.resultFolder + "tmp/"
            test = _files[x]
            # fold both data sets with the same seed so folds are comparable
            csvA = CSV(training)
            csvA.randomize(self.seed)
            csvA.createFolds(folds, cfg.tmpFolder)
            csvB = CSV(test)
            csvB.randomize(self.seed)
            csvB.createFolds(folds, cfg.tmpFolder)
            cv = CrossValidation(cfg)
            cv.model = _model
            cv.folds = folds
            cv.id = str(y) + "_" + str(x)
            r = cv.run(csvA.id, csvB.id)
            # results = [means..., stds...]
            results = np.hstack([r.data.mean(0), r.data.std(0)])  # TODO: mean only if size>1
            # lazily initialise one matrix (+ one std matrix) per metric
            if len(R) == 0:
                labels = [FileHandler().generateId(file) for file in _files]
                for key in r.header:
                    R[key] = ResultMatrix(labels, np.zeros((l, l)))
                    R[key + "_std"] = ResultMatrix(labels, np.zeros((l, l)))
            # update the result matrices
            n = len(r.header)
            for i in range(n):
                R[r.header[i]].data[y][x] = results[i]
                # BUG FIX: the std matrix previously stored the mean
                # (results[i]); the stds live in the second half of the hstack.
                R[r.header[i] + "_std"].data[y][x] = results[i + n]
    for key in R.keys():
        R[key].save("results/" + self.id + "/" + key + ".csv")
def crossValidation(self, _model, _training, _attributes, _folder, _discretization=None, **kwargs):
    """Run a k-fold cross validation for _model on pre-generated fold files.

    Returns (R, C): the per-fold result matrix and the merged confusion matrix
    (C stays empty for regression).
    """
    # BUG FIX: the fold count was read from the unrelated 'xlabel' kwarg;
    # read 'folds' first, keeping 'xlabel' as a fallback for old callers.
    folds = kwargs.get('folds', kwargs.get('xlabel', 10))
    self.discretization = _discretization
    # a numeric target attribute implies regression, otherwise classification
    if _attributes[0].type == "NUMERIC":
        self.modelType = Type.REGRESSION
    else:
        self.modelType = Type.CLASSIFICATION
    R = ResultMatrix()
    C = ConfusionMatrix(_attributes[0].type.strip("{").strip("}").split(","))
    fileId = FileHandler().getFileName(_training).replace(".csv", "")
    for i in range(folds):
        foldId = fileId + "_" + str(i) + ".csv"
        training = _folder + "training_" + foldId
        test = _folder + "test_" + foldId
        # export the model code
        codeFile = _folder + "code.cpp"
        CodeGenerator().export(training, _model, codeFile, self.discretization)
        # apply the validation
        if self.modelType == Type.REGRESSION:
            keys, results, conf = self.regression(codeFile, _attributes, test, _folder + "predictions_" + str(i) + ".csv")
            R.add(keys, results)
        elif self.modelType == Type.CLASSIFICATION:
            keys, results, conf = self.classification(codeFile, _attributes, test, _folder + "predictions_" + str(i) + ".csv")
            R.add(keys, results)
            C.merge(conf)
    return R, C
def save(self, _file):
    """Serialize the discretization as four CSV rows: header, mins, maxs, widths."""
    rows = [
        ",".join(self.header),
        ",".join(str(v) for v in self.min),
        ",".join(str(v) for v in self.max),
        ",".join(str(v) for v in self.widths),
    ]
    FileHandler().write("\n".join(rows), _file)
def initExperiment(_args):
    """Dispatch the requested experiment (classification / correlation /
    regression) from parsed CLI arguments and write results to results/<name>/."""
    FileHandler().createFolder("results")
    # BUG FIX: several references used the undefined global 'args' instead of
    # the '_args' parameter, which raises a NameError unless a matching global
    # happens to exist.
    resultFolder = "results/" + _args.name + "/"
    FileHandler().createFolder(resultFolder)
    resultFile = resultFolder + "result.csv"
    if _args.classification:
        e = Experiment(_args.classification, _args.name)
        models = initModels(_args, Type.CLASSIFICATION)
        e.classification(models, 10)
        if _args.gen_code:
            exportCode(_args, resultFolder, _args.classification, models)
        if _args.visualize:
            files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
            xTicks = [model.modelName for model in models]
            ResultVisualizer().boxplots(files, _args.visualize, xTicks, ylabel=_args.visualize)
    elif _args.correlation:
        csv = CSV()
        csv.load(_args.correlation)
        csv.computeCorrelationMatrix(resultFile)
        if _args.visualize:
            ResultVisualizer().colorMap(resultFile)
    elif _args.regression:
        e = Experiment(_args.regression, _args.name)
        models = initModels(_args, Type.REGRESSION)
        e.regression(models, 10)
        if _args.gen_code:
            exportCode(_args, resultFolder, _args.regression, models)
        if _args.visualize:
            files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
            xTicks = [model.modelName for model in models]
            ResultVisualizer().boxplots(files, _args.visualize, xTicks, ylabel=_args.visualize)
    print("[LIMITS]: results written to src/" + resultFolder)
def exportCode(self, _data, _csv, _attributes, _fileOut, _fileIn="", **kwargs):
    """Initialise the model from the WEKA dump and write generated C code.

    A nominal target attribute (type wrapped in '{...}') selects
    classification code, anything else regression code.
    """
    discretization = kwargs.get("discretization", None)
    if "{" in _attributes[0].type:
        classes = _attributes[0].type.strip("{").strip("}").split(",")
        self.initModel(_data, _attributes, discretization)
        code = self.model.generateClassificationCode(_attributes, classes)
    else:
        self.initModel(_data, _attributes, discretization)
        code = self.model.generateRegressionCode(_attributes)
    FileHandler().write(code, _fileOut)
def build(self, _codeFile, _test):
    """Emit a C++ main() that streams one prediction per data row of _test
    (comma separated) and compile it to self.tempExecutable."""
    includes = "#include <stdio.h>\n#include <stdlib.h>\n#include <sstream>\n#include <iostream>\n\n"
    src = includes + "\n".join(FileHandler().read(_codeFile)) + "\n"
    src += "\nint main(int _argc, char* argv[])\n{\n"
    rows = FileHandler().read(_test)
    src += "\tstd::stringstream stream;\n"
    # row 0 is the CSV header; column 0 of each row is the label, the rest features
    for i in range(1, len(rows)):
        fields = rows[i].split(",")
        src += "\tstream << " + self.buildEmbeddedPredictionCall(fields[1:])
        if i < len(rows) - 1:
            src += " << \",\""
        src += ";\n"
    src += "\n\tstd::cout << stream.str() << std::endl;\n\n"
    src += "\treturn 0;\n"
    src += "}"
    FileHandler().write(src, self.tempCodeFile)
    Compiler().run(self.tempCodeFile, self.tempExecutable)
def run(self, _models, _folds, _type):
    """Cross-validate every model in _models with _folds folds.

    Returns (header, data) of the aggregated result matrix and writes
    results.csv into the experiment's result folder.
    """
    # BUG FIX: 'csv' was only created when genDataSets was True, yet it is
    # always needed below for cv.run(csv.id, csv.id) — load it
    # unconditionally and only regenerate the fold files on demand.
    csv = CSV()
    csv.load(self.training)
    if self.genDataSets:
        csv.randomize(self.seed)
        if _type == Type.REGRESSION:
            csv.createFolds(_folds, self.resultFolder + "tmp/")
        elif _type == Type.CLASSIFICATION:
            classes = csv.stratify(_folds, self.resultFolder + "tmp/")
    R = ResultMatrix()
    for i in range(0, len(_models)):
        model = _models[i]
        model.modelType = _type
        config = Configuration(self.training, model, _folds)
        config.resultFolder = self.resultFolder
        config.tmpFolder = self.resultFolder + "tmp/"
        cv = CrossValidation(config, str(i))
        r = cv.run(csv.id, csv.id)
        # aggregate fold results: [means..., stds...]
        results = np.hstack([r.data.mean(0), r.data.std(0)])  # TODO: mean only if size>1
        R.add(r.header + [x + "_std" for x in r.header], results)
        if self.verbose:
            if i == 0:
                r.printHeader()
            r.printAggregated()
    FileHandler().saveMatrix(R.header, R.data, self.resultFolder + "results.csv")
    if self.clear:
        FileHandler().clearFolder(self.resultFolder + "tmp/")
    return R.header, R.data
def export(self, _training, _model, _out, _discretize=False):
    """Train _model on _training via WEKA and export the generated code to _out.

    All intermediate artifacts live in tmp/ under a unique id and are deleted
    afterwards.
    """
    FileHandler().createFolder("tmp")
    tmpId = "_" + str(uuid.uuid1())
    tmpFolder = "tmp/"
    tmpTraining = "train" + tmpId + ".arff"
    csv = CSV(_training)
    csv.convertToARFF(tmpFolder + tmpTraining, False)
    # optional discretization of the training data
    d = csv.discretizeData() if _discretize else None
    attributes = csv.findAttributes(0)
    weka = WEKA()
    weka.folder = tmpFolder
    weka.train(_model, tmpFolder + tmpTraining, tmpId)
    rawFile = tmpFolder + "raw" + tmpId + ".txt"
    data = "\n".join(FileHandler().read(rawFile))
    FileHandler().checkFolder(_out)
    weka.modelInterface.exportCode(data, csv, attributes, _out, _training, discretization=d)
    # clean up the temporary artifacts
    FileHandler().deleteFiles([tmpFolder + tmpTraining, rawFile])
def exportCode(self, _data, _csv, _attributes, _fileOut, _fileIn="", **kwargs):
    """Export regression code for an M5 model tree; classification is unsupported."""
    if "{" in _attributes[0].type:
        # nominal target attribute -> classification, which M5 cannot do
        print("[ERROR] M5 does not support classification")
        return
    model = self.generateGraph(_data, _attributes)
    code = model.generateGraphCode().replace("tree_0(", "predict(")
    FileHandler().write(code, _fileOut)
def exportCode(self, _data, _csv, _attributes, _fileOut, _fileIn="", **kwargs):
    """Generate regression or classification code for the model and write it
    to _fileOut; the regression path rescales by the target's min and range."""
    code = ""
    if self.modelType == Type.REGRESSION:
        self.initModel(_data, _csv, _attributes, _fileIn)
        x = np.array(_csv.getColumn(0))
        # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float is the documented replacement.
        y = x.astype(float)
        yRange = max(y) - min(y)
        yMin = min(y)
        code = self.model.generateRegressionCode(_attributes, yMin, yRange)
    else:  # classification
        classes = _attributes[0].type.strip("{").strip("}").split(",")
        self.initModel(_data, _csv, _attributes, _fileIn)
        code = self.model.generateClassificationCode(_attributes, classes)
    FileHandler().write(code, _fileOut)
def run(self, _training, _models, _platforms):
    """For each model: cross-validate it, retrain on the full data set, export
    its code, and (placeholder) compile for every target platform."""
    R = ResultMatrix()
    M = []
    for model in _models:
        # run the cross validation to compute the model performance
        M.append(model.toString())
        e = Experiment(_training)
        header, result = e.regression([model], 10)
        R.add(header, result)
        # train with the global training data and export code
        training_arff = "tmp/recommend.arff"
        csv = CSV()
        csv.load(_training)
        csv.convertToARFF(training_arff, False)
        attributes = csv.findAttributes(0)
        lAtt = len(attributes) - 1
        WEKA().train(model, training_arff, "0")
        raw = "\n".join(FileHandler().read("tmp/raw0.txt"))
        codeFile = "recommend.c"
        model.exportCode(raw, csv, attributes, codeFile)
        # compile platform-specific code (not implemented yet)
        for platform in _platforms:
            ""
            # print(model.toString() + " : " + platform.toString())
    print(R.header, R.data)
    print(M)
def save(self, _file):
    """Write the header row followed by the raw data rows to _file."""
    content = ",".join(self.header) + "\n" + "\n".join(self.data)
    FileHandler().write(content, _file)
def exportFoldData(self, _folds, _folder):
    """Write the k-fold split of this data set to _folder.

    Produces: an index-mapping file, one training/test CSV pair per fold
    (fold i is the test set, all others the training set), and an ARFF
    version of every CSV pair.
    """
    fh = FileHandler()
    folds = len(_folds)
    # generate the index mapping (original row indices, in fold order)
    indices = [];
    for i in range(0, folds):
        subset = _folds[i]
        subsetIndices = subset.removeIndices()
        indices += subsetIndices
    fh.write("\n".join(indices), _folder + "indices_" + self.id + ".csv")
    # generate the fold data: header first, then append each subset to either
    # the test file (j == i) or the training file (j != i)
    for i in range(0, folds):
        train = _folder + "training_" + self.id + "_" + str(i) + ".csv"
        test = _folder + "test_" + self.id + "_" + str(i) + ".csv"
        fh.write(",".join(self.header) + "\n", train)
        fh.write(",".join(self.header) + "\n", test)
        for j in range(0, folds):
            subset = _folds[j]
            if i!=j:
                fh.append("\n".join(subset.data) + "\n", train)
            else:
                fh.append("\n".join(subset.data) + "\n", test)
    # generate ARFF files by re-loading each CSV pair and serializing it
    arff = ARFF(self.id)
    attributes = self.findAttributes(1)
    for i in range(0, folds):
        train = CSV(); train.load(_folder + "training_" + self.id + "_" + str(i) + ".csv")
        test = CSV(); test.load(_folder + "test_" + self.id + "_" + str(i) + ".csv")
        fh.write(arff.serialize(attributes, train.data), _folder + "training_" + self.id + "_" + str(i) + ".arff")
        fh.write(arff.serialize(attributes, test.data), _folder + "test_" + self.id + "_" + str(i) + ".arff")
def save(self, _file):
    """Persist this matrix (header and data) to _file via FileHandler."""
    fh = FileHandler()
    fh.saveMatrix(self.header, self.data, _file)
from plot.ResultVisualizer import ResultVisualizer # define the training data set and set up the model training = "../examples/mnoA.csv" model = ANN() # perform a 10-fold cross validation e = Experiment(training, "example_ann_feature_importance") e.regression([model], 10) # M = ResultMatrix() csv = CSV(training) attributes = csv.findAttributes(0) for i in range(10): training = e.tmp() + "training_mnoA_" + str(i) + ".csv" data = "\n".join(FileHandler().read(e.tmp() + "raw0_" + str(i) + ".txt")) ANN_WEKA(model).initModel(data, csv, attributes, training) M.add(csv.header[1:], model.computeInputLayerRanking()) M.normalizeRows() M.sortByMean() M.save(e.path("ann_features.csv")) # ResultVisualizer().barchart(e.path("ann_features.csv"), xlabel="Feature", ylabel="Relative Feature Importance", savePNG=e.path(e.id + ".png"))
from models.randomforest.RandomForest import RandomForest
from weka.models.RandomForest import RandomForest as RandomForest_WEKA
from experiment.Experiment import Experiment
from data.CSV import CSV
from code.CodeGenerator import CodeGenerator
from data.FileHandler import FileHandler

# training data and model configuration
training = "../examples/vehicleClassification.csv"
model = RandomForest()
model.config.depth = 7

# 10-fold cross validation
e = Experiment(training, "example_rf")
e.classification([model], 10)

# reload the data and the trained WEKA model, then render the forest as EPS
csv = CSV()
csv.load(training)
attributes = csv.findAttributes(0)
raw = "\n".join(FileHandler().read(e.tmp() + "raw0_0.txt"))
RandomForest_WEKA(model).initModel(raw, attributes)
# NOTE(review): reads model.depth although model.config.depth was set above — confirm
model.exportEps(model.depth+1, 10, 10, len(attributes)-1)
def load(self, _file):
    """Read a CSV file: row 0 becomes the header, the remaining rows the data."""
    rows = FileHandler().read(_file)
    self.header = rows[0].split(",")
    self.data = rows[1:]
    self.file = _file
    self.id = FileHandler().generateId(_file)
def generateCode(self, _file):
    """Emit a standalone C implementation of this ANN (predict() plus its
    weight/threshold constants and helper functions) and write it to _file.

    Classification networks return a class-name string, regression networks a
    float; inputs are normalized with the training data's statistics.
    """
    csv = CSV(self.training)
    attributes = csv.findAttributes(0)
    normed = self.normalize(csv, attributes)
    resultType = "float"
    code = "#include <math.h>\n"
    if self.modelType == Type.CLASSIFICATION:
        code += ""
        # nominal target: emit the class-name table and return a const char*
        classes = attributes[0].type.strip("{").strip("}").split(",")
        classes = ["\"" + key + "\"" for key in classes]
        code += CodeGenerator().generateArray("const char*", "classes", classes) + "\n\n"
        resultType = "const char*"
    else:
        code += "\n"
    # weight matrices (skipped when loop unrolling inlines the weights)
    if not self.useUnrolling:
        for i in range(0, len(self.layers)):
            W = self.layers[i][0]
            name = "w" + str(i)
            if i == len(self.layers) - 1:
                name = "w_out"
            code += "const " + CodeGenerator().generateMatrix("float", name, W) + "\n"
        code += "\n"
    # threshold vectors
    for i in range(0, len(self.layers)):
        matrix = self.layers[i]
        T = self.layers[i][1]
        name = "th" + str(i)
        if i == len(self.layers) - 1:
            name = "th_out"
        code += "const " + CodeGenerator().generateArray("float", name, T) + "\n"
    code += "\n"
    # generate the required ann-specific methods
    code += self.sigmoid() + "\n\n"
    code += self.activate() + "\n\n"
    if not self.useUnrolling:
        code += self.mult() + "\n\n"
    if self.modelType == Type.CLASSIFICATION:
        code += CodeGenerator().findMax("float") + "\n\n"
    # generate the callable method
    header = ["_" + key for key in self.inputLayer]
    code += resultType + " predict(" + ", ".join(["float " + x for x in header]) + ")\n{\n"
    # input layer: normalize each argument with the training min/range
    for i in range(0, len(header)):
        header[i] = self.norm(header[i], normed[i + 1][0], normed[i + 1][1])
    code += "\t" + CodeGenerator().generateArray("float", "in", header) + "\n\n"
    # activate the layers
    if self.useUnrolling:
        code += self.activateLayersWithUnrolling(normed)
    else:
        code += self.activateLayers(header, normed)
    code += "}\n"
    #code += CodeGenerator().generateDummyMain(len(attributes)-1)
    FileHandler().write(code, _file)
def convertToARFF(self, _file, _removeIndices=True):
    """Serialize this CSV as an ARFF file, optionally stripping the index column first."""
    if _removeIndices:
        self.removeIndices()
    serialized = ARFF(self.id).serialize(self.findAttributes(0), self.data)
    FileHandler().write(serialized, _file)