def initModels(_args, _type):
    """Instantiate the models named in ``_args.models``.

    ``_args.models`` is a comma-separated string. The recognized names are
    ``"rf"`` (RandomForest), ``"m5"`` (M5), ``"ann"`` (ANN) and ``"svm"``
    (SVM). Names are compared verbatim, so surrounding whitespace is not
    tolerated.

    Args:
        _args: Object exposing a ``models`` string attribute.
        _type: Value assigned to ``modelType`` of the SVM instance; the
            other models do not receive it.

    Returns:
        A list with one model instance per recognized name, in the order
        the names appear. Unknown names are reported on stdout and skipped.
    """
    models = []
    for name in _args.models.split(","):
        if name == "rf":
            models.append(RandomForest())
        elif name == "m5":
            models.append(M5())
        elif name == "ann":
            models.append(ANN())
        elif name == "svm":
            svm = SVM()
            # Only the SVM is told which model type it is used for.
            svm.modelType = _type
            models.append(svm)
        else:
            # The separating space before "not found" was missing, which
            # glued the model name to the rest of the message.
            print("[ERROR] Model " + name + " not found", flush=True)
    return models
def regressionRF(_training, _trees, _depth, _file):
    """Sweep random-forest sizes and store regression results plus memory use.

    For every tree count in ``1.._trees`` and every depth in ``1.._depth`` a
    RandomForest is configured, evaluated through ``Experiment.regression``
    and passed to ``computeMemorySize``. The memory values are appended to
    the result row under the columns ``"arduino"``, ``"msp"`` and ``"esp"``.

    Args:
        _training: Training data handed to ``Experiment`` and
            ``computeMemorySize``.
        _trees: Largest number of trees to evaluate (inclusive).
        _depth: Largest tree depth to evaluate (inclusive).
        _file: Destination passed to ``ResultMatrix.save``.
    """
    experiment = Experiment(_training, verbose=False)
    results = ResultMatrix()

    for tree_count in range(1, _trees + 1):
        for depth_level in range(1, _depth + 1):
            forest = RandomForest()
            forest.config.trees = tree_count
            forest.config.depth = depth_level

            header, scores = experiment.regression([forest], 10)
            footprint = computeMemorySize(_training, forest, True)

            # Extend the header in place, then widen the row to match it.
            header += ["arduino", "msp", "esp"]
            row = np.hstack([scores, footprint])

            progress = f"#trees={tree_count}/{_trees} depth={depth_level}/{_depth} mem="
            print([progress, footprint], flush=True)

            results.add(header, row)

    # NOTE(review): the source formatting was lost; saving once after the
    # sweep is assumed - confirm it was not meant to run per iteration.
    results.save(_file)
def regressionRF(_training, _trees, _depth, _file, _resultFolder, _discretization):
    """Sweep random-forest sizes using the generated code's cross validation.

    For every tree count in ``1.._trees`` and every depth in ``1.._depth`` a
    RandomForest is configured and run through ``Experiment.regression``.
    ``CodeEvaluator.crossValidation`` is then called on the experiment's
    temporary folder. Each result row holds the column-wise mean and
    standard deviation of that evaluation, followed by the
    ``computeMemorySize`` values under ``"arduino"``, ``"msp"``, ``"esp"``.

    Args:
        _training: Training data; also used to look up the attributes.
        _trees: Largest number of trees to evaluate (inclusive).
        _depth: Largest tree depth to evaluate (inclusive).
        _file: Destination passed to ``ResultMatrix.save``.
        _resultFolder: Forwarded to ``computeMemorySize``.
        _discretization: Forwarded to ``CodeEvaluator.crossValidation`` and
            ``computeMemorySize``.
    """
    # Read the attributes from the data set that was passed in. The original
    # referenced a module-level ``training`` name instead of the parameter.
    csv = CSV(_training)
    attributes = csv.findAttributes(0)

    R = ResultMatrix()
    for numTrees in range(1, _trees + 1):
        for depth in range(1, _depth + 1):
            rf = RandomForest()
            rf.config.trees = numTrees
            rf.config.depth = depth

            # perform a cross validation to generate the training/test files
            e = Experiment(_training, "example_rf_sweet_spot_disc", verbose=False)
            e.regression([rf], 10)

            # Only the first element of the returned pair is used.
            r, _ = CodeEvaluator().crossValidation(
                rf, _training, attributes, e.tmp(), _discretization
            )
            result = np.hstack([r.data.mean(0), r.data.std(0)])
            header = r.header + [x + "_std" for x in r.header]

            mem = computeMemorySize(_training, rf, _resultFolder, _discretization)
            header += ["arduino", "msp", "esp"]
            result = np.hstack([result, mem])

            print(
                [
                    "#trees=" + str(numTrees) + "/" + str(_trees)
                    + " depth=" + str(depth) + "/" + str(_depth) + ' mem=',
                    mem,
                ],
                flush=True,
            )
            R.add(header, result)

    # NOTE(review): the source formatting was lost; saving once after the
    # sweep is assumed - confirm it was not meant to run per iteration.
    R.save(_file)
from models.randomforest.RandomForest import RandomForest
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from data.ResultMatrix import ResultMatrix
import numpy as np
import matplotlib.pyplot as plt
from plot.PlotTool import PlotTool
from plot.ResultVisualizer import ResultVisualizer

# Data set and model: a random forest with 10 trees of depth 5.
training = "../examples/mnoA.csv"
model = RandomForest()
model.config.trees = 10
model.config.depth = 5

# Run the 10-fold cross validation inside the "example_rf_mdi" experiment.
e = Experiment(training, "example_rf_mdi")
e.regression([model], 10)

# Load the experiment's "features_0.csv", normalize its rows, sort by mean
# and store the result as "rf_features.csv".
feature_table = CSV(e.path("features_0.csv"))
M = feature_table.toMatrix()
M.normalizeRows()
M.sortByMean()
M.save(e.path("rf_features.csv"))

# Plot the stored file as a bar chart and write it to "<experiment id>.png".
chart = ResultVisualizer()
chart.barchart(
    e.path("rf_features.csv"),
    xlabel="Feature",
    ylabel="Relative Feature Importance",
    savePNG=e.path(e.id + ".png"),
)
from models.randomforest.RandomForest import RandomForest
from plot.ResultVisualizer import ResultVisualizer
from experiment.ConvergenceAnalysis import ConvergenceAnalysis

# Convergence analysis of a random forest on the mnoA example data. The
# results are written to "convergence_rf.txt" in the analysis result folder.
e = ConvergenceAnalysis("example_model_convergence")
e.run(
    "../examples/mnoA.csv",
    RandomForest(),
    100,  # NOTE(review): meaning of this argument is not visible here
    e.resultFolder + "convergence_rf.txt",
)

# Plot the "rmse" column of the result file with error bars and save the PNG
# next to it.
visualizer = ResultVisualizer()
visualizer.errorbars(
    [e.resultFolder + "convergence_rf.txt"],
    "rmse",
    xlabel='Number of Training Samples',
    ylabel='RMSE',
    savePNG=e.resultFolder + 'example_model_convergence.png',
)
from models.ann.ANN import ANN from models.m5.M5 import M5 from models.randomforest.RandomForest import RandomForest from models.svm.SVM import SVM from experiment.Experiment import Experiment from plot.ResultVisualizer import ResultVisualizer import matplotlib.pyplot as plt # define the training data set and set up the model training = "../examples/mnoA.csv" models = [ANN(), M5(), RandomForest(), SVM()] # perform a 10-fold cross validation e = Experiment(training, "example_experiment") e.regression(models, 10) # visualize files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))] fig, axs = plt.subplots(2, 2) fig.set_size_inches(8, 5) xticks = [model.modelName for model in models] ResultVisualizer().boxplots(files, "r2", xticks, ylabel='R2', fig=fig, ax=axs[0][0], show=False) ResultVisualizer().boxplots(files, "mae",
from models.randomforest.RandomForest import RandomForest
from weka.models.RandomForest import RandomForest as RandomForest_WEKA
from experiment.Experiment import Experiment
from data.CSV import CSV
from code.CodeGenerator import CodeGenerator
from data.FileHandler import FileHandler

# Data set and model: a random forest with its configured depth set to 7.
training = "../examples/vehicleClassification.csv"
model = RandomForest()
model.config.depth = 7

# Run the 10-fold cross validation inside the "example_rf" experiment.
e = Experiment(training, "example_rf")
e.classification([model], 10)

# Look up the attributes of the training data.
csv = CSV()
csv.load(training)
attributes = csv.findAttributes(0)

# Read "raw0_0.txt" from the experiment's temporary folder and hand it,
# joined into one newline-separated string, to the WEKA wrapper.
raw_lines = FileHandler().read(e.tmp() + "raw0_0.txt")
data = "\n".join(raw_lines)
weka_forest = RandomForest_WEKA(model)
weka_forest.initModel(data, attributes)

# NOTE(review): this reads model.depth although only model.config.depth is
# set above - presumably initModel populates it; confirm.
model.exportEps(model.depth + 1, 10, 10, len(attributes) - 1)