"svm": "SVM", "logistic_regression": "LG", "linear_discriminant": "LD", "kneighbors": "kNN", "decision_tree": "DT", "gaussian_nb": "GNB", "random_forest": "RF", "gradient_boosting": "GB", "neural_network": "NN", "knn": "kNN", "Svm": "SVM", "random": "Random", "default": "Default" } db = DBHelper() metadata = pd.DataFrame(db.get_all_metadata(), columns = db.metadata_columns()).drop("id", axis = 1) models = pd.DataFrame(db.get_all_models(), columns = db.models_columns()).drop("id", axis = 1) combinations = pd.DataFrame(db.get_all_combinations(), columns = db.combinations_columns()) preperformance = pd.DataFrame(db.get_all_preperformance(), columns = db.preperformance_columns()).drop("id", axis = 1) # Not null preperformance preperformance = preperformance[~preperformance.isnull().any(axis = 1)] preperformance = pd.merge(preperformance, combinations, left_on = "combination_id", right_on = "id").drop(["combination_id", "id", "num_preprocesses"], axis = 1) models = models.rename(columns = {"model": "classifier"}) models["preprocesses"] = "None" scores = pd.concat([models, preperformance], sort = False) scores = scores[scores.classifier != "neural_network"] models = models[models.classifier != "neural_network"]
# Script preamble: loads regressor results through DBHelper into pandas
# DataFrames and makes sure the output directory for the plots exists.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio

import constants
from meta_db.db.DBHelper import DBHelper

db = DBHelper()

# Column names are supplied by hand here rather than queried from the helper.
# NOTE(review): presumably they mirror the column order returned by
# get_all_regressors_preperformance() -- confirm against DBHelper.
regressors = pd.DataFrame(
    db.get_all_regressors_preperformance(),
    columns=[
        "name",
        "score",
        "max_error",
        "mean_absolute_error",
        "mean_squared_error",
        "r2_score",
        "median_absolute_error",
        "classifier",
        "preprocesses",
    ],
)

# Regressor results fetched via get_all_regressors(); the "id" column is
# dropped because it is not used as data.
regressors_nopp = pd.DataFrame(
    db.get_all_regressors(),
    columns=db.regressor_columns(),
).drop("id", axis=1)

# os.makedirs creates every missing intermediate directory, so one call covers
# both "analysis/plots" and the nested folder. exist_ok=True replaces the
# check-then-create pattern, which could fail if the directory appeared
# between the existence check and the creation call.
os.makedirs("analysis/plots/meta_preperformance", exist_ok=True)
import os import pandas as pd import numpy as np import matplotlib.pyplot as plt import plotly.io as pio import plotly.express as px import plotly.graph_objects as go import plotly.figure_factory as ff import constants from meta_db.db.DBHelper import DBHelper db = DBHelper() regressors = pd.DataFrame(db.get_all_regressors_preperformance(), columns = ["name", "score", "max_error", "mean_absolute_error", "mean_squared_error", "r2_score", "median_absolute_error", "classifier", "preprocesses"] ) regressors_nopp = pd.DataFrame(db.get_all_regressors(), columns = db.regressor_columns()).drop("id", axis = 1) if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/meta_preperformance"): os.makedirs("analysis/plots/meta_preperformance") translator = { "svm": "SVM", "logistic_regression": "LG", "linear_discriminant": "LD", "kneighbors": "kNN", "decision_tree": "DT", "gaussian_nb": "GNB", "random_forest": "RF",
import os import pandas as pd import numpy as np import matplotlib.pyplot as plt import constants from meta_db.db.DBHelper import DBHelper db = DBHelper() models = pd.DataFrame(db.get_all_models(), columns=db.models_columns()).drop("id", axis=1) if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/meta_base"): os.makedirs("analysis/plots/meta_base") def box_plot(score="f1_macro_mean"): data = [] for model in constants.CLASSIFIERS: data.append(models[models.model == model][score]) fig = plt.figure(figsize=(12, 4)) ax = fig.add_subplot(111) ax.boxplot(data, showmeans=True, meanline=True, labels=[ name.replace("_", " ").capitalize() for name in constants.CLASSIFIERS
import os import pandas as pd import numpy as np import matplotlib.pyplot as plt import constants from meta_db.db.DBHelper import DBHelper db = DBHelper() combinations = pd.DataFrame(db.get_all_combinations(), columns=db.combinations_columns()) preperformance = pd.DataFrame(db.get_all_preperformance(), columns=db.preperformance_columns()).drop("id", axis=1) if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/preperformance"): os.makedirs("analysis/plots/preperformance") def box_plot(score="accuracy_mean", preprocess="SMOTE"): data = [] for model in constants.CLASSIFIERS: combination_id = int( combinations[combinations.preprocesses == preprocess][ combinations.classifier == model]["id"]) data.append(preperformance[preperformance.combination_id == combination_id][score].dropna()) # import pdb; pdb.set_trace()
import os import pandas as pd import numpy as np import matplotlib.pyplot as plt import constants from meta_db.db.DBHelper import DBHelper db = DBHelper() regressors = pd.DataFrame(db.get_all_regressors(), columns=db.regressor_columns()).drop("id", axis=1) if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/meta_level"): os.makedirs("analysis/plots/meta_level") # def box_plot(score = "accuracy_mean"): score = "accuracy" regressor_score = "mean_squared_error" fig = plt.figure(figsize=(24, 24)) fig.suptitle(score, fontsize=12, fontweight='bold') for indx, clf in enumerate(constants.CLASSIFIERS): data = [] ax = fig.add_subplot(len(constants.CLASSIFIERS), 1, indx + 1) ax.set_title(clf) clf_data = regressors.query("classifier == '{}' and score == '{}'".format( clf, score)) for reg in constants.REGRESSORS: reg_info = clf_data.query("name == '{}'".format(reg))[regressor_score]
import os import pandas as pd import numpy as np import matplotlib.pyplot as plt import constants from meta_db.db.DBHelper import DBHelper db = DBHelper() regressors = pd.DataFrame(db.get_all_regressors_preperformance(), columns=[ "name", "score", "max_error", "mean_absolute_error", "mean_squared_error", "r2_score", "median_absolute_error", "classifier", "preprocesses" ]) if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/meta_preperformance"): os.makedirs("analysis/plots/meta_preperformance") def box_plot(preprocess): score = "accuracy" regressor_score = "mean_squared_error" fig = plt.figure(figsize=(24, 24)) fig.suptitle(score, fontsize=12, fontweight='bold') for indx, clf in enumerate(constants.CLASSIFIERS): data = []