"gaussian_nb": "GNB", "random_forest": "RF", "gradient_boosting": "GB", "neural_network": "NN", "knn": "kNN", "Svm": "SVM", "random": "Random", "default": "Default" } db = DBHelper() metadata = pd.DataFrame(db.get_all_metadata(), columns = db.metadata_columns()).drop("id", axis = 1) models = pd.DataFrame(db.get_all_models(), columns = db.models_columns()).drop("id", axis = 1) combinations = pd.DataFrame(db.get_all_combinations(), columns = db.combinations_columns()) preperformance = pd.DataFrame(db.get_all_preperformance(), columns = db.preperformance_columns()).drop("id", axis = 1) # Not null preperformance preperformance = preperformance[~preperformance.isnull().any(axis = 1)] preperformance = pd.merge(preperformance, combinations, left_on = "combination_id", right_on = "id").drop(["combination_id", "id", "num_preprocesses"], axis = 1) models = models.rename(columns = {"model": "classifier"}) models["preprocesses"] = "None" scores = pd.concat([models, preperformance], sort = False) scores = scores[scores.classifier != "neural_network"] models = models[models.classifier != "neural_network"] metadata_means = {feature: np.mean(metadata[feature]) for feature in metadata.columns if feature != "name"} metadata.fillna(value = metadata_means, inplace = True) data = pd.merge(metadata, scores, on = "name")
] db = DBHelper() if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/base_analysis"): os.makedirs("analysis/plots/base_analysis") metadata = pd.DataFrame(db.get_all_metadata(), columns=db.metadata_columns()).drop("id", axis=1) models = pd.DataFrame(db.get_all_models(), columns=db.models_columns()).drop("id", axis=1) combinations = pd.DataFrame(db.get_all_combinations(), columns=db.combinations_columns()) preperformance = pd.DataFrame(db.get_all_preperformance(), columns=db.preperformance_columns()).drop("id", axis=1) # Not null preperformance preperformance = preperformance[~preperformance.isnull().any(axis=1)] preperformance = pd.merge(preperformance, combinations, left_on="combination_id", right_on="id").drop( ["combination_id", "id", "num_preprocesses"], axis=1) models = models.rename(columns={"model": "classifier"}) models["preprocesses"] = "None" scores = pd.concat([models, preperformance], sort=False) scores = scores[scores.preprocesses.isin(constants.PRE_PROCESSES + ["None"])