def ___rmse_over_ratio(rmse, weight, outfolder):
    """Export and plot both RMSE-over-ratio series for a single weight.

    The columns ``rmse_best`` and ``rmse_extra_best`` of ``rmse`` are handled
    one after the other. For each of them the ``(ratio, value)`` pairs are
    written with ``np.savetxt`` (columns separated by ``&``, every row closed
    by a double backslash), and the series indexed by ``ratio`` is passed to
    ``Plotter.plot_data_over_ratios`` with ``ylog=True``.

    Args:
        rmse: table providing the columns ``ratio``, ``rmse_best`` and
            ``rmse_extra_best``.
        weight: weight the statistics belong to; formatted with one decimal
            into the file names and plot names.
        outfolder: directory prefix for the written files; also forwarded
            to the plotter.

    Returns:
        A tuple ``(ratio, ratio_extra)``: the ratio at which ``rmse_best``
        is minimal and the ratio at which ``rmse_extra_best`` is minimal.
    """
    row_format = "%.2f\t&\t%.8f"
    row_end = "\t\\\\\n"

    # (value column, output file template, plot name template)
    series_specs = (
        ("rmse_best",
         "%s/praise_rmse_over_ratio_%.1f",
         "rmse_%.1f"),
        ("rmse_extra_best",
         "%s/praise_extra_rmse_over_ratio_%.1f",
         "rmse_extra_%.1f"),
    )

    minima = []
    for column, file_template, plot_template in series_specs:
        pairs = rmse[["ratio", column]]
        np.savetxt(file_template % (outfolder, weight), pairs,
                   fmt=row_format, newline=row_end)

        by_ratio = pairs.set_index("ratio")
        # idxmin() yields the index label (the ratio) of the smallest value.
        minima.append(by_ratio.idxmin())
        Plotter.plot_data_over_ratios(
            by_ratio, plot_template % weight, outfolder, ylog=True)

    best, best_extra = minima
    return best.values[0], best_extra.values[0]
def save_feature_importances(self, outfolder):
    """Rank features by classifier importance and save/plot the ranking.

    For every weight in ``self.dataset.weights`` an ``ExtraTreesClassifier``
    is fitted on a ``ClassificationSet`` built from the feature table and
    that weight's winners and costs. The per-weight ``feature_importances_``
    vectors are stacked into a (weights x features) matrix, and the features
    are ordered by descending mean importance.

    A table with the columns ``order``, ``value``, ``name`` and ``error`` is
    written to ``<outfolder>/feats`` with ``&`` as separator and rows closed
    by a double backslash. ``value`` and ``error`` are the mean and standard
    deviation of each feature's importance across weights. ``order`` counts
    down from ``n - 1`` to ``0``, so the most important feature carries the
    highest number. The reordered importance matrix is finally passed to
    ``Plotter.plot_feature_importances``.

    Args:
        outfolder: directory prefix for the ``feats`` file; also forwarded
            to the plotter.
    """
    feature_table = self.features.features
    feature_names = feature_table.columns

    # compute feature importances for each weight
    per_weight = []
    for weight in self.dataset.weights:
        lstats = self.dataset.lstats[weight]
        clsset = ClassificationSet.sanitize_and_init(
            feature_table, lstats.winners, lstats.costs)
        clf = ExtraTreesClassifier().fit(clsset.X, clsset.y)
        per_weight.append(clf.feature_importances_)

    # np.vstack always produces a 2-D matrix, so a single weight no longer
    # leaves a 1-D vector that breaks the per-feature mean and the DataFrame
    # construction below. Without any weights the original empty placeholder
    # is kept.
    if per_weight:
        importances = np.vstack(per_weight)
    else:
        importances = np.empty(shape=(0, 0))

    # sort feature names by average importance (most important first)
    ranked = sorted(zip(importances.mean(axis=0), feature_names))
    sorted_feature_names = [name for _, name in reversed(ranked)]

    importances = pd.DataFrame(data=importances, columns=feature_names)
    importances = importances[sorted_feature_names]

    feats = pd.DataFrame(columns=["order", "value", "name", "error"])
    feats["order"] = np.arange(len(feature_names))[::-1]
    feats["value"] = importances.mean(axis=0).values
    # pandas' std() uses ddof=1 by default, so this is NaN for a single weight.
    feats["error"] = importances.std(axis=0).values
    feats["name"] = sorted_feature_names

    # NOTE(review): pandas 1.5 renamed `line_terminator` to `lineterminator`
    # and pandas 2.0 removed the old spelling; switch the keyword when the
    # project moves to a newer pandas.
    feats.to_csv("%s/feats" % outfolder,
                 sep="&",
                 index=False,
                 line_terminator="\\\\\n")

    # NOTE(review): 30 is presumably the number of features to show —
    # confirm against Plotter.plot_feature_importances.
    Plotter.plot_feature_importances(importances, outfolder, 30)
def fit_time(raw_sample_stats, outfolder="/tmp"):
    """Save, plot and curve-fit the mean running times per ratio.

    For every ratio in ``raw_sample_stats.ratios`` the column-wise mean of
    ``raw_sample_stats.get_times(ratio)`` becomes one row of a table indexed
    by ratio. The table is written to ``<outfolder>/times_over_ratio``
    (``&`` separated, rows closed by a double backslash) and plotted with
    ``Plotter.plot_data_over_ratios``.

    Each column of the table is then fitted against a set of growth models
    with ``scipy.optimize.curve_fit`` (parameters bounded to ``[0, inf)``).
    For every fit the optimal parameters and their relative standard errors
    in percent are printed.

    Args:
        raw_sample_stats: object exposing ``ratios`` and ``get_times(ratio)``.
        outfolder: directory prefix for the CSV file; also forwarded to the
            plotter.
    """
    # Collect all rows first and build the frame once. DataFrame.append
    # copied the whole frame on every iteration, is deprecated since
    # pandas 1.4 and was removed in pandas 2.0.
    mean_rows = [
        raw_sample_stats.get_times(ratio).agg("mean")
        for ratio in raw_sample_stats.ratios
    ]
    sample_times = pd.DataFrame(mean_rows).reset_index(drop=True)
    sample_times = sample_times.set_index(raw_sample_stats.ratios)

    # save xdata, ydata to file
    # NOTE(review): pandas 1.5 renamed `line_terminator` to `lineterminator`
    # and pandas 2.0 removed the old spelling; switch the keyword when the
    # project moves to a newer pandas.
    sample_times.to_csv("%s/times_over_ratio" % outfolder,
                        sep='&',
                        line_terminator='\\\\\n')

    # plot time over ratios
    Plotter.plot_data_over_ratios(sample_times, "time", outfolder)

    # Printed label -> model function, in the order the fits are reported.
    fit_models = (
        ("nlogn", func_nlogn),
        ("n2", func_poly2),
        ("n3", func_poly3),
        ("n321", func_poly321),
        ("n3logn", func_n3logn),
        ("n2logn", func_n2logn),
        ("nlogn+n", func_nlogn_n),
    )

    # NOTE(review): the factor 10000 presumably turns a ratio into an
    # absolute problem size — confirm against how the samples are generated.
    xdata = 10000 * sample_times.index
    for algo in sample_times.columns:
        print('==== %s ====' % algo)
        ydata = sample_times[algo]
        for label, model in fit_models:
            popt, pcov = curve_fit(model, xdata, ydata, bounds=(0, np.inf))
            # sqrt(diag(pcov)) is the one-sigma error of each parameter;
            # it is reported relative to the fitted value, in percent.
            print("time %s:" % label, popt,
                  np.sqrt(np.diag(pcov)) * 100. / popt, "%")
def plot(self, outfolder="/tmp"):
    """Print the welfare range per algorithm and draw the random-case boxplot.

    The normalized welfares are split by algorithm, following the order of
    ``self.algos``. For each algorithm the minimum and maximum welfare are
    printed, and the per-algorithm value arrays are passed to
    ``Plotter.boxplot_random`` with the target ``<outfolder>/random_<name>``.

    Args:
        outfolder: directory prefix of the output file.
    """
    destination = "%s/random_%s" % (outfolder, self.name)
    normalized = self.__get_normalized_welfares()

    per_algo_values = []
    for algo in self.algos:
        # Welfare column restricted to the rows of the current algorithm.
        algo_welfare = normalized[normalized.algorithm == algo].welfare
        per_algo_values.append(algo_welfare.values)
        print("[%s] min = %.2f, max = %.2f" %
              (algo, algo_welfare.min(), algo_welfare.max()))

    Plotter.boxplot_random(per_algo_values, self.algos, destination)
def plot(self, outfolder="/tmp"):
    """Draw welfare and time boxplots relative to the CPLEX results.

    The feasible welfares and times are each divided row-wise by their
    ``CPLEX`` column and scaled to percent. The welfare percentages are
    plotted to ``<outfolder>/welfare_<name>``. The time percentages are
    plotted to ``<outfolder>/time_<name>`` on a logarithmic y axis limited
    to the range 0.01 .. 100000.

    Args:
        outfolder: directory prefix of both output files.
    """

    def percent_of_cplex(frame):
        # normalize by values of optimal algorithm (cplex), row by row
        return frame.div(frame.CPLEX, axis=0).multiply(100., axis=0)

    welfare_target = "%s/welfare_%s" % (outfolder, self.name)
    time_target = "%s/time_%s" % (outfolder, self.name)

    welfare_pct = percent_of_cplex(self.get_welfares_feasible())
    time_pct = percent_of_cplex(self.get_times_feasible())

    Plotter.boxplot_average_case(
        welfare_pct.values,
        self.algos,
        welfare_target,
        ylabel="% of optimal welfare (CPLEX)")
    Plotter.boxplot_average_case(
        time_pct.values,
        self.algos,
        time_target,
        top=100000,
        bottom=0.01,
        ylog=True,
        ylabel="% of time of optimal algorithm (CPLEX)")
def plot(self, outfolder="/tmp"):
    """Delegate to ``Plotter.plot_feature_heatmap`` for this object.

    Args:
        outfolder: forwarded unchanged to the plotter; presumably the
            directory the figure is written to (confirm in ``Plotter``).
    """
    Plotter.plot_feature_heatmap(self, outfolder)
def plot(self, outfolder="/tmp"):
    """Delegate to ``Plotter.plot_breakdown`` for this object.

    Args:
        outfolder: forwarded unchanged to the plotter; presumably the
            directory the figure is written to (confirm in ``Plotter``).
    """
    Plotter.plot_breakdown(self, outfolder)