Example #1
0
    def ___rmse_over_ratio(rmse, weight, outfolder):
        """Export and plot the two RMSE-over-ratio series for one weight.

        The columns ``rmse_best`` and ``rmse_extra_best`` of ``rmse`` are
        handled the same way, one after the other: the (ratio, value) pairs
        are written with ``np.savetxt`` as ``&``-separated rows, the series is
        plotted with a logarithmic y axis through
        ``Plotter.plot_data_over_ratios``, and the ratio holding the smallest
        value is determined.

        Args:
            rmse: table offering the columns ``ratio``, ``rmse_best`` and
                ``rmse_extra_best`` (used like a pandas DataFrame).
            weight: number formatted with ``%.1f`` into file and plot names.
            outfolder: directory prefix for the written files; also handed
                to the plotter.

        Returns:
            Tuple ``(ratio of minimal rmse_best,
            ratio of minimal rmse_extra_best)``.
        """

        def export_series(column, path_template, plot_template):
            # Dump one column against the ratio, plot it, and report the
            # ratio at which that column is smallest.
            pairs = rmse[["ratio", column]]
            np.savetxt(path_template % (outfolder, weight),
                       pairs,
                       fmt="%.2f\t&\t%.8f",
                       newline="\t\\\\\n")
            by_ratio = pairs.set_index("ratio")
            best = by_ratio.idxmin()
            Plotter.plot_data_over_ratios(by_ratio,
                                          plot_template % weight,
                                          outfolder,
                                          ylog=True)
            return best.values[0]

        best_ratio = export_series("rmse_best",
                                   "%s/praise_rmse_over_ratio_%.1f",
                                   "rmse_%.1f")
        best_ratio_extra = export_series(
            "rmse_extra_best",
            "%s/praise_extra_rmse_over_ratio_%.1f",
            "rmse_extra_%.1f")
        return best_ratio, best_ratio_extra
Example #2
0
    def save_feature_importances(self, outfolder):
        """Compute, save and plot feature importances across all weights.

        For every weight in ``self.dataset.weights`` an
        ``ExtraTreesClassifier`` is fitted on the ``ClassificationSet`` built
        from ``self.features.features`` and that weight's winners and costs.
        The per-weight ``feature_importances_`` vectors are stacked into a
        (weights x features) table whose columns are then ordered by
        descending mean importance.

        A summary with the columns ``order``, ``value`` (mean over weights),
        ``name`` and ``error`` (standard deviation over weights) is written
        to ``<outfolder>/feats`` using ``&`` as separator, and the sorted
        table is passed to ``Plotter.plot_feature_importances``.

        Args:
            outfolder: directory prefix for the ``feats`` file; also handed
                to the plotter.
        """
        # compute feature importances for each weight (one row per weight)
        per_weight = []
        for weight in self.dataset.weights:
            lstats = self.dataset.lstats[weight]
            clsset = ClassificationSet.sanitize_and_init(
                self.features.features, lstats.winners, lstats.costs)
            clf = ExtraTreesClassifier()
            clf = clf.fit(clsset.X, clsset.y)
            per_weight.append(clf.feature_importances_)
        # Always stack into a 2-D array. Previously a single weight left the
        # raw 1-D importance vector in place, so the axis-0 mean below became
        # a scalar and the zip()/DataFrame construction failed.
        if per_weight:
            importances = np.vstack(per_weight)
        else:
            importances = np.empty(shape=(0, 0))

        columns = self.features.features.columns
        # sort feature names by average importance (most important first)
        sorted_feature_names = [
            name for _, name in sorted(zip(importances.mean(axis=0), columns))
        ][::-1]
        importances = pd.DataFrame(data=importances, columns=columns)
        importances = importances[sorted_feature_names]

        feats = pd.DataFrame(columns=["order", "value", "name", "error"])
        feats["order"] = np.arange(len(columns))[::-1]
        feats["value"] = importances.mean(axis=0).values
        feats["error"] = importances.std(axis=0).values
        feats["name"] = sorted_feature_names
        # NOTE(review): newer pandas releases spell this keyword
        # `lineterminator`; keep it in sync with the pandas version in use.
        feats.to_csv("%s/feats" % outfolder,
                     sep="&",
                     index=False,
                     line_terminator="\\\\\n")

        # NOTE(review): the meaning of the trailing 30 is defined by
        # Plotter.plot_feature_importances - confirm there.
        Plotter.plot_feature_importances(importances, outfolder, 30)
Example #3
0
    def fit_time(raw_sample_stats, outfolder="/tmp"):
        """Save, plot and curve-fit the mean times per ratio.

        The mean of ``raw_sample_stats.get_times(ratio)`` is collected for
        every ratio, written to ``<outfolder>/times_over_ratio`` and plotted
        through ``Plotter.plot_data_over_ratios``. Afterwards, for each
        column of the collected table, every candidate model function is
        fitted with ``curve_fit`` (parameters bounded to ``[0, inf)``) and the
        fitted parameters are printed together with
        ``sqrt(diag(pcov)) * 100 / popt``.

        Args:
            raw_sample_stats: object providing ``ratios`` and
                ``get_times(ratio)``.
            outfolder: directory prefix for the written file; also handed to
                the plotter.
        """
        sample_times = pd.DataFrame()
        for ratio in raw_sample_stats.ratios:
            mean_times = raw_sample_stats.get_times(ratio).agg("mean")
            sample_times = sample_times.append(mean_times, ignore_index=True)
        sample_times = sample_times.set_index(raw_sample_stats.ratios)

        # persist the averaged times, one row per ratio
        sample_times.to_csv("%s/times_over_ratio" % outfolder,
                            sep='&',
                            line_terminator='\\\\\n')
        # plot times over ratios
        Plotter.plot_data_over_ratios(sample_times, "time", outfolder)

        # (printed label, model function) pairs, tried in this order
        candidates = (
            ("time nlogn:", func_nlogn),
            ("time n2:", func_poly2),
            ("time n3:", func_poly3),
            ("time n321:", func_poly321),
            ("time n3logn:", func_n3logn),
            ("time n2logn:", func_n2logn),
            ("time nlogn+n:", func_nlogn_n),
        )

        # NOTE(review): the factor 10000 presumably turns a ratio into a
        # problem size - confirm against the data generation.
        xdata = 10000 * sample_times.index
        for algo in sample_times.columns:
            print('==== %s ====' % algo)
            ydata = sample_times[algo]
            for label, model in candidates:
                popt, pcov = curve_fit(model,
                                       xdata,
                                       ydata,
                                       bounds=(0, np.inf))
                # parameter spread expressed as a percentage of the estimate
                print(label, popt, np.sqrt(np.diag(pcov)) * 100. / popt, "%")
Example #4
0
 def plot(self, outfolder="/tmp"):
     """Box-plot the normalized welfare of every algorithm.

     For each entry of ``self.algos`` the matching ``welfare`` values are
     taken from the normalized welfare table, their minimum and maximum
     are printed, and all value arrays are handed to
     ``Plotter.boxplot_random`` with the target ``<outfolder>/random_<name>``.

     Args:
         outfolder: directory prefix of the plot target.
     """
     target = "%s/random_%s" % (outfolder, self.name)
     normalized = self.__get_normalized_welfares()
     samples = []
     for algo in self.algos:
         # select this algorithm's welfare column once and reuse it
         algo_welfare = normalized[normalized.algorithm == algo].welfare
         samples.append(algo_welfare.values)
         print("[%s] min = %.2f, max = %.2f" %
               (algo, algo_welfare.min(), algo_welfare.max()))
     Plotter.boxplot_random(samples, self.algos, target)
Example #5
0
    def plot(self, outfolder="/tmp"):
        """Box-plot welfare and time relative to the CPLEX column.

        The feasible welfares and times are each divided row-wise by their
        ``CPLEX`` column and scaled to percent, then drawn with
        ``Plotter.boxplot_average_case`` to ``<outfolder>/welfare_<name>``
        and ``<outfolder>/time_<name>``. The time plot uses a logarithmic
        y axis limited to ``[0.01, 100000]``.

        Args:
            outfolder: directory prefix of both plot targets.
        """
        welfare_target = "%s/welfare_%s" % (outfolder, self.name)
        time_target = "%s/time_%s" % (outfolder, self.name)

        def percent_of_cplex(frame):
            # normalize every row by the value of the optimal algorithm
            return frame.div(frame.CPLEX, axis=0).multiply(100., axis=0)

        feasible_welfares = self.get_welfares_feasible()
        feasible_times = self.get_times_feasible()
        welfare_pct = percent_of_cplex(feasible_welfares)
        time_pct = percent_of_cplex(feasible_times)

        Plotter.boxplot_average_case(welfare_pct.values,
                                     self.algos,
                                     welfare_target,
                                     ylabel="% of optimal welfare (CPLEX)")

        time_plot_options = dict(
            top=100000,
            bottom=0.01,
            ylog=True,
            ylabel="% of time of optimal algorithm (CPLEX)")
        Plotter.boxplot_average_case(time_pct.values,
                                     self.algos,
                                     time_target,
                                     **time_plot_options)
Example #6
0
 def plot(self, outfolder="/tmp"):
     """Plot this object as a feature heatmap.

     Delegates to ``Plotter.plot_feature_heatmap``, passing this object and
     ``outfolder`` unchanged.

     Args:
         outfolder: forwarded to the plotter (defaults to ``/tmp``);
             presumably the output directory - confirm in ``Plotter``.
     """
     Plotter.plot_feature_heatmap(self, outfolder)
Example #7
0
 def plot(self, outfolder="/tmp"):
     """Plot the breakdown for this object.

     Delegates to ``Plotter.plot_breakdown``, passing this object and
     ``outfolder`` unchanged.

     Args:
         outfolder: forwarded to the plotter (defaults to ``/tmp``);
             presumably the output directory - confirm in ``Plotter``.
     """
     Plotter.plot_breakdown(self, outfolder)