예제 #1
0
def validate(X, y_true, quadrant: str):
    """Evaluate ``model`` on one data split, report errors and save artefacts.

    ``model``, ``args`` and ``cutoff`` are not parameters; they are looked up
    in the enclosing scope at call time.

    Steps, in order:
      1. predict on ``X`` and print the RMSE against ``y_true``;
      2. print the RMSE of the log10-transformed values (labelled "RMSLE");
      3. save a true-vs-predicted scatter plot as ``scatter_<tag>.png``;
      4. dump the raw predictions to ``y_pred_<tag>.csv``.

    ``<tag>`` is built from the scenario, model, budget, regularize and seed
    fields of ``args`` followed by ``quadrant``.

    Parameters
    ----------
    X
        Input handed unchanged to ``model.predict``.
    y_true
        Reference targets the predictions are compared with.
    quadrant : str
        Label for this split; used in the printed lines and in file names.
    """
    predictions = model.predict(X)

    # Error on the original scale.
    plain_error = np.sqrt(
        mean_squared_error(y_true=y_true, y_pred=predictions))
    print("RMSE (%s): %f" % (quadrant, plain_error))

    # Same metric after a log10 transform of both targets and predictions.
    log_error = np.sqrt(
        mean_squared_error(y_true=np.log10(y_true),
                           y_pred=np.log10(predictions)))
    print("RMSLE (%s): %f" % (quadrant, log_error))

    # Common suffix shared by both output files.
    tag_parts = (args.scenario, args.model, args.budget,
                 args.regularize, args.seed, quadrant)
    tag = "%s_%s_b%d_r%s_s%d_%s" % tag_parts

    figure = plot_scatter_plot(x_data=y_true,
                               y_data=predictions,
                               labels=["y(true)", "y(pred)"],
                               max_val=cutoff)
    figure.tight_layout()
    figure.savefig("scatter_%s.png" % tag)
    plt.close(figure)  # release the figure once it is on disk

    np.savetxt(fname="y_pred_%s.csv" % tag, X=predictions)
예제 #2
0
def main():
    """Scatter-plot two columns of a .csv file from the command line.

    The .csv path is not a declared option: it has to be the single token
    that ``argparse`` leaves unparsed.  The two columns selected with
    ``--columns`` (1-based on the command line) are read through
    ``read_util.read_csv`` and passed to ``scatter.plot_scatter_plot``.  The
    resulting figure is saved when ``--save`` is given and displayed
    otherwise.

    The process exits with status 1 when the number of positional arguments
    is wrong, when the grey-factor is below one, or when a line-factor is
    below one.

    Raises
    ------
    ValueError
        When ``--columns`` does not name exactly two columns, or names a
        column that lies outside the file.
    """
    prog = "python plot_scatter.py any.csv"
    description = "Reads performances from a two-column .csv and creates a" \
                  " scatterplot"

    parser = ArgumentParser(description=description, prog=prog)

    # General Options
    # parser.add_argument("-l", "--log", action="store_true", dest="log",
    #                     default=False, help="Plot on log scale")
    parser.add_argument("--max",
                        dest="max",
                        type=float,
                        default=1000,
                        help="Maximum of both axes")
    parser.add_argument("--min",
                        dest="min",
                        type=float,
                        default=None,
                        help="Minimum of both axes")
    parser.add_argument("-s",
                        "--save",
                        dest="save",
                        default="",
                        help="Where to save plot instead of showing it?")
    parser.add_argument("--title",
                        dest="title",
                        default="",
                        help="Optional supertitle for plot")
    parser.add_argument("--greyFactor",
                        dest="grey_factor",
                        type=float,
                        default=1,
                        help="If an algorithms is not greyFactor-times better"
                        " than the other, show this point less salient, > 1")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False,
                        help="Plot some debug info")
    parser.add_argument("-f",
                        "--lineFactors",
                        dest="linefactors",
                        default=None,
                        help="Plot X speedup/slowdown, format "
                        "'X,..,X' (no spaces)")
    parser.add_argument(
        "-c",
        "--columns",
        dest="columns",
        default='1,2',
        help="Use these columns from csv; starting at 1, format"
        " 'xaxis,yaxis' (nospaces)")
    parser.add_argument("--size",
                        dest="user_fontsize",
                        default=12,
                        type=int,
                        help="Standard fontsize")
    parser.add_argument("--dpi",
                        dest="dpi",
                        default=100,
                        type=int,
                        help="DPI for saved figure")

    args, unknown = parser.parse_known_args()

    # The only token argparse may leave over is the path of the .csv file.
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if len(unknown) != 1:
        print("Wrong number of arguments")
        parser.print_help()
        sys.exit(1)

    if args.grey_factor < 1:
        print("A grey-factor lower than one makes no sense")
        parser.print_help()
        sys.exit(1)

    # Check selected columns
    columns = [int(float(i)) for i in args.columns.split(",")]
    if len(columns) != 2:
        raise ValueError("Selected more or less than two columns: %s" %
                         str(columns))
    # As python starts with 0
    columns = [i - 1 for i in columns]

    # Load validationResults
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from recent NumPy releases, so pass the builtin directly.
    res_header, res_data = read_util.read_csv(unknown[0],
                                              has_header=True,
                                              data_type=float)
    res_data = np.array(res_data)
    print("Found %s points" % (str(res_data.shape)))

    # Get data
    if max(columns) > res_data.shape[1] - 1:
        raise ValueError("You selected column %d, but there are only %d" %
                         (max(columns) + 1, res_data.shape[1]))
    if min(columns) < 0:
        raise ValueError("You selected a column number less than 1")
    data_x = res_data[:, columns[0]]
    data_y = res_data[:, columns[1]]
    label_x = res_header[columns[0]]
    label_y = res_header[columns[1]]

    linefactors = list()
    if args.linefactors is not None:
        linefactors = [float(i) for i in args.linefactors.split(",")]
        # str.split always yields at least one item, so this is purely
        # defensive; an empty item already fails in float() above.
        if len(linefactors) < 1:
            print("Something is wrong with linefactors: %s" % args.linefactors)
            sys.exit(1)
        if min(linefactors) < 1:
            print("A line-factor lower than one makes no sense")
            sys.exit(1)
    # Make sure a line is drawn for the grey-factor as well.
    if args.grey_factor > 1 and args.grey_factor not in linefactors:
        linefactors.append(args.grey_factor)

    fig = scatter.plot_scatter_plot(x_data=data_x,
                                    y_data=data_y,
                                    labels=[label_x, label_y],
                                    title=args.title,
                                    max_val=args.max,
                                    min_val=args.min,
                                    grey_factor=args.grey_factor,
                                    linefactors=linefactors,
                                    debug=args.verbose,
                                    user_fontsize=args.user_fontsize,
                                    dpi=args.dpi)

    if args.save != "":
        print("Save plot to %s" % args.save)
        # NOTE(review): --dpi is documented as "DPI for saved figure" but is
        # only forwarded to plot_scatter_plot; saving uses the plot_util
        # default. Confirm which one is intended before changing it.
        plot_util.save_plot(fig, args.save, plot_util.get_defaults()['dpi'])
    else:
        # Figure.show() returns immediately without running the GUI event
        # loop, so in a script the window disappears as soon as main() ends.
        # pyplot.show() blocks until the window is closed.
        import matplotlib.pyplot as plt
        plt.show()
예제 #3
0
def main():
    """Scatter-plot the best configuration at one time against another time.

    ``--res`` names a validationResults file and ``--obj`` a
    validationObjectiveMatrix file, both read through
    ``read_util.read_csv``.  For each of the two times given as
    ``--time a,b`` the configuration id is taken from the second-to-last
    column of the first results row whose integer time matches.  The
    objective columns of those two configurations are then plotted against
    each other with ``scatter.plot_scatter_plot``.

    Without ``--time`` the available times are listed and the process exits
    with status 0.  Invalid input (stray arguments, grey-factor or
    line-factor below one, malformed or unknown ``--time``) exits with
    status 1.
    """
    prog = "python plot_scatter.py"
    description = "Plots performances of the best config at one time vs " \
                  "another in a scatter plot"

    parser = ArgumentParser(description=description, prog=prog)

    # General Options
    parser.add_argument("--max", dest="max", type=float,
                        default=1000, help="Maximum of both axes")
    parser.add_argument("--min", dest="min", type=float,
                        default=None, help="Minimum of both axes")
    parser.add_argument("-s", "--save", dest="save",
                        default="", help="Where to save plot instead of "
                                         "showing it?")
    parser.add_argument("--title", dest="title",
                        default="", help="Optional supertitle for plot")
    parser.add_argument("--greyFactor", dest="grey_factor", type=float,
                        default=1, help="If an algorithms is not greyFactor-"
                                        "times better than the other, show this"
                                        " point less salient, > 1")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true",
                        default=False, help="Plot some debug info")
    parser.add_argument("-f", "--lineFactors", dest="linefactors",
                        default=None, help="Plot X speedup/slowdown,"
                                           " format 'X,..,X' (no spaces)")
    parser.add_argument("--time", dest="time", default=None,
                        help="Plot config at which time?, format 'time1,time2'")
    parser.add_argument("--obj", dest="obj", default=None, required=True,
                        help="Path to validationObjectiveMatrix-traj-* file")
    parser.add_argument("--res", dest="res", required=True,
                        help="Path to validationResults-traj-run-* file")
    parser.add_argument("--minvalue", dest="minvalue", type=float,
                        help="Replace all values smaller than this",)
    parser.add_argument("--fontsize", dest="fontsize", type=int, default=20,
                        help="Use this fontsize for plotting",)

    args, unknown = parser.parse_known_args()

    if len(unknown) != 0:
        print("Wrong number of arguments")
        parser.print_help()
        sys.exit(1)

    if args.grey_factor < 1:
        print("A grey-factor lower than one makes no sense")
        parser.print_help()
        sys.exit(1)

    # Load validationResults
    res_header, res_data = read_util.read_csv(args.res, has_header=True)
    av_times = [float(row[0]) for row in res_data]
    if args.time is None:
        # Print available times and quit
        print("Choose a time from")
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(0)
    time_arr = args.time.split(",")
    # Exactly two non-empty entries are required.
    if len(time_arr) != 2 or "" in time_arr:
        print("Something wrong with %s, should be 'a,b'" % args.time)
        print("Choose a time from")
        print("\n".join(["* %s" % i for i in av_times]))
        # Malformed input is an error, so report a non-zero status.
        sys.exit(1)
    time_1 = float(time_arr[0])
    time_2 = float(time_arr[1])

    # Now extract data
    # The configuration id sits in the second-to-last column of the results.
    config_1 = [int(float(row[len(res_header)-2].strip('"'))) for row in
                res_data if int(float(row[0])) == int(time_1)]
    config_2 = [int(float(row[len(res_header)-2].strip('"'))) for row in
                res_data if int(float(row[0])) == int(time_2)]
    if len(config_1) == 0 or len(config_2) == 0:
        print("Time int(%s) or int(%s) not found. Choose a time from:" %
              (time_1, time_2))
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(1)
    config_1 = config_1[0]
    config_2 = config_2[0]

    obj_header, obj_data = read_util.read_csv(args.obj, has_header=True)
    head_template = '"Objective of validation config #%s"'
    idx_1 = obj_header.index(head_template % config_1)
    idx_2 = obj_header.index(head_template % config_2)

    data_one = np.array([float(row[idx_1].strip('"')) for row in obj_data])
    data_two = np.array([float(row[idx_2].strip('"')) for row in obj_data])

    print("Found %s points for config %d and %s points for config %d" %
          (str(data_one.shape), config_1, str(data_two.shape), config_2))

    linefactors = list()
    if args.linefactors is not None:
        linefactors = [float(i) for i in args.linefactors.split(",")]
        if len(linefactors) < 1:
            print("Something is wrong with linefactors: %s" % args.linefactors)
            sys.exit(1)
        if min(linefactors) < 1:
            print("A line-factor lower than one makes no sense")
            sys.exit(1)
    # Make sure a line is drawn for the grey-factor as well.
    if args.grey_factor > 1 and args.grey_factor not in linefactors:
        linefactors.append(args.grey_factor)

    label_template = 'Objective of validation config #%s, best at %s sec'

    # Replace the requested times by the exact values stored in the file.
    # This might produce overhead for large .csv files
    times = [int(float(row[0])) for row in res_data]
    time_1 = res_data[times.index(int(time_1))][0]
    time_2 = res_data[times.index(int(time_2))][0]

    # --minvalue has no default, so it is None unless given; comparing None
    # with a float raises TypeError on Python 3, hence the explicit guard.
    if args.minvalue is not None:
        data_one = np.array([max(args.minvalue, i) for i in data_one])
        data_two = np.array([max(args.minvalue, i) for i in data_two])

    fig = scatter.plot_scatter_plot(x_data=data_one, y_data=data_two,
                                    labels=[label_template %
                                            (config_1, str(time_1)),
                                            label_template %
                                            (config_2, str(time_2))],
                                    title=args.title,
                                    max_val=args.max, min_val=args.min,
                                    grey_factor=args.grey_factor,
                                    linefactors=linefactors,
                                    user_fontsize=args.fontsize,
                                    debug=args.verbose)
    if args.save != "":
        print("Save plot to %s" % args.save)
        plot_util.save_plot(fig, args.save, plot_util.get_defaults()['dpi'])
    else:
        # Figure.show() returns immediately without running the GUI event
        # loop, so in a script the window disappears as soon as main() ends.
        # pyplot.show() blocks until the window is closed.
        import matplotlib.pyplot as plt
        plt.show()
예제 #4
0
    def scatter_plots(self, plot_log_perf: bool = False):
        '''
            draw one scatter plot for every unordered pair of algorithms in
            the scenario's performance data and write each plot as a .png
            file into the output directory

            For the "runtime" performance type the plot comes from
            plot_scatter_plot, with the algorithm cutoff time as the axis
            maximum; for every other type a plain matplotlib scatter plot is
            drawn here.

            Arguments
            ---------
            plot_log_perf: bool
                put both axes on a log scale; only used when the
                performance type is not "runtime"

            Returns
            -------
            list of (algorithm 1, algorithm 2, plot file name) tuples,
            one entry per generated plot
        '''
        matplotlib.pyplot.close()
        self.logger.info("Plotting scatter plots........")

        generated = []
        self.algorithms = self.scenario.algorithms
        algo_count = len(self.scenario.algorithms)

        perf_type = self.scenario.performance_type[0]
        is_runtime = perf_type == "runtime"

        # Axis maximum: the cutoff for runtimes, the largest observed value
        # otherwise.
        max_val = (self.scenario.algorithm_cutoff_time if is_runtime else
                   self.scenario.performance_data.max().max())

        for first in range(algo_count):
            for second in range(first + 1, algo_count):
                algo_1 = self.scenario.algorithms[first]
                algo_2 = self.scenario.algorithms[second]
                perf_1 = self.scenario.performance_data[algo_1].values
                perf_2 = self.scenario.performance_data[algo_2].values

                # Drop the current figure before drawing the next pair.
                matplotlib.pyplot.close()

                if is_runtime:
                    fig = plot_scatter_plot(x_data=perf_1,
                                            y_data=perf_2,
                                            max_val=max_val,
                                            labels=[algo_1, algo_2],
                                            metric=perf_type)
                else:
                    fig = figure(1, dpi=100)
                    axes = subplot(aspect='equal')
                    axes.grid(True, linestyle='-', which='major',
                              color='lightgrey', alpha=0.5)
                    axes.scatter(perf_1, perf_2, marker='x', c='black')
                    axes.set_xlabel(algo_1, fontsize=20)
                    axes.set_ylabel(algo_2, fontsize=20)
                    if plot_log_perf:
                        axes.set_xscale("log")
                        axes.set_yscale("log")
                    fig.tight_layout()

                # Algorithm names may contain "/", which must not end up in
                # the file name.
                file_name = "scatter_%s_%s.png" % (algo_1.replace("/", "_"),
                                                   algo_2.replace("/", "_"))
                out_name = os.path.join(self.output_dn, file_name)
                fig.savefig(out_name)
                generated.append((algo_1, algo_2, out_name))

        return generated
예제 #5
0
def main():
    """Scatter-plot the best configurations of two runs at the same time.

    ``--res1``/``--res2`` name validationResults files and
    ``--obj1``/``--obj2`` validationObjectiveMatrix files, all read through
    ``read_util.read_csv``.  For each run the configuration id is taken from
    the second-to-last column of the first results row whose integer time
    equals ``int(--time)``.  The objective columns of those two
    configurations are plotted against each other with
    ``scatter.plot_scatter_plot`` and the figure is saved (``--save``) or
    displayed.

    Without ``--time`` the times found in the first results file are listed
    and the process exits with status 0.  Stray arguments, a grey-factor or
    line-factor below one, or a time that is missing from either results
    file exit with status 1.
    """
    prog = "python plot_scatter.py"
    description = "Plots performances of the best config at one time for two " \
                  "configuration runs"

    parser = ArgumentParser(description=description, prog=prog)

    # General Options
    parser.add_argument("--max",
                        dest="max",
                        type=float,
                        default=1000,
                        help="Maximum of both axes")
    parser.add_argument("--min",
                        dest="min",
                        type=float,
                        default=None,
                        help="Minimum of both axes")
    parser.add_argument("-s",
                        "--save",
                        dest="save",
                        default="",
                        help="Where to save plot instead of showing it?")
    parser.add_argument("--title",
                        dest="title",
                        default="",
                        help="Optional supertitle for plot")
    parser.add_argument("--greyFactor",
                        dest="grey_factor",
                        type=float,
                        default=1,
                        help="If an algorithms is not greyFactor-times better"
                        " than the other, show this point less salient, > 1")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False,
                        help="Plot some debug info")
    parser.add_argument(
        "-f",
        "--lineFactors",
        dest="linefactors",
        default=None,
        help="Plot X speedup/slowdown, format 'X,..,X' (no spaces)")
    parser.add_argument("--time",
                        dest="time",
                        default=None,
                        type=float,
                        help="Plot config at which time?")
    parser.add_argument(
        "--obj1",
        dest="obj1",
        default=None,
        required=True,
        help="Path to validationObjectiveMatrix-traj-run-* file")
    parser.add_argument("--res1",
                        dest="res1",
                        required=True,
                        help="Path to validationResults-traj-run-* file")
    parser.add_argument(
        "--obj2",
        dest="obj2",
        default=None,
        required=True,
        help="Path to validationObjectiveMatrix-traj-run-* file")
    parser.add_argument("--res2",
                        dest="res2",
                        required=True,
                        help="Path to validationResults-traj-run-* file")
    parser.add_argument(
        "--minvalue",
        dest="minvalue",
        type=float,
        default=None,
        help="Replace all values smaller than this",
    )
    parser.add_argument(
        "--fontsize",
        dest="fontsize",
        type=int,
        default=20,
        help="Use this fontsize for plotting",
    )

    args, unknown = parser.parse_known_args()

    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    if len(unknown) != 0:
        print("Wrong number of arguments")
        parser.print_help()
        sys.exit(1)

    if args.grey_factor < 1:
        print("A grey-factor lower than one makes no sense")
        parser.print_help()
        sys.exit(1)

    # Load validationResults
    res1_header, res1_data = read_util.read_csv(args.res1, has_header=True)
    res2_header, res2_data = read_util.read_csv(args.res2, has_header=True)

    # Only the times of the first results file are offered to the user.
    av_times = [float(row[0]) for row in res1_data]
    if args.time is None:
        # Print available times and quit
        print("Choose a time from")
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(0)

    # Now extract data
    # The configuration id sits in the second-to-last column of the results.
    config_1 = [
        int(float(row[len(res1_header) - 2].strip('"'))) for row in res1_data
        if int(float(row[0])) == int(args.time)
    ]
    config_2 = [
        int(float(row[len(res2_header) - 2].strip('"'))) for row in res2_data
        if int(float(row[0])) == int(args.time)
    ]
    if len(config_1) == 0 or len(config_2) == 0:
        print("Time int(%s) not found. Choose a time from:" % (args.time))
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(1)
    config_1 = config_1[0]
    config_2 = config_2[0]

    obj1_header, obj1_data = read_util.read_csv(args.obj1, has_header=True)
    obj2_header, obj2_data = read_util.read_csv(args.obj2, has_header=True)

    head_template = '"Objective of validation config #%s"'
    idx_1 = obj1_header.index(head_template % config_1)
    idx_2 = obj2_header.index(head_template % config_2)

    data_one = np.array([float(row[idx_1].strip('"')) for row in obj1_data])
    data_two = np.array([float(row[idx_2].strip('"')) for row in obj2_data])

    print("Found %s points for config %d and %s points for config %d" %
          (str(data_one.shape), config_1, str(data_two.shape), config_2))

    linefactors = list()
    if args.linefactors is not None:
        linefactors = [float(i) for i in args.linefactors.split(",")]
        # str.split always yields at least one item, so this is purely
        # defensive; an empty item already fails in float() above.
        if len(linefactors) < 1:
            print("Something is wrong with linefactors: %s" % args.linefactors)
            sys.exit(1)
        if min(linefactors) < 1:
            print("A line-factor lower than one makes no sense")
            sys.exit(1)
    # Make sure a line is drawn for the grey-factor as well.
    if args.grey_factor > 1 and args.grey_factor not in linefactors:
        linefactors.append(args.grey_factor)

    # Axis labels: run tag, first 20 characters of the file name, the time.
    label_template = '%s %20s at %s sec'
    l1 = label_template % ("obj1", os.path.basename(
        args.obj1)[:20], str(args.time))
    l2 = label_template % ("obj2", os.path.basename(
        args.obj2)[:20], str(args.time))

    if args.minvalue is not None:
        print("Replace all values lower than %f" % args.minvalue)
        data_one = np.array([max(args.minvalue, i) for i in data_one])
        data_two = np.array([max(args.minvalue, i) for i in data_two])

    fig = scatter.plot_scatter_plot(x_data=data_one,
                                    y_data=data_two,
                                    labels=[l1, l2],
                                    title=args.title,
                                    max_val=args.max,
                                    min_val=args.min,
                                    grey_factor=args.grey_factor,
                                    linefactors=linefactors,
                                    user_fontsize=args.fontsize,
                                    debug=args.verbose)

    if args.save != "":
        print("Save plot to %s" % args.save)
        plot_util.save_plot(fig=fig,
                            save=args.save,
                            dpi=plot_util.get_defaults()['dpi'])
    else:
        plt.show()