def validate(X, y_true, quadrant:str):
    """Evaluate the module-level ``model`` on one data split and dump results.

    Prints the RMSE of the predictions and the RMSE of the log10-transformed
    values (reported under the label "RMSLE"), saves a true-vs-predicted
    scatter plot as ``scatter_<tag>.png`` and writes the raw predictions to
    ``y_pred_<tag>.csv``. ``<tag>`` is assembled from the module-level
    ``args`` (scenario, model, budget, regularize, seed) and ``quadrant``.

    Parameters
    ----------
    X
        Input handed to ``model.predict``.
    y_true
        Ground-truth targets compared against the predictions.
    quadrant : str
        Label of the evaluated split; used in the printed messages and in
        the output file names.
    """
    predictions = model.predict(X)

    # Error on the raw scale
    raw_mse = mean_squared_error(y_true=y_true, y_pred=predictions)
    print("RMSE (%s): %f" % (quadrant, np.sqrt(raw_mse)))

    # Error on the log10 scale
    log_mse = mean_squared_error(y_true=np.log10(y_true),
                                 y_pred=np.log10(predictions))
    print("RMSLE (%s): %f" % (quadrant, np.sqrt(log_mse)))

    # Common suffix shared by both output files
    tag_parts = (args.scenario, args.model, args.budget,
                 args.regularize, args.seed, quadrant)
    run_tag = "%s_%s_b%d_r%s_s%d_%s" % tag_parts

    scatter_fig = plot_scatter_plot(x_data=y_true,
                                    y_data=predictions,
                                    labels=["y(true)", "y(pred)"],
                                    max_val=cutoff)
    scatter_fig.tight_layout()
    scatter_fig.savefig("scatter_%s.png" % (run_tag))
    plt.close(scatter_fig)

    np.savetxt(fname="y_pred_%s.csv" % (run_tag), X=predictions)
def main():
    """Scatter-plot two columns of a performance .csv file.

    Command line: exactly one positional argument (the .csv path) plus the
    options defined below. The two columns selected with ``--columns``
    (1-based) are read via ``read_util.read_csv`` and handed to
    ``scatter.plot_scatter_plot``; the column headers become the axis labels.
    The figure is saved when ``--save`` is given and shown otherwise.

    Exits with status 1 on a wrong number of positional arguments, a
    grey-factor below one or a line-factor below one.

    Raises
    ------
    ValueError
        If ``--columns`` does not name exactly two columns, or a selected
        column lies outside the data.
    """
    prog = "python plot_scatter.py any.csv"
    description = "Reads performances from a two-column .csv and creates a" \
                  " scatterplot"

    parser = ArgumentParser(description=description, prog=prog)

    # General Options
    parser.add_argument("--max", dest="max", type=float,
                        default=1000, help="Maximum of both axes")
    parser.add_argument("--min", dest="min", type=float,
                        default=None, help="Minimum of both axes")
    parser.add_argument("-s", "--save", dest="save",
                        default="",
                        help="Where to save plot instead of showing it?")
    parser.add_argument("--title", dest="title",
                        default="", help="Optional supertitle for plot")
    parser.add_argument("--greyFactor", dest="grey_factor", type=float,
                        default=1,
                        help="If an algorithms is not greyFactor-times better"
                             " than the other, show this point less salient,"
                             " > 1")
    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", default=False,
                        help="Plot some debug info")
    parser.add_argument("-f", "--lineFactors", dest="linefactors",
                        default=None,
                        help="Plot X speedup/slowdown, format "
                             "'X,..,X' (no spaces)")
    parser.add_argument("-c", "--columns", dest="columns", default='1,2',
                        help="Use these columns from csv; starting at 1, format"
                             " 'xaxis,yaxis' (nospaces)")
    parser.add_argument("--size", dest="user_fontsize", default=12, type=int,
                        help="Standard fontsize")
    parser.add_argument("--dpi", dest="dpi", default=100, type=int,
                        help="DPI for saved figure")

    # Anything argparse does not recognise is treated as the input file
    args, unknown = parser.parse_known_args()

    if len(unknown) != 1:
        print("Wrong number of arguments")
        parser.print_help()
        sys.exit(1)

    if args.grey_factor < 1:
        print("A grey-factor lower than one makes no sense")
        parser.print_help()
        sys.exit(1)

    # Check selected columns
    columns = [int(float(i)) for i in args.columns.split(",")]
    if len(columns) != 2:
        raise ValueError("Selected more or less than two columns: %s" %
                         str(columns))
    # As python starts with 0
    columns = [i - 1 for i in columns]

    # Load validationResults; builtin float replaces the removed np.float
    # alias (they were the same object).
    res_header, res_data = read_util.read_csv(unknown[0], has_header=True,
                                              data_type=float)
    res_data = np.array(res_data)
    print("Found %s points" % (str(res_data.shape)))

    # Get data
    if max(columns) > res_data.shape[1] - 1:
        raise ValueError("You selected column %d, but there are only %d" %
                         (max(columns) + 1, res_data.shape[1]))
    if min(columns) < 0:
        raise ValueError("You selected a column number less than 1")
    data_x = res_data[:, columns[0]]
    data_y = res_data[:, columns[1]]
    label_x = res_header[columns[0]]
    label_y = res_header[columns[1]]

    linefactors = list()
    if args.linefactors is not None:
        linefactors = [float(i) for i in args.linefactors.split(",")]
        if len(linefactors) < 1:
            print("Something is wrong with linefactors: %s" % args.linefactors)
            sys.exit(1)
        if min(linefactors) < 1:
            print("A line-factor lower than one makes no sense")
            sys.exit(1)
    # The grey-factor boundary is drawn as a line as well
    if args.grey_factor > 1 and args.grey_factor not in linefactors:
        linefactors.append(args.grey_factor)

    fig = scatter.plot_scatter_plot(x_data=data_x, y_data=data_y,
                                    labels=[label_x, label_y],
                                    title=args.title,
                                    max_val=args.max, min_val=args.min,
                                    grey_factor=args.grey_factor,
                                    linefactors=linefactors,
                                    debug=args.verbose,
                                    user_fontsize=args.user_fontsize,
                                    dpi=args.dpi)

    if args.save != "":
        print("Save plot to %s" % args.save)
        # NOTE(review): --dpi is forwarded to the plot above, but saving uses
        # the plot_util default dpi - confirm this is intended.
        plot_util.save_plot(fig, args.save, plot_util.get_defaults()['dpi'])
    else:
        fig.show()
def main():
    """Scatter-plot the best configs of one run at two points in time.

    Reads the validationResults file given by ``--res`` to find which
    configuration belongs to each of the two times in ``--time`` (format
    ``'time1,time2'``, matched after truncation to int), looks up the
    per-instance objectives of both configurations in the file given by
    ``--obj`` and passes them to ``scatter.plot_scatter_plot``. The figure
    is saved when ``--save`` is given and shown otherwise.

    Without ``--time`` the available times are listed and the program exits
    with status 0. Exits with status 1 on stray positional arguments, a
    grey-factor or line-factor below one, or a time that is not present in
    the results file.
    """
    prog = "python plot_scatter.py"
    description = "Plots performances of the best config at one time vs " \
                  "another in a scatter plot"

    parser = ArgumentParser(description=description, prog=prog)

    # General Options
    parser.add_argument("--max", dest="max", type=float,
                        default=1000, help="Maximum of both axes")
    parser.add_argument("--min", dest="min", type=float,
                        default=None, help="Minimum of both axes")
    parser.add_argument("-s", "--save", dest="save",
                        default="", help="Where to save plot instead of "
                                         "showing it?")
    parser.add_argument("--title", dest="title",
                        default="", help="Optional supertitle for plot")
    parser.add_argument("--greyFactor", dest="grey_factor", type=float,
                        default=1, help="If an algorithms is not greyFactor-"
                                        "times better than the other, show this"
                                        " point less salient, > 1")
    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", default=False,
                        help="Plot some debug info")
    parser.add_argument("-f", "--lineFactors", dest="linefactors",
                        default=None, help="Plot X speedup/slowdown,"
                                           " format 'X,..,X' (no spaces)")
    parser.add_argument("--time", dest="time", default=None,
                        help="Plot config at which time?, format 'time1,time2'")
    parser.add_argument("--obj", dest="obj", default=None, required=True,
                        help="Path to validationObjectiveMatrix-traj-* file")
    parser.add_argument("--res", dest="res", required=True,
                        help="Path to validationResults-traj-run-* file")
    parser.add_argument("--minvalue", dest="minvalue", type=float,
                        help="Replace all values smaller than this",)
    parser.add_argument("--fontsize", dest="fontsize", type=int, default=20,
                        help="Use this fontsize for plotting",)

    args, unknown = parser.parse_known_args()

    if len(unknown) != 0:
        print("Wrong number of arguments")
        parser.print_help()
        sys.exit(1)

    if args.grey_factor < 1:
        print("A grey-factor lower than one makes no sense")
        parser.print_help()
        sys.exit(1)

    # Load validationResults
    res_header, res_data = read_util.read_csv(args.res, has_header=True)
    av_times = [float(row[0]) for row in res_data]
    if args.time is None:
        # Print available times and quit
        print("Choose a time from")
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(0)
    time_arr = args.time.split(",")
    # Exactly two non-empty entries are required
    if len(time_arr) != 2 or "" in time_arr:
        print("Something wrong with %s, should be 'a,b'" % args.time)
        print("Choose a time from")
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(0)
    time_1 = float(time_arr[0])
    time_2 = float(time_arr[1])

    # Now extract data; the config id is read from the second-to-last column
    config_col = len(res_header) - 2
    config_1 = [int(float(row[config_col].strip('"'))) for row in res_data
                if int(float(row[0])) == int(time_1)]
    config_2 = [int(float(row[config_col].strip('"'))) for row in res_data
                if int(float(row[0])) == int(time_2)]
    if len(config_1) == 0 or len(config_2) == 0:
        print("Time int(%s) or int(%s) not found. Choose a time from:" %
              (time_1, time_2))
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(1)
    config_1 = config_1[0]
    config_2 = config_2[0]

    obj_header, obj_data = read_util.read_csv(args.obj, has_header=True)
    head_template = '"Objective of validation config #%s"'
    idx_1 = obj_header.index(head_template % config_1)
    idx_2 = obj_header.index(head_template % config_2)

    data_one = np.array([float(row[idx_1].strip('"')) for row in obj_data])
    data_two = np.array([float(row[idx_2].strip('"')) for row in obj_data])

    print("Found %s points for config %d and %s points for config %d" %
          (str(data_one.shape), config_1, str(data_two.shape), config_2))

    linefactors = list()
    if args.linefactors is not None:
        linefactors = [float(i) for i in args.linefactors.split(",")]
        if len(linefactors) < 1:
            print("Something is wrong with linefactors: %s" % args.linefactors)
            sys.exit(1)
        if min(linefactors) < 1:
            print("A line-factor lower than one makes no sense")
            sys.exit(1)
    # The grey-factor boundary is drawn as a line as well
    if args.grey_factor > 1 and args.grey_factor not in linefactors:
        linefactors.append(args.grey_factor)

    label_template = 'Objective of validation config #%s, best at %s sec'

    # Label with the time exactly as written in the file.
    # This might produce overhead for large .csv files
    times = [int(float(row[0])) for row in res_data]
    time_1 = res_data[times.index(int(time_1))][0]
    time_2 = res_data[times.index(int(time_2))][0]

    # --minvalue is optional; comparing against None raises TypeError on
    # Python 3, so only clamp when a threshold was actually supplied.
    if args.minvalue is not None:
        data_one = np.array([max(args.minvalue, i) for i in data_one])
        data_two = np.array([max(args.minvalue, i) for i in data_two])

    fig = scatter.plot_scatter_plot(x_data=data_one, y_data=data_two,
                                    labels=[label_template %
                                            (config_1, str(time_1)),
                                            label_template %
                                            (config_2, str(time_2))],
                                    title=args.title,
                                    max_val=args.max, min_val=args.min,
                                    grey_factor=args.grey_factor,
                                    linefactors=linefactors,
                                    user_fontsize=args.fontsize,
                                    debug=args.verbose)
    if args.save != "":
        print("Save plot to %s" % args.save)
        plot_util.save_plot(fig, args.save, plot_util.get_defaults()['dpi'])
    else:
        fig.show()
def scatter_plots(self, plot_log_perf: bool = False):
    '''
        Draw one scatter plot per unordered pair of algorithms in the
        scenario's performance data and save each as a .png in the
        output directory.

        Arguments
        ---------
        plot_log_perf: bool
            use log-scaled axes; only applied when the performance type
            is not "runtime"

        Returns
        -------
        list of (algorithm 1, algorithm 2, file name of the plot) tuples
    '''
    matplotlib.pyplot.close()
    self.logger.info("Plotting scatter plots........")

    def _plain_scatter(x_values, y_values, x_label, y_label):
        # Simple equal-aspect scatter used for non-runtime metrics
        plain_fig = figure(1, dpi=100)
        axes = subplot(aspect='equal')
        axes.grid(True, linestyle='-', which='major',
                  color='lightgrey', alpha=0.5)
        axes.scatter(x_values, y_values, marker='x', c='black')
        axes.set_xlabel(x_label, fontsize=20)
        axes.set_ylabel(y_label, fontsize=20)
        if plot_log_perf:
            axes.set_xscale("log")
            axes.set_yscale("log")
        return plain_fig

    generated = []
    self.algorithms = self.scenario.algorithms
    algo_count = len(self.scenario.algorithms)

    # Axis limit: the cutoff time for runtime scenarios, otherwise the
    # largest value found in the performance data
    if self.scenario.performance_type[0] != "runtime":
        max_val = self.scenario.performance_data.max().max()
    else:
        max_val = self.scenario.algorithm_cutoff_time

    for first in range(algo_count):
        for second in range(first + 1, algo_count):
            algo_1 = self.scenario.algorithms[first]
            algo_2 = self.scenario.algorithms[second]
            perf_1 = self.scenario.performance_data[algo_1].values
            perf_2 = self.scenario.performance_data[algo_2].values

            # Drop the previously drawn figure before starting a new one
            matplotlib.pyplot.close()

            metric = self.scenario.performance_type[0]
            if metric == "runtime":
                fig = plot_scatter_plot(
                    x_data=perf_1,
                    y_data=perf_2,
                    max_val=max_val,
                    labels=[algo_1, algo_2],
                    metric=self.scenario.performance_type[0])
            else:
                fig = _plain_scatter(perf_1, perf_2, algo_1, algo_2)

            fig.tight_layout()

            # Slashes in algorithm names would otherwise create sub-paths
            safe_names = (algo_1.replace("/", "_"), algo_2.replace("/", "_"))
            out_name = os.path.join(self.output_dn,
                                    "scatter_%s_%s.png" % safe_names)
            fig.savefig(out_name)
            generated.append((algo_1, algo_2, out_name))

    return generated
def main():
    """Scatter-plot the best configs of two runs at one point in time.

    For each of the two runs, the validationResults file (``--res1`` /
    ``--res2``) is searched for the configuration belonging to ``--time``
    (matched after truncation to int); the per-instance objectives of that
    configuration are then read from the matching objective matrix
    (``--obj1`` / ``--obj2``) and both series are passed to
    ``scatter.plot_scatter_plot``. The figure is saved when ``--save`` is
    given and shown otherwise.

    Without ``--time`` the times available in the first results file are
    listed and the program exits with status 0. Exits with status 1 on stray
    positional arguments, a grey-factor or line-factor below one, or a time
    that is not present in both results files.
    """
    prog = "python plot_scatter.py"
    description = "Plots performances of the best config at one time for two " \
                  "configuration runs"

    parser = ArgumentParser(description=description, prog=prog)

    # General Options
    parser.add_argument("--max", dest="max", type=float,
                        default=1000, help="Maximum of both axes")
    parser.add_argument("--min", dest="min", type=float,
                        default=None, help="Minimum of both axes")
    parser.add_argument("-s", "--save", dest="save",
                        default="",
                        help="Where to save plot instead of showing it?")
    parser.add_argument("--title", dest="title",
                        default="", help="Optional supertitle for plot")
    parser.add_argument("--greyFactor", dest="grey_factor", type=float,
                        default=1,
                        help="If an algorithms is not greyFactor-times better"
                             " than the other, show this point less salient,"
                             " > 1")
    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", default=False,
                        help="Plot some debug info")
    parser.add_argument(
        "-f", "--lineFactors", dest="linefactors", default=None,
        help="Plot X speedup/slowdown, format 'X,..,X' (no spaces)")
    parser.add_argument("--time", dest="time", default=None, type=float,
                        help="Plot config at which time?")
    parser.add_argument(
        "--obj1", dest="obj1", default=None, required=True,
        help="Path to validationObjectiveMatrix-traj-run-* file")
    parser.add_argument("--res1", dest="res1", required=True,
                        help="Path to validationResults-traj-run-* file")
    parser.add_argument(
        "--obj2", dest="obj2", default=None, required=True,
        help="Path to validationObjectiveMatrix-traj-run-* file")
    parser.add_argument("--res2", dest="res2", required=True,
                        help="Path to validationResults-traj-run-* file")
    parser.add_argument(
        "--minvalue", dest="minvalue", type=float, default=None,
        help="Replace all values smaller than this",
    )
    parser.add_argument(
        "--fontsize", dest="fontsize", type=int, default=20,
        help="Use this fontsize for plotting",
    )

    args, unknown = parser.parse_known_args()

    if len(unknown) != 0:
        print("Wrong number of arguments")
        parser.print_help()
        sys.exit(1)

    if args.grey_factor < 1:
        print("A grey-factor lower than one makes no sense")
        parser.print_help()
        sys.exit(1)

    # Load validationResults
    res1_header, res1_data = read_util.read_csv(args.res1, has_header=True)
    res2_header, res2_data = read_util.read_csv(args.res2, has_header=True)

    av_times = [float(row[0]) for row in res1_data]
    if args.time is None:
        # Print available times and quit
        print("Choose a time from")
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(0)

    # Now extract data; the config id is read from the second-to-last column
    config_1 = [
        int(float(row[len(res1_header) - 2].strip('"'))) for row in res1_data
        if int(float(row[0])) == int(args.time)
    ]
    config_2 = [
        int(float(row[len(res2_header) - 2].strip('"'))) for row in res2_data
        if int(float(row[0])) == int(args.time)
    ]
    if len(config_1) == 0 or len(config_2) == 0:
        print("Time int(%s) not found. Choose a time from:" % (args.time))
        print("\n".join(["* %s" % i for i in av_times]))
        sys.exit(1)
    config_1 = config_1[0]
    config_2 = config_2[0]

    obj1_header, obj1_data = read_util.read_csv(args.obj1, has_header=True)
    obj2_header, obj2_data = read_util.read_csv(args.obj2, has_header=True)

    head_template = '"Objective of validation config #%s"'
    idx_1 = obj1_header.index(head_template % config_1)
    idx_2 = obj2_header.index(head_template % config_2)

    data_one = np.array([float(row[idx_1].strip('"')) for row in obj1_data])
    data_two = np.array([float(row[idx_2].strip('"')) for row in obj2_data])

    print("Found %s points for config %d and %s points for config %d" %
          (str(data_one.shape), config_1, str(data_two.shape), config_2))

    linefactors = list()
    if args.linefactors is not None:
        linefactors = [float(i) for i in args.linefactors.split(",")]
        if len(linefactors) < 1:
            print("Something is wrong with linefactors: %s" % args.linefactors)
            sys.exit(1)
        if min(linefactors) < 1:
            print("A line-factor lower than one makes no sense")
            sys.exit(1)
    # The grey-factor boundary is drawn as a line as well
    if args.grey_factor > 1 and args.grey_factor not in linefactors:
        linefactors.append(args.grey_factor)

    # Axis labels carry at most 20 characters of each objective file name
    label_template = '%s %20s at %s sec'
    l1 = label_template % ("obj1", os.path.basename(args.obj1)[:20],
                           str(args.time))
    l2 = label_template % ("obj2", os.path.basename(args.obj2)[:20],
                           str(args.time))

    if args.minvalue is not None:
        print("Replace all values lower than %f" % args.minvalue)
        data_one = np.array([max(args.minvalue, i) for i in data_one])
        data_two = np.array([max(args.minvalue, i) for i in data_two])

    fig = scatter.plot_scatter_plot(x_data=data_one, y_data=data_two,
                                    labels=[l1, l2], title=args.title,
                                    max_val=args.max, min_val=args.min,
                                    grey_factor=args.grey_factor,
                                    linefactors=linefactors,
                                    user_fontsize=args.fontsize,
                                    debug=args.verbose)
    if args.save != "":
        print("Save plot to %s" % args.save)
        plot_util.save_plot(fig=fig, save=args.save,
                            dpi=plot_util.get_defaults()['dpi'])
    else:
        plt.show()