def err_fn_speedups(db, err_fn, output=None, sort=False, job="xval",
                    **kwargs):
    """
    Plot speedup over the baseline of all classifiers for an err_fn.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)

    for classifier in db.classification_classifiers:
        basename = ml.classifier_basename(classifier)
        performances = [
            row for row in db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn))
        ]
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=basename)

    plt.plot([1 for _ in performances], "-", label="ZeroR")

    title = kwargs.pop("title", err_fn)
    ax.set_yscale("log")
    plt.title(title)
    plt.ylabel("Speedup (log)")
    plt.xlabel("Test instances")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)

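# Example usage of err_fn_speedups(), as a sketch -- the database
# constructor, path and err_fn name below are assumptions, not taken
# from this module:
#
#     db = _db.Database("experiments.db")
#     err_fn_speedups(db, "default_fn", output="speedups.png", sort=True)
#
# A "title" keyword overrides the default title (the err_fn name); any
# remaining keyword arguments are forwarded to viz.finalise().
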
def classifier_speedups(db, classifier, output=None, sort=False,
                        job="xval_classifiers", **kwargs):
    """
    Plot speedup over the baseline of a classifier for each err_fn.
    """
    for err_fn in db.err_fns:
        performances = [
            row for row in db.execute(
                "SELECT speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn))
        ]
        if sort:
            performances = sorted(performances, reverse=True)
        plt.plot(performances, "-", label=err_fn)

    basename = ml.classifier_basename(classifier)
    plt.title(basename)
    plt.ylabel("Speedup")
    plt.xlabel("Test instances")
    plt.axhline(y=1, color="k")
    plt.xlim(xmin=0, xmax=len(performances))
    plt.legend()
    viz.finalise(output, **kwargs)

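# Example usage, one figure per classifier in the database (a sketch;
# the output paths are hypothetical). This mirrors the loop used by
# visualise_classification_job() below:
#
#     for i, classifier in enumerate(db.classification_classifiers):
#         classifier_speedups(db, classifier,
#                             output="classifier_{}.png".format(i))
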
def eval_classifiers(db, classifiers, err_fns, job, training, testing):
    """
    Cross validate a set of classifiers and err_fns.
    """
    for classifier in classifiers:
        meta = Classifier(classifier)
        prof.start("train classifier")
        meta.build_classifier(training)
        prof.stop("train classifier")

        basename = ml.classifier_basename(classifier.classname)
        for err_fn in err_fns:
            io.debug(job, basename, err_fn.func.__name__,
                     testing.num_instances)
            for instance in testing:
                eval_classifier_instance(job, db, meta, instance, err_fn,
                                         training)
            db.commit()

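# eval_classifiers() reads err_fn.func.__name__, which implies each
# err_fn is a functools.partial (or similar wrapper) around a plain
# error-handler function. A hedged sketch of how a caller might build
# the list -- the handler names and the extra db argument are
# assumptions, not taken from this module:
#
#     from functools import partial
#
#     err_fns = [partial(fn, db) for fn in (default_fn, random_fn)]
#     eval_classifiers(db, classifiers, err_fns, "xval",
#                      training, testing)
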
def runtime_regression(db, output=None, job="xval", **kwargs):
    """
    Plot accuracy of a classifier at predicted runtime.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = sns.color_palette()

    i, actual = 0, []
    for i, classifier in enumerate(db.regression_classifiers):
        basename = ml.classifier_basename(classifier)
        actual, predicted = zip(*sorted([
            row for row in db.execute(
                "SELECT\n"
                "    actual,\n"
                "    predicted\n"
                "FROM runtime_regression_results\n"
                "WHERE job=? AND classifier=?",
                (job, classifier))
        ], key=lambda x: x[0], reverse=True))
        if basename == "ZeroR":
            ax.plot(predicted, label=basename, color=colors[i - 1])
        else:
            ax.scatter(np.arange(len(predicted)), predicted,
                       label=basename, color=colors[i - 1])

    ax.plot(actual, label="Actual", color=colors[i])
    ax.set_yscale("log")
    plt.xlim(0, len(actual))
    plt.legend()
    title = kwargs.pop("title", "Runtime regression for " + job)
    plt.title(title)
    plt.xlabel("Test instances (sorted by descending runtime)")
    plt.ylabel("Runtime (ms, log)")
    viz.finalise(output, **kwargs)

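# Example usage (a sketch; the output path is hypothetical):
#
#     runtime_regression(db, output="runtime_regression.png", job="xval")
#
# With output=None, viz.finalise() is assumed to display the figure
# interactively rather than saving it.
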
def eval_speedup_regressors(db, classifiers, baseline, rank_fn, table, job,
                            training, testing):
    """
    Cross validate a set of speedup regressors, ranking the predicted
    speedup of every legal workgroup size for each scenario.
    """
    maxwgsize_index = testing.attribute_by_name("kern_max_wg_size").index
    wg_c_index = testing.attribute_by_name("wg_c").index
    wg_r_index = testing.attribute_by_name("wg_r").index

    insert_str = ("INSERT INTO {} VALUES "
                  "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)".format(table))

    for classifier in classifiers:
        meta = Classifier(classifier)
        prof.start("train classifier")
        meta.build_classifier(training)
        prof.stop("train classifier")

        basename = ml.classifier_basename(classifier.classname)
        classifier_id = db.classifier_id(classifier)
        io.debug(job, basename, testing.num_instances)

        # Pick one test instance for each unique scenario.
        scenarios = set(instance.get_string_value(0)
                        for instance in testing)
        instances = [
            (scenario,
             next(instance for instance in testing
                  if instance.get_string_value(0) == scenario))
            for scenario in scenarios
        ]

        for scenario, instance in instances:
            maxwgsize = int(instance.get_value(maxwgsize_index))
            wlegal = space.enumerate_wlegal_params(maxwgsize)
            predictions = []

            elapsed = 0
            for params in wlegal:
                wg_c, wg_r = unhash_params(params)
                instance.set_value(wg_c_index, wg_c)
                instance.set_value(wg_r_index, wg_r)

                # Predict the speedup for a particular set of
                # parameters.
                prof.start()
                predicted = meta.classify_instance(instance)
                elapsed += prof.elapsed()
                predictions.append((params, predicted))

            # Rank the predictions from highest to lowest speedup.
            predictions = sorted(predictions, key=lambda x: x[1],
                                 reverse=True)

            row = db.execute(
                "SELECT "
                "    oracle_param, "
                "    ("
                "        SELECT mean FROM runtime_stats "
                "        WHERE scenario=? AND params=?"
                "    ) * 1.0 / oracle_runtime AS oracle_speedup, "
                "    worst_runtime / oracle_runtime AS actual_range "
                "FROM scenario_stats "
                "WHERE scenario=?",
                (scenario, baseline, scenario)).fetchone()
            actual = row[:2]
            predicted_range = predictions[-1][1] - predictions[0][1]
            actual_range = row[2] - row[1]

            # Work down the ranked predictions until we find one for
            # which we have runtime data.
            num_attempts = 1
            while True:
                predicted = predictions.pop(0)
                try:
                    speedup = db.speedup(scenario, baseline, predicted[0])
                    perf = db.perf(scenario, predicted[0])
                    try:
                        speedup_he = db.speedup(scenario, HE_PARAM,
                                                predicted[0])
                    except _db.MissingDataError:
                        speedup_he = None
                    try:
                        speedup_mo = db.speedup(scenario, MO_PARAM,
                                                predicted[0])
                    except _db.MissingDataError:
                        speedup_mo = None

                    db.execute(insert_str,
                               (job, classifier_id, scenario,
                                actual[0], actual[1],
                                predicted[0], predicted[1],
                                actual_range, predicted_range,
                                num_attempts,
                                1 if predicted[0] == actual[0] else 0,
                                perf, speedup, speedup_he, speedup_mo,
                                elapsed))
                    break
                except _db.MissingDataError:
                    num_attempts += 1

        db.commit()

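# unhash_params() above is imported from elsewhere in the package. A
# minimal sketch of the convention it appears to implement, judging by
# workgroup-size strings such as the "4x4" baseline used in this file
# (an assumption -- the real implementation may differ):
#
#     def unhash_params(params):
#         """Split a "CxR" param string into integer (wg_c, wg_r)."""
#         wg_c, wg_r = params.split("x")
#         return int(wg_c), int(wg_r)
#
#     wg_c, wg_r = unhash_params("64x16")  # -> (64, 16)
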
def test_classifier_basename(self):
    self._test("ZeroR",
               ml.classifier_basename("weka.classifiers.rules.ZeroR"))
    self._test("SMO",
               ml.classifier_basename(
                   "weka.classifiers.functions.SMO -C 1.0"))

def visualise_classification_job(db, job):
    basedir = "img/classification/{}/".format(job)

    fs.mkdir(basedir)
    fs.mkdir(basedir + "classifiers")
    fs.mkdir(basedir + "err_fns")

    visualise.err_fn_performance(db, basedir + "err_fns.png", job=job)

    # Bar plot of all results.
    visualise.classification(db, "img/classification/{}.png".format(job),
                             job=job)

    # Per-classifier plots.
    for i, classifier in enumerate(db.classification_classifiers):
        visualise.classifier_speedups(
            db, classifier, basedir + "classifiers/{}.png".format(i),
            job=job)

    # Per-err_fn plots.
    for err_fn in db.err_fns:
        visualise.err_fn_speedups(
            db, err_fn, basedir + "err_fns/{}.png".format(err_fn),
            job=job, sort=True)

    # Results table.
    query = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn", (job,))
    results = []
    for classifier, err_fn, count in query:
        correct, illegal, refused, performance, speedup = zip(*[
            row for row in db.execute(
                "SELECT correct,illegal,refused,performance,speedup\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn))
        ])
        results.append([
            classifier, err_fn,
            (sum(correct) / count) * 100,
            (sum(illegal) / count) * 100,
            (sum(refused) / count) * 100,
            min(performance) * 100,
            labmath.geomean(performance) * 100,
            max(performance) * 100,
            min(speedup),
            labmath.geomean(speedup),
            max(speedup)
        ])

    str_args = {"float_format": lambda f: "{:.2f}".format(f)}

    for i in range(len(results)):
        results[i][0] = ml.classifier_basename(results[i][0])

    columns = (
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )

    latex.table(results,
                output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=columns, **str_args)

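# Example usage, generating every plot and the LaTeX results table for
# one cross-validation job (a sketch; assumes experiment.TAB_ROOT is
# configured and the database contains classification_results rows for
# the job):
#
#     visualise_classification_job(db, "xval")
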
def classification(db, output=None, job="xval", **kwargs):
    """
    Plot classification accuracy, speedup and performance of each
    classifier and err_fn.
    """
    err_fns = db.err_fns
    base_err_fn = err_fns[0]

    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=?"
        " AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn))

    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "    DISTINCT runtime_stats.scenario, "
            "    (scenario_stats.oracle_runtime / runtime_stats.mean)"
            " * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "    ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "    ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?",
            (job, baseline))
    ]
    perf = (labmath.mean(perfs),
            labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup, perf, perf, perf])

    # Get results.
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier,
             base_err_fn)).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(*[
        (text.truncate(result[0], 40), result[1], result[2],
         result[3], result[4], result[5])
        for result in results
    ])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, time, fmt="none", yerr=terr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)

    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])

        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)

    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art,
                 bbox_inches="tight", **kwargs)

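# Example usage of classification() (a sketch; the output path is
# hypothetical). Because the legend is placed beneath the axes, the
# function passes additional_artists and bbox_inches="tight" through to
# the save call so the legend is not cropped:
#
#     classification(db, output="classification.png", job="xval")
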
def speedup_classification(db, output=None, job="xval", **kwargs):
    """
    Plot performance of classification using speedup regression.
    """
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM speedup_classification_results\n"
        "WHERE job=? GROUP BY classifier", (job,))

    results = []
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100\n"
            "FROM speedup_classification_results\n"
            "WHERE job=? AND classifier=?",
            (count, job, classifier)).fetchone()[0]
        # Get the mean speedup and performance, with confidence
        # intervals.
        speedups = [
            row for row in db.execute(
                "SELECT\n"
                "    AVG(speedup) * 100,\n"
                "    CONFERROR(speedup, .95) * 100,\n"
                "    AVG(performance) * 100,\n"
                "    CONFERROR(performance, .95) * 100\n"
                "FROM speedup_classification_results\n"
                "WHERE job=? AND classifier=?",
                (job, classifier)).fetchone()
        ]
        results.append([basename, correct] + speedups)

    # Zip into lists.
    labels, correct, speedups, yerrs, perfs, perf_yerrs = zip(*results)
    X = np.arange(len(labels))
    # Bar width.
    width = (.8 / (len(results[0]) - 1))

    plt.bar(X + width, correct, width=width,
            color=sns.color_palette("Blues", 1), label="Accuracy")
    plt.bar(X + 2 * width, speedups, width=width,
            color=sns.color_palette("Greens", 1), label="Speedup")
    plt.bar(X + 3 * width, perfs, width=width,
            color=sns.color_palette("Oranges", 1), label="Performance")

    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = plt.errorbar(X + 2.5 * width, speedups, fmt="none",
                              yerr=yerrs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)
    _, caps, _ = plt.errorbar(X + 3.5 * width, perfs, fmt="none",
                              yerr=perf_yerrs, capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    plt.xlim(xmin=-.2)
    plt.xticks(X + .4, labels)
    plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))

    title = kwargs.pop("title", "Classification results for " + job +
                       " using speedup regression")
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    # http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art,
                 bbox_inches="tight", **kwargs)

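# Example usage (a sketch; the output path is hypothetical):
#
#     speedup_classification(db, output="speedup_classification.png",
#                            job="xval")
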