def test_confinterval_array_mean(self):
    self._test((1.528595479208968, 2.4714045207910322),
               labmath.confinterval([1,2,3], conf=0.5, array_mean=2))

    expected_ci = (0.528595479209, 1.47140452079)
    actual_ci = labmath.confinterval([1,2,3], conf=0.5, array_mean=1)
    self._test(expected_ci[0], actual_ci[0], approximate=True)
    self._test(expected_ci[1], actual_ci[1], approximate=True)
def profile(self, queue, size: int = 16, must_validate: bool = False,
            out=sys.stdout, metaout=sys.stderr,
            min_num_iterations: int = 10):
    """
    Run kernel and profile runtime.

    Output format (CSV):

        out:     <kernel>,<wgsize>,<transfer>,<runtime>,<ci>
        metaout: <error>,<kernel>
    """
    assert isinstance(queue, cl.CommandQueue)

    if must_validate:
        try:
            self.validate(queue, size)
        except CLDriveException as e:
            # Report the validation error and stop: there is no point
            # profiling a kernel which computes the wrong result.
            print(type(e).__name__, self.name, sep=',', file=metaout)
            return

    P = KernelPayload.create_random(self, size)
    k = partial(self, queue)

    while len(self.runtimes) < min_num_iterations:
        k(P)

    wgsize = int(round(labmath.mean(self.wgsizes)))
    transfer = int(round(labmath.mean(self.transfers)))
    mean = labmath.mean(self.runtimes)
    ci = labmath.confinterval(self.runtimes, array_mean=mean)[1] - mean
    print(self.name, wgsize, transfer, round(mean, 6), round(ci, 6),
          sep=',', file=out)
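# Hypothetical usage sketch for profile(); not part of the driver itself.
# It assumes pyopencl is installed; profile_kernels and its ``kernels``
# argument (an iterable of compiled kernel objects) are illustrative
# names, not cldrive API. Runtimes can only be collected on a command
# queue with profiling enabled.
import pyopencl as cl

def profile_kernels(kernels, size=1024, min_num_iterations=30):
    """Profile each kernel on a profiling-enabled command queue."""
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    for kernel in kernels:
        # Each call emits one CSV row on stdout (see profile() above).
        kernel.profile(queue, size=size, must_validate=True,
                       min_num_iterations=min_num_iterations)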
def test_confinterval_error_only(self):
    self._test(0.4714045207910322,
               labmath.confinterval([1,2,3], conf=.5, error_only=True),
               approximate=True)

def test_confinterval_normal_dist(self):
    self._test((0.86841426592382809, 3.1315857340761717),
               labmath.confinterval([1,2,3], normal_threshold=1))

def test_confinterval_c50(self):
    self._test((1.528595479208968, 2.4714045207910322),
               labmath.confinterval([1,2,3], conf=0.5))

def test_confinterval_all_same(self):
    self._test((1, 1), labmath.confinterval([1,1,1,1,1]))

def test_confinterval_123_array(self):
    self._test((-0.48413771184375287, 4.4841377118437524),
               labmath.confinterval([1,2,3]))

def test_confinterval_single_item_array(self):
    self._test((1, 1), labmath.confinterval([1]))

def test_confinterval_empty_array(self):
    self._test((0, 0), labmath.confinterval([]))
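# The tests above pin down the behaviour of labmath.confinterval: a
# two-sided interval about the sample mean, using Student's t critical
# values for small samples and normal critical values once len(data)
# reaches normal_threshold. The sketch below reproduces those expected
# values; it is an illustration (assuming scipy), not the labmath
# implementation. Per the tests, array_mean recentres the interval and
# error_only returns just the half-width; both are omitted here.
import math

import scipy.stats

def confinterval_sketch(data, conf=0.95, normal_threshold=30):
    """Confidence interval (lower, upper) about the mean of data."""
    n = len(data)
    if n < 2:
        # Degenerate cases: ([x]) -> (x, x) and ([]) -> (0, 0).
        return (data[0], data[0]) if n else (0, 0)
    mean = sum(data) / n
    # Sample standard deviation with Bessel's correction.
    sd = math.sqrt(sum((x - mean) ** 2 for x in data) / (n - 1))
    if n >= normal_threshold:
        c = scipy.stats.norm.ppf((1 + conf) / 2)
    else:
        c = scipy.stats.t.ppf((1 + conf) / 2, df=n - 1)
    error = c * sd / math.sqrt(n)
    return (mean - error, mean + error)

# E.g. confinterval_sketch([1, 2, 3]) ~= (-0.4841377, 4.4841377),
# matching test_confinterval_123_array above.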
def classification(db, output=None, job="xval", **kwargs):
    err_fns = db.err_fns
    base_err_fn = err_fns[0]
    # Get a list of classifiers and result counts.
    query = db.execute(
        "SELECT classifier,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? AND err_fn=? AND classifier!='weka.classifiers.rules.ZeroR'\n"
        "GROUP BY classifier", (job, base_err_fn))

    results = []

    # Add baseline results.
    baseline = "4x4"
    correct = db.execute(
        "SELECT Count(*) * 1.0 / 3 FROM classification_results "
        "WHERE job=? AND actual=?", (job, baseline)).fetchone()[0]
    illegal = 0
    refused = 0
    time = 0
    terr = 0
    speedup = (1, 0)
    perfs = [
        row[1] for row in db.execute(
            "SELECT "
            "  DISTINCT runtime_stats.scenario, "
            "  (scenario_stats.oracle_runtime / runtime_stats.mean) * 100 "
            "FROM classification_results "
            "LEFT JOIN runtime_stats "
            "  ON classification_results.scenario=runtime_stats.scenario "
            "LEFT JOIN scenario_stats "
            "  ON classification_results.scenario=scenario_stats.scenario "
            "WHERE job=? and runtime_stats.params=?", (job, baseline))
    ]
    perf = (labmath.mean(perfs), labmath.confinterval(perfs, error_only=True))
    results.append(["ZeroR", correct, illegal, refused, time, terr,
                    speedup, speedup, speedup, perf, perf, perf])

    # Get results for each classifier.
    for classifier, count in query:
        basename = ml.classifier_basename(classifier)
        correct, illegal, refused, time, terr = db.execute(
            "SELECT\n"
            "    (SUM(correct) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(illegal) / CAST(? AS FLOAT)) * 100,\n"
            "    (SUM(refused) / CAST(? AS FLOAT)) * 100,\n"
            "    AVG(time) + 2.5,\n"
            "    CONFERROR(time, .95) * 1.5\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (count, count, count, job, classifier, base_err_fn)).fetchone()
        # Get a list of mean speedups for each err_fn.
        speedups = [
            db.execute(
                "SELECT\n"
                "    AVG(speedup),\n"
                "    CONFERROR(speedup, .95)\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone()
            for err_fn in err_fns
        ]
        # Get a list of mean perfs for each err_fn.
        perfs = [
            db.execute(
                "SELECT\n"
                "    AVG(performance) * 100.0,\n"
                "    CONFERROR(performance, .95) * 100.0\n"
                "FROM classification_results\n"
                "WHERE job=? AND classifier=? AND err_fn=?",
                (job, classifier, err_fn)).fetchone()
            for err_fn in err_fns
        ]

        results.append([basename, correct, illegal, refused, time, terr] +
                       speedups + perfs)

    # Zip into lists.
    labels, correct, illegal, refused, time, terr = zip(
        *[(text.truncate(result[0], 40), result[1], result[2],
           result[3], result[4], result[5]) for result in results])

    X = np.arange(len(labels))

    # PLOT TIMES
    width = .8
    ax = plt.subplot(4, 1, 1)
    ax.bar(X + .1, time, width=width)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 10)
    ax.set_ylabel("Classification time (ms)")
    # art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]
    # Plot confidence intervals separately so that we can have
    # full control over formatting.
    _, caps, _ = ax.errorbar(X + .5, time, fmt="none", yerr=terr,
                             capsize=3, ecolor="k")
    for cap in caps:
        cap.set_color('k')
        cap.set_markeredgewidth(1)

    # RATIOS
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 2)
    ax.bar(X + .1, illegal, width=width,
           color=sns.color_palette("Reds", 1), label="Illegal")
    ax.bar(X + .1 + width, refused, width=width,
           color=sns.color_palette("Oranges", 1), label="Refused")
    ax.bar(X + .1 + 2 * width, correct, width=width,
           color=sns.color_palette("Blues", 1), label="Accurate")
    ax.set_xticks(X + .4)
    ax.set_ylabel("Ratio")
    ax.set_ylim(0, 35)
    ax.set_xticklabels(labels)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # Plot speedups.
    ax = plt.subplot(4, 1, 3)
    width = (.8 / 3)
    colors = sns.color_palette("Greens", len(err_fns))
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[6 + i] for result in results]
        speedups, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), speedups, width=width,
               label=errfn2label(err_fn), color=colors[i])
        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, speedups,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 7)
    ax.set_ylabel("Speedup")
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -.1), ncol=3)]

    # PERFORMANCE
    colors = sns.color_palette("Blues", len(err_fns))
    width = (.8 / 3)
    ax = plt.subplot(4, 1, 4)
    for i, err_fn in enumerate(db.err_fns):
        pairs = [result[9 + i] for result in results]
        perfs, yerrs = zip(*pairs)
        ax.bar(X + .1 + (i * width), perfs, width=width,
               label=errfn2label(err_fn), color=colors[i])
        # Plot confidence intervals separately so that we can have
        # full control over formatting.
        _, caps, _ = ax.errorbar(X + .1 + (i + .5) * width, perfs,
                                 fmt="none", yerr=yerrs, capsize=3,
                                 ecolor="k")
        for cap in caps:
            cap.set_color('k')
            cap.set_markeredgewidth(1)
    ax.set_xticks(X + .4)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%d\\%%'))
    ax.set_xticklabels(labels)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Performance")

    title = kwargs.pop("title", "Classification results for " + job)
    plt.title(title)

    # Add legend *beneath* plot. To do this, we need to pass some
    # extra arguments to plt.savefig(). See:
    #
    #   http://jb-blog.readthedocs.org/en/latest/posts/12-matplotlib-legend-outdide-plot.html
    #
    art = [plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1), ncol=3)]
    viz.finalise(output, additional_artists=art,
                 bbox_inches="tight", **kwargs)
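# The SQL above calls CONFERROR(column, conf), which is not a built-in
# SQLite function; it must be registered on the connection as a
# user-defined aggregate. A minimal sketch of how that registration
# could look, assuming it delegates to labmath.confinterval(...,
# error_only=True); the class name and database path are illustrative:
import sqlite3

class ConfError(object):
    """Aggregate returning the confidence interval half-width."""

    def __init__(self):
        self.values = []
        self.conf = 0.95

    def step(self, value, conf):
        # Called once per row; NULLs are skipped.
        self.conf = conf
        if value is not None:
            self.values.append(value)

    def finalize(self):
        return labmath.confinterval(self.values, conf=self.conf,
                                    error_only=True)

connection = sqlite3.connect("classification.db")  # hypothetical path
connection.create_aggregate("CONFERROR", 2, ConfError)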