def callback(br, m):
    _ = users, sct  # FIXME: ip only seems to take local and global; sct is neither..
    newsizes = np.array(map(m.ix[0].similarity, users)) * 30
    #s = sct.get_sizes()
    #s[:] = newsizes
    sct._sizes = newsizes
    ip()
def callback(event):
    print 'callback:', event
    ax = event.inaxes
    pl.ion()
    newfig = pl.figure()
    ax.set_figure(newfig)
    newfig.set_axes([ax])
    newfig.canvas.show()
    ip()
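# The two callbacks above assume they have already been wired up to a figure.
# Below is a minimal, self-contained sketch (not part of the original code) of
# how such a callback is typically registered through matplotlib's event API;
# 'button_press_event' is one of the standard event names.
def _example_register_callback():
    import matplotlib.pyplot as pl

    def on_click(event):
        # event.inaxes is None when the click lands outside any axes.
        print 'clicked at', event.xdata, event.ydata, 'in', event.inaxes

    fig = pl.figure()
    fig.canvas.mpl_connect('button_press_event', on_click)
    pl.show()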
def play(self):
    df = self.data_frame()
    df1 = df.set_index(['fontsize', 'fontname']).sort(ascending=False)
    for k, v in df.groupby(['fontsize', 'fontname'], sort=True):
        print '-----'
        print unicode(k).encode('utf8'), unicode(v).encode('utf8')
    print df1.to_string()
    from arsenal.debug import ip
    ip()
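# A small, self-contained sketch (hypothetical data, not part of the original
# module) of the multi-column groupby pattern used in play() above: pandas
# iterates over (key, sub-frame) pairs, where the key is a tuple of the two
# grouping columns.
def _example_groupby():
    import pandas as pd
    df = pd.DataFrame({'fontsize': [12, 12, 9],
                       'fontname': ['Times', 'Times', 'Arial'],
                       'text': ['a', 'b', 'c']})
    for key, group in df.groupby(['fontsize', 'fontname'], sort=True):
        print '-----'
        print key              # e.g. (9, 'Arial')
        print group.to_string()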
def main(): "Command-line interface for running test cases." from argparse import ArgumentParser p = ArgumentParser() p.add_argument('--boolean', action='store_true') p.add_argument('--minlength', type=int, default=5) p.add_argument('--maxlength', type=int, default=30) p.add_argument('--examples', type=int, required=True) p.add_argument('--seed', type=int, default=None) p.add_argument('--grammar', choices=('medium', 'big'), default='medium') p.add_argument('--aggressive', type=float, default=0.5, help='Pruning rate (zero=no pruning, one=lots of pruning).') args = p.parse_args() np.random.seed(args.seed) s = Setup(train=args.examples, grammar=args.grammar, maxlength=args.maxlength, minlength=args.minlength, features=False) test = _test_correctness_boolean if args.boolean else _test_correctness for i, example in enumerate(s.train): print colors.yellow % '==============================================================' print 'example: %s length: %s' % (i, example.N) test(example, s.grammar, args.aggressive) print colors.green % '==============================================================' print colors.green % 'DONE' print if 0: from arsenal.debug import ip ip() else: pl.ioff() pl.show()
def icons(users, distance):
    """Visualization using user profile images as the points."""
    # It would be pretty cool to put user thumbnails where the points are,
    # but I'm still not sure how to do this yet.
    images = []
    try:
        print 'getting images..'
        for p in users:
            print p
            f = p.image
            img = imread('image.tmp')
            images.append(img)
    except Exception as e:
        print 'got an error...'
        import traceback
        etype, evalue, tb = sys.exc_info()
        print yellow % '\n'.join(traceback.format_exception(etype, evalue, tb))
        ip()

    (W, H, _) = shape(img)   # thumbnails should all be the same size
    count = len(images)

    pl.figure()

    P2, _ = mds(distance, 2)
    X, Y = P2[:, 0], P2[:, 1]

    ## XXX: not a great transformation b/c we might stretch more in one dimension
    def N(x):
        "force x to fit in interval [0,1]"
        x = (x - x.min())
        x = x / x.max()
        assert all(x >= 0) and all(x <= 1)
        return x

    X = N(X) * 475
    Y = N(Y) * 425

    figimages = [pl.figimage(img, xo=x, yo=y) for img, x, y in zip(images, X, Y)]
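# A minimal, self-contained sketch (illustrative names and sizes, not the
# original code) of the min-max rescaling that N() performs above to map an
# arbitrary 2-D embedding into figure pixel coordinates before calling
# pl.figimage.
def _example_minmax_layout():
    import numpy as np

    def minmax(x):
        "Affinely map x onto [0, 1] (assumes x is not constant)."
        x = x - x.min()
        return x / x.max()

    pts = np.random.randn(10, 2)     # stand-in for the 2-D MDS embedding
    X = minmax(pts[:, 0]) * 475       # scale to an assumed figure width in pixels
    Y = minmax(pts[:, 1]) * 425       # scale to an assumed figure height in pixels
    assert X.min() >= 0 and X.max() <= 475
    assert Y.min() >= 0 and Y.max() <= 425
    return X, Y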
def main():
    from argparse import ArgumentParser
    p = ArgumentParser()
    p.add_argument('--save', default='tmp/results.csv')
    p.add_argument('--interpolation',
                   choices=['linear', 'pessimistic', 'parametric', 'linear-convex'],
                   default='pessimistic')

    # reward definition
    p.add_argument('--accuracy',
                   #choices=ACC.OPTS,
                   #default='evalb',
                   required=True,
                   help='Measurement used for plotting.')
    p.add_argument('--runtime',
                   #choices=RUN.OPTS,
                   #default='pushes',
                   required=True,
                   help='Measurement used for plotting.')

    # what jobs to show
    p.add_argument('--target', required=True)
    p.add_argument('--baseline', required=False)
    p.add_argument('--others', nargs='*', default=[])
    p.add_argument('--filter', nargs='*', default=[],
                   help="e.g., --filter 'df.args_C==-12'")
    # TODO: add nicer filters for the things I've been doing with --filter.
    # p.add_argument('--grammar')
    # p.add_argument('--surrogate-accuracy', help='measure to filter jobs by.')
    # p.add_argument('--surrogate-runtime', help='measure to filter jobs by.')

    # finalization
    p.add_argument('--last', action='store_true')
    p.add_argument('--early-stop', action='store_true')
    p.add_argument('--early-stop-dev-cheat', action='store_true')
    p.add_argument('--baseline-is-init', action='store_true')

    # extra plots
    p.add_argument('--tradeoff-plot', action='store_true')
    p.add_argument('--lc', action='store_true')
    p.add_argument('--show-train', action='store_true')
    p.add_argument('--show-each', action='store_true', help='flip thru learning curves')

    # misc
    p.add_argument('--kill-mode', action='store_true',
                   help='flip thru learning curves asking "kill? [y/n]"')
    p.add_argument('--jobids', nargs='*', default=[])
    p.add_argument('-i', action='store_true')
    p.add_argument('--other-files', nargs='*', default=[])

    args = p.parse_args()

    # use linear interpolation for plotting the Pareto frontier
    global show_frontier
    if args.interpolation in {'linear', 'linear-convex'}:
        def _show_frontier_linear(*a, **k):
            k['interpolation'] = args.interpolation   #'linear'
            _show_frontier(*a, **k)
        show_frontier = _show_frontier_linear

    if args.kill_mode:
        args.show_each = True
    if args.show_each:
        args.lc = True

    results = path('results')
    filters = set(args.others + [args.target, args.baseline])

    ACCURACY = args.accuracy
    RUNTIME = args.runtime

    # if args.load:
    #     D = read_csv('tmp/results.csv', index_col=0)
    #     jobs = read_csv('tmp/jobs.csv', index_col=0)
    # else:
    data, jobs = load_results(results, args, filters)

    D = DataFrame(data)
    #D.to_csv('tmp/results.csv')
    jobs = DataFrame(jobs)
    #jobs.to_csv('tmp/jobs.csv')

    target = args.target
    df = D[D.name == target]

    # apply CLI filter options
    for f in args.filter:
        df = df[eval(f)]   # this is pretty ghetto.

    if args.baseline_is_init:
        # Note: this will use the same filters as the main experiment (e.g.,
        # regularization parameters).
        baseline = df[(df.iteration == 1)]
    else:
        baseline = D[D.name == args.baseline]

    # NOTE: do before iteration filters (e.g., early-stop/last)
    def PPPP(df):
        "Patience: Find out how long it's been since the last improvement."
        P = []
        for jobid, D in df.groupby('jobid'):
            #p = running_max(list(D.iteration), list(D.dev_reward))
            p = running_max(list(D.iteration), list(D.dev_new_policy_reward))
            P.append({'jobid': jobid, 'patience': D.iteration.max() - p[-1, 0]})
        return DataFrame(P).set_index('jobid')

    P = PPPP(df)

    # TODO: In most experiments, train rewards are based on a sample which
    # varies across iterations -- maybe I should use the same examples
    # throughout. Thus, we probably don't really want to do early stopping based
    # on this value without smoothing or something.
    if args.last or args.early_stop or args.early_stop_dev_cheat:
        assert not (args.early_stop and args.last), "Can't have both."
        ddd = []
        for _, dd in df.groupby('jobid'):
            if args.last:
                best = dd.ix[dd.iteration == dd.iteration.max()]                                  # last iteration
            elif args.early_stop:
                best = dd.ix[dd.train_new_policy_reward == dd.train_new_policy_reward.max()]      # best train iteration
            elif args.early_stop_dev_cheat:
                best = dd.ix[dd.dev_new_policy_reward == dd.dev_new_policy_reward.max()]          # best dev iteration
                # Break ties in dev reward in favor of the training reward
                #_best = dd.ix[dd[dd.dev_new_policy_reward == dd.dev_new_policy_reward.max()].train_new_policy_reward.argmax()]
                #_best = dd.ix[dd.dev_new_policy_reward.argmax()]
                #print _best.iteration, 'out of', dd.iteration.max(), 'iterations'
                #print _best.log
                #print >> get_params, _best.log.dirname() / 'new_policy-%03d.npz' % _best.iteration
            else:
                raise ValueError('Unrecognized option.')

            # We require the following yucky code because `best.to_dict()`
            # returns a dict with values that are each
            # <strike>single-entry</strike> dicts.
            #
            # ^^^ I think this happens because best might contain more than one
            # value, so clearly when you convert it to a dict you should get a
            # collection of potential values -- that's why there's a dict.
            row = {}
            for k, v in best.to_dict().items():
                v = list(v.values())[-1]   # take the last one if there are ties.
                row[k] = v
            ddd.append(row)

        df = DataFrame(ddd)

    assert not df.empty, 'DataFrame is empty.'

    if baseline is not None and not baseline.empty:
        args_check(baseline, 'baseline')
    args_check(df, 'df')

    if args.tradeoff_plot:
        # TODO: [2015-02-27 Fri] maybe we should sample tradeoff on a nonlinear
        # scale (e.g., log-scale). We seem to get a much more linear response
        # from training. This would help prevent the over-sampling of values
        # with low accuracy and low runtime.
        pl.figure()
        pl.scatter(df.tradeoff, df.dev_accuracy, lw=0)
        pl.title(r'accuracy (%s) by $\lambda$' % ACCURACY)
        pl.xlabel(r'tradeoff ($\lambda$)')
        pl.ylabel(r'accuracy (%s)' % ACCURACY)

        pl.figure()
        pl.scatter(df.tradeoff, df.dev_runtime, lw=0)
        pl.title(r'runtime (%s) by $\lambda$' % RUNTIME)
        pl.xlabel(r'tradeoff ($\lambda$)')
        pl.ylabel(r'runtime (%s)' % RUNTIME)

        # TODO: It would be interesting to compare the baseline's tradeoff
        # parameter to ours, but they are sort of incomparable.
        #
        #pl.figure()
        #pl.scatter(baseline.tradeoff, baseline.dev_accuracy, lw=0)
        #pl.title(r'BASELINE accuracy (%s) by $\lambda$' % ACCURACY)
        #pl.xlabel(r'tradeoff ($\lambda$)')
        #pl.ylabel(r'accuracy (%s)' % ACCURACY)
        #pl.figure()
        #pl.scatter(baseline.tradeoff, baseline.dev_runtime, lw=0)
        #pl.title(r'BASELINE runtime (%s) by $\lambda$' % RUNTIME)
        #pl.xlabel(r'tradeoff ($\lambda$)')
        #pl.ylabel(r'runtime (%s)' % RUNTIME)

    frontier = Frontier(args.target, df, args.accuracy, args.runtime)
    frontier.plot()

    #
    # Plot reference policies (oracle1, unpruned and fast-mle).
    #
    #if args.target in {'searn4', 'searn5'}:
    #    if ACCURACY != 'no-fail':
    #        # Note: this is sort of silly. Jobs each have a copy of the
    #        # baseline.csv file. Here we take the first one that comes
    #        # up. (Warning: we might mix baselines, so be careful). This "guess"
    #        # lets us avoid passing the file in at the command-line. It's
    #        # conceivable that we might want to show multiple reference policies
    #        # (e.g., different grammars on the same plot), in which case we
    #        # should probably have a CLI option to specify these files.
    #        #
    #        # TODO: report baseline parser's accuracy at most-acc's runtime
    #        #
    #        # - Create a class for representing a Pareto frontier, which supports the
    #        #   relevant query types: accuracy @ runtime and runtime @ accuracy.
    #
    #        show_reference_policies = 0
    #        if show_reference_policies:
    #
    #            baseline_csv = path('.').glob('results/*-%s-*/dump/baseline.csv' % args.target)[0]
    #            B = read_csv(baseline_csv)
    #
    #            # Show reference policies (e.g., unpruned, oracle)
    #            marker = {'oracle1': '*', 'fastmle': '^', 'unpruned': 'x'}
    #            for policy in ['oracle1', 'unpruned']:
    #                for name in ['train', 'dev']:
    #                    if policy == 'unpruned':   # XXX: skip unpruned because it makes the plot ugly
    #                        continue
    #                    getattr(frontier, '%s_ax' % name) \
    #                        .scatter([B['%s_%s_%s' % (name, policy, RUNTIME)]],
    #                                 [B['%s_%s_%s' % (name, policy, ACCURACY)]],
    #                                 c='r', s=40, marker=marker[policy])
    #
    #        if args.show_init:
    #            if len(init_run) == 0:
    #                print '[%s]' % red % 'error', 'Failed to find initializer.'
    #            else:
    #                frontier.dev_ax.scatter(init_run, init_acc, s=75, c='k', marker='^')
    #
    #        if show_reference_policies:
    #            [[unpruned_acc, unpruned_run]] = B[['dev_unpruned_%s' % ACCURACY, 'dev_unpruned_%s' % RUNTIME]].get_values()
    #            most_acc_acc, most_acc_run = df.ix[df['dev_accuracy'].argmax()][['dev_accuracy', 'dev_runtime']]
    #            print 'unpruned: %.4f %g' % (unpruned_acc*100, unpruned_run)
    #            print 'most_acc: %.4f %g' % (most_acc_acc*100, most_acc_run)
    #            print 'MOSTACC: %.2f points more accurate and %.2fx faster than unpruned.' % (100*(most_acc_acc - unpruned_acc), unpruned_run / most_acc_run)
    #
    #        if show_reference_policies:
    #            # [2015-06-08 Mon] hack together fast-mle by piecing together
    #            # unpruned accuracy with oracle runtime (which isn't really exact if more
    #            # grammar rules fire on the unpruned mask... it's not unreasonable
    #            # that more rules fire on unpruned since the gold mask might be
    #            # unsupported by the parser).
    #            [acc] = B['dev_unpruned_%s' % ACCURACY].get_values()
    #            [run] = B['dev_oracle1_%s' % RUNTIME].get_values()
    #            frontier.dev_ax.scatter([run], [acc], c='r', s=40, marker=marker['fastmle'])
    #            [acc] = B['train_unpruned_%s' % ACCURACY].get_values()
    #            [run] = B['train_oracle1_%s' % RUNTIME].get_values()
    #            frontier.train_ax.scatter([run], [acc], c='r', s=40, marker=marker['fastmle'])
    #
    #    else:
    #        print '[%s] %s' % (red % 'ERROR', 'no baseline.csv file found')

    if 1:
        frontier.show_baseline(baseline)

    # Show frontiers for 'other' things. Not the baseline (because the baseline
    # gets special handling), but things like older experiments.
    others = set(D.name.unique()) - {args.target, args.baseline}
    if others:
        print
        print yellow % 'Other curves'
        print yellow % '============'
        for other, color in zip(sorted(others), cycle(['m', 'g', 'c', 'k', 'b'])):
            print '%-9s' % other, color
            alpha = 1.0
            d = D[D.name == other]
            args_check(d, other)
            if not d.train_runtime.isnull().all():
                show_frontier(d.train_runtime, d.train_accuracy,
                              ax=frontier.train_ax, c=color, alpha=alpha,
                              XMAX=frontier.XMAX, YMIN=frontier.YMIN, lw=LW)
            if not d.dev_runtime.isnull().all():
                # XXX: this is just some cruft from debugging a set of jobs. Can probably delete.
                #print d[['dev_runtime','dev_accuracy']].sort('dev_accuracy')
                #print df[['dev_runtime','dev_accuracy']].sort('dev_accuracy')
                #assert (np.abs(np.array(df.dev_accuracy.sort_values()) - np.array(d.dev_accuracy.sort_values())) < 1e-5).all()
                #assert (np.abs(np.array(df.dev_runtime.sort_values()) - np.array(d.dev_runtime.sort_values())) < 1e-5).all()
                show_frontier(d.dev_runtime, d.dev_accuracy,
                              ax=frontier.dev_ax, c=color, alpha=alpha, lw=LW,
                              XMAX=frontier.XMAX, YMIN=frontier.YMIN, label=other)

    for other in args.other_files:
        dd = read_csv(other, index_col=0)
        dd['dev_accuracy'] = dd['dev_new_policy_%s' % ACCURACY]
        dd['dev_runtime'] = dd['dev_new_policy_%s' % RUNTIME]
        dd['dev_reward'] = dd.dev_accuracy - dd.tradeoff * dd.dev_runtime
        show_frontier(dd.dev_runtime, dd.dev_accuracy,
                      ax=frontier.dev_ax, lw=LW, label=other)

    frontier.dev_ax.legend(loc=4)
    print

    if len(df.args_C.unique()) > 1:
        show_groupby_frontiers(df, 'args_C', frontier.XMAX, frontier.YMIN, baseline=baseline)
    # if len(df.args_accuracy.unique()) > 1:
    #     show_groupby_frontiers(df, 'args_accuracy', frontier.XMAX, frontier.YMIN)
    if len(df.args_accuracy.unique()) > 1:
        show_groupby_frontiers(df, 'args_roll_out', frontier.XMAX, frontier.YMIN)
    # if len(df.args_classifier.unique()) > 1:
    #     show_groupby_frontiers(df, 'args_classifier', frontier.XMAX, frontier.YMIN)
    #show_groupby_frontiers(df, 'iteration', baseline=baseline)

    #asymmetry_plots(baseline)

    job_summary(jobs)

    #pl.ion()
    #pl.show()

    # Summary of jobs that are currently running, e.g., how many iterations have
    # they run for? How long has it been since they improved (patience)?
    J = df.join(jobs, 'jobid', rsuffix='_xxx')   # needs a suffix because columns overlap.
    J = J.groupby('jobid').max()
    J = J.join(P)
    J['elapsed'] = map(htime, J.elapsed)
    J['startdate'] = J.start.map(lambda x: x.date())
    J = J.sort_values('start')
    show_cols = ['iteration', 'running', 'patience', 'tradeoff', 'elapsed',
                 'startdate', 'dev_accuracy', 'dev_runtime', 'log']
    running = J[J['running']][show_cols]
    if running.empty:
        print red % 'No jobs running.'
    else:
        print running

    #highlight_region(df, baseline, B, frontier.dev_ax, ACCURACY, RUNTIME)

    frontier.dev_ax.set_title('Pareto frontier *DEV*')
    frontier.dev_ax.set_xlabel('runtime (%s)' % RUNTIME)
    frontier.dev_ax.set_ylabel('accuracy (%s)' % ACCURACY)
    frontier.dev_ax.set_xlim(0, None)
    frontier.dev_ax.set_ylim(0, 1)
    frontier.dev_ax.figure.canvas.draw()
    frontier.dev_ax.figure.savefig('tmp/pareto.png')

    if args.save:
        df.to_csv(args.save)
        #baseline.to_csv('tmp/baseline.csv')

    # hide the train plot.
    if not args.show_train:
        pl.close(frontier.train_ax.figure)

    if args.i:
        from arsenal.debug import ip
        ip()
    else:
        pl.ioff()
        pl.show()
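# A minimal sketch (not arsenal's running_max) of the "patience" statistic that
# PPPP computes above: the number of iterations elapsed since the dev reward
# last reached a new maximum.
def _example_patience(iterations, rewards):
    best = float('-inf')
    last_improved = iterations[0]
    for it, r in zip(iterations, rewards):
        if r > best:            # a new best reward resets the clock
            best = r
            last_improved = it
    return max(iterations) - last_improved

# e.g., _example_patience([1, 2, 3, 4, 5], [0.1, 0.3, 0.3, 0.2, 0.25]) == 3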
pl.tight_layout()
pl.legend(loc=4)
pl.show()

if 0:
    # Show separate plots for gold/nongold to highlight "label" errors
    pl.figure()
    pl.scatter(ok_gold.delta_run, ok_gold.delta_acc, c=c_g, lw=0, alpha=0.25)
    pl.scatter(bad_gold.delta_run, bad_gold.delta_acc, c=c_g, lw=0, alpha=0.25)
    pl.plot(xs, xs, c='k', lw=3)
    pl.title('gold')

    pl.figure()
    pl.scatter(bad_nongold.delta_run, bad_nongold.delta_acc, c=c_n, lw=0, alpha=0.25, label='bad nongold')
    pl.scatter(ok_nongold.delta_run, ok_nongold.delta_acc, c=c_n, lw=0, alpha=0.25, label='ok nongold')
    pl.plot(xs, xs, c='k', lw=3)
    pl.title('non-gold')

    #sns.jointplot('delta_run', 'delta_acc', data=gold)
    #pl.title('gold')
    #sns.jointplot('delta_run', 'delta_acc', data=nongold)
    #pl.title('nongold')

if 0:
    from arsenal.debug import ip
    ip()
else:
    pl.ioff()
    pl.show()
    'args_roll_out', 'args_tradeoff', 'dev_evalb', 'earlystop_elapsed',
    'earlystop_passes', 'hours_iterations', 'hours_pass', 'total_time',
    'total_passes', 'args_results',
]

print cp[show_cols]
print
print dp[show_cols]

with file('tmp/convergence-and-iterations-%s.html' % _args.grammar, 'wb') as html:
    html.write('<h1>%s</h1>' % _args.grammar)
    html.write('<h2>CP</h2>')
    html.write(cp[show_cols].to_html())
    html.write('<h2>DP</h2>')
    html.write(dp[show_cols].to_html())
    print >> html, '<center><table><tr style="text-align:center; font-size: 30pt;"><th>CP</th><th>DP</th></tr>'
    for c, d in zip(cp.args_results, dp.args_results):
        C = file(c / 'learning-curve.svg').read()
        D = file(d / 'learning-curve.svg').read()
        print >> html, '<tr><td>%s</td><td>%s</td></tr>' % (C, D)
    print >> html, '</table></center>'

print
print colors.green % 'wrote %s' % html.name
print

if _args.i:
    from arsenal.debug import ip
    ip()
def main():
    pl.ion()
    p = ArgumentParser()
    p.add_argument('root', type=path)
    p.add_argument('--quick', action='store_true',
                   help="Load a single evaluation log (for quick tests). Won't run bestof-k runtime.")
    p.add_argument('-i', action='store_true',
                   help='Interactive mode => open an IPython shell after execution.')
    args = p.parse_args()

    runs = [r for r in sorted(args.root.glob('*')) if r.isdir()]

    if args.quick:
        print colors.bold % colors.red % 'Warning! only using some of the runs for timing information.'
        runs = runs[:1]

    Ds = [(r, load(r)) for r in iterview(runs)]
    D0, Ds, bestof = sanity_check(Ds)

    # if 0:
    #     pl.figure()
    #     for name, df in D0.groupby('type'):
    #         pl.scatter(df.avg_bestof_time, df.evalb, c=C[name], lw=0)
    #         show_frontier(df.avg_bestof_time, df.evalb, c=C[name], interpolation='linear-convex', label=name)
    #         #[w,b] = np.polyfit(df.pushes, df.avg_bestof_time, deg=1)
    #         #show_frontier(df.pushes*w + b, df.evalb, interpolation='linear', c=C[name])
    #     pl.xlabel('sec/sentence (best of %s)' % len(Ds))
    #     pl.ylabel('Corpus EVALB-F1')
    #     pl.legend(loc=4)
    #     pl.show()

    rescale = 1/bestof.pushes.max()
    bestof['pushes_r'] = bestof.pushes*rescale

    B = bestof[bestof.type == 'baseline'].copy()
    lols = bestof[bestof.type != 'baseline']
    RO_types = lols.args_roll_out.unique()

    ax = pl.figure().add_subplot(111)

    for name, df in reversed(sorted(bestof.groupby('type'))):
        pl.scatter(df.pushes_r, df.evalb, c=C[name], lw=0, zorder=10, label='', s=50)
        pts = show_frontier(df.pushes_r, df.evalb, interpolation='linear-convex',
                            lw=2, c=C[name], label=name)
        ax.plot(pts[:,0], pts[:,1], label=name, c=C[name])

    pl.ylabel('Corpus $F_1$')
    pl.legend(loc=4)
    pl.tight_layout()

    ax = pl.gca()
    conesize = .06
    lambda_cone(np.array(B.evalb), np.array(B.pushes_r), ax=ax, c=c_baseline,
                conesize=conesize, lines=0)

    # --------------------------------------------------------------------------
    # Fit a parametric curve to dev points; show arrows on test points.
    from ldp.viz.parametric_fit import fit
    df = join_with_dev(B)
    ff, gg = fit(df.dev_pushes, df.dev_evalb)

    if 0:
        # enable to show the parametric curve.
        xs = pl.linspace(0, df.dev_pushes.max()+.1*df.dev_pushes.ptp(), 100)
        ax.plot(xs*rescale, ff(xs), c='k')

    ax = pl.gca()
    for _, z in df.iterrows():
        x, y = z.test_pushes*rescale, z.test_evalb
        arrow(x, y, gg(z.dev_pushes)/rescale, offset=-conesize, c=c_vec_baseline, ax=ax)

    # --------------------------------------------------------------------------
    B.loc[:,'tradeoff'] = np.nan
    data = []

    # Loop over all rollout types joined on initial policy (i.e., the baseline).
    for i, bl in B.iterrows():
        spawn = lols[lols.args_initializer_penalty == bl.args_tradeoff]
        assert len(spawn) == len(RO_types)
        models = {}
        for ro in RO_types:
            [ix] = spawn[spawn.args_roll_out == ro].index
            models[ro] = lols.ix[ix]

        [dev_pushes] = df[df.policy == bl.policy].dev_pushes
        tradeoff = gg(dev_pushes)
        B.loc[i,'tradeoff'] = tradeoff

        if 1:   # run hypothesis tests.
            print colors.bold % colors.green % '============================================================='
            print 'tradeoff: %g' % tradeoff
            print
            baseline_acc, baseline_run = get_acc_run(D0[D0.policy == bl.policy])
            row = {
                'baseline': bl.policy,
                'baseline_accuracy': baseline_acc,
                'baseline_runtime': baseline_run,
                'baseline_reward': baseline_acc - tradeoff*baseline_run,
                'wps_baseline': bl.wps,
                'wallclock_baseline': bl.avg_bestof_time,
                'tradeoff': tradeoff,
            }
            star_sty = dict(alpha=1, lw=0, marker='*', s=700, zorder=100)
            for ro, model in sorted(models.items()):
                print colors.bold % '# %s' % ro
                sig, win = paired_permutation_test(D0, a=bl.policy, b=model.policy,
                                                   tradeoff=tradeoff, R=5000)
                acc, run = get_acc_run(D0[D0.policy == model.policy])
                row[ro] = model.policy
                row['%s_accuracy' % ro] = acc
                row['%s_runtime' % ro] = run
                row['%s_reward' % ro] = acc - tradeoff*run
                row['wps_%s' % ro] = model.wps
                row['wallclock_%s' % ro] = model.avg_bestof_time
                row['winner_%s' % ro] = win
                row['sig_%s' % ro] = sig

                if win == +1:
                    pl.scatter([model.pushes_r], [model.evalb], c=C[ro], **star_sty)
                elif win == -1:
                    pl.scatter([bl.pushes_r], [bl.evalb], c=C['baseline'], **star_sty)

                # draw a dotted line to the baseline point.
                pl.plot([bl.pushes_r, model.pushes_r], [bl.evalb, model.evalb],
                        c=C[ro], lw=1, alpha=0.75, label=None, linestyle='--')

            data.append(row)

    #[w,b] = np.polyfit(B.pushes, B.avg_bestof_time, deg=1)
    xx = lols.pushes_r
    xx = np.linspace(xx.min(), xx.max(), 12)

    # put ticks on the top of the plot.
    #ax.xaxis.tick_top()
    #pl.xticks(xx, ['%.2g\n(%.2g)' % (x/rescale / 1e6, (x/rescale*w+b)*100) for x in xx], rotation=0)
    #pl.text(0.4, 0.401, re.sub('(\d)e([\-+]\d+)', r'\1e^{\2}', r'$\textit{seconds} \approx %.2g \cdot \textit{pushes} + %.2g$' % (w,b)))
    #pl.xlabel('average megapushes ($\\approx$ milliseconds)')

    pl.xticks(xx, [r'$%.2g$' % (x/rescale / 1e6) for x in xx])
    # pl.xticks(xx, [r'%.2g' % (x/rescale / 1e6) for x in xx], rotation=45)

    if 'medium' not in args.root:
        pl.xlabel('millions of hyperedges built per sentence')

    pl.ylim(bestof.evalb.min()-0.02, bestof.evalb.max()+0.015)
    pl.xlim(bestof.pushes_r.min()-.01, bestof.pushes_r.max()+0.01)

    zf = pd.DataFrame(data).sort_values('tradeoff')

    # print zf[['tradeoff', 'baseline_reward', 'cp_reward', 'dp_reward',
    #           'winner_cp', 'winner_dp']].sort_values('tradeoff').to_string(float_format='%.4g'.__mod__, index=0)
    # print zf[['tradeoff',
    #           'baseline_accuracy', 'baseline_runtime',
    #           'cp_accuracy', 'cp_runtime',
    #           'dp_accuracy', 'dp_runtime',
    #           'winner_cp', 'winner_dp']].sort_values('tradeoff').to_string(float_format='%.4g'.__mod__, index=0)

    if not args.quick:
        sig_file = args.root / 'significance.csv'
        print
        print colors.green % 'wrote %s' % sig_file
        print
        zf.to_csv(sig_file)

    if args.i:
        pl.ion(); pl.show()
        from arsenal.debug import ip; ip()
    else:
        pl.ioff(); pl.show()
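# A minimal, self-contained sketch of a paired (sign-flip) permutation test on
# per-sentence rewards (accuracy - tradeoff * runtime). This illustrates the
# statistic being tested above; it is not the project's paired_permutation_test,
# whose signature and tie-handling may differ.
def _example_paired_permutation_test(reward_a, reward_b, R=5000, seed=0):
    "Two-sided p-value for the null hypothesis that the two systems have equal mean reward."
    import numpy as np
    rng = np.random.RandomState(seed)
    d = np.asarray(reward_a, dtype=float) - np.asarray(reward_b, dtype=float)
    observed = abs(d.mean())
    hits = 0
    for _ in range(R):
        signs = rng.choice([-1, 1], size=len(d))   # randomly swap each matched pair
        if abs((signs * d).mean()) >= observed:
            hits += 1
    return (hits + 1.0) / (R + 1.0)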