def plotDocUsageForProposal(docUsageByUID, savefilename=None, **kwargs):
    ''' Make trace plot of doc usage for each component.

    Args
    ----
    docUsageByUID : mapping indexable by uid
        docUsageByUID[uid] is a sequence of usage values, one per step.
    savefilename : str or None
        If not None, the figure is written to this path
        and then all open figures are closed.

    Post Condition
    --------------
    New pylab figure created, with one line per uid.
    '''
    # Only a few labels fit well on a legend
    maxLabeledLines = 6

    pylab.figure()
    numSteps = 0
    peakUsage = 0
    for position, uid in enumerate(docUsageByUID):
        usage = np.asarray(docUsageByUID[uid])
        steps = np.arange(0, usage.size)
        if position < maxLabeledLines:
            lineKwargs = dict(label=uid)
        else:
            lineKwargs = dict()
        pylab.plot(steps, usage, **lineKwargs)
        numSteps = np.maximum(numSteps, usage.size)
        peakUsage = np.maximum(peakUsage, usage.max())

    # Use big chunk of left-hand side of plot for legend display
    pylab.xlim(np.asarray([-0.75*numSteps, numSteps-0.5]))
    pylab.xticks(np.arange(1, numSteps))
    pylab.ylim([0, 1.1*peakUsage])
    pylab.xlabel('num proposal steps')
    pylab.ylabel('num docs using each comp')
    pylab.legend(loc='upper left', fontsize=12)
    pylab.subplots_adjust(left=0.2)

    if savefilename is None:
        return
    pylab.savefig(savefilename, pad_inches=0)
    pylab.close('all')
def plotGauss1D(mu, sigma2, color='b', ax_handle=None, **kwargs):
    ''' Plot density of a 1D Gaussian with mean mu and variance sigma2.

    The density is evaluated on a grid spanning 4 standard deviations
    on either side of the mean and drawn as individual points.

    Args
    ----
    mu : scalar or array-like that squeezes to a scalar
    sigma2 : scalar or array-like that squeezes to a scalar
    color : color used for marker face and edge
    ax_handle : axes to make current before plotting (optional)

    Post Condition
    --------------
    Points added to the current axes.
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    mean = np.squeeze(mu)
    stddev = np.sqrt(np.squeeze(sigma2))
    for scalar in (mean, stddev):
        assert scalar.size == 1 and scalar.ndim == 0

    offsets = np.arange(-4, 4, 0.01)
    xs = mean + stddev * offsets
    normConst = 1. / np.sqrt(2 * np.pi) * 1. / stddev
    ps = normConst * np.exp(-0.5 * (xs - mean)**2 / stddev**2)

    markerKwargs = dict(markerfacecolor=color, markeredgecolor=color)
    pylab.plot(xs, ps, '.', **markerKwargs)
def plotGauss2DContour(
        mu, Sigma,
        color='b',
        radiusLengths=(1.0, 3.0),
        markersize=3.0,
        ax_handle=None,
        ):
    ''' Plot elliptical contours for provided mean mu, covariance Sigma.

    Uses only the first 2 dimensions.

    Args
    ----
    mu : array-like, at least 2 entries
    Sigma : array-like, at least 2 x 2
        Treated as a symmetric covariance matrix.
    color : color used for marker face and edge
    radiusLengths : iterable of floats
        One contour is drawn for each radius (scaling of the unit ellipse).
    markersize : float
    ax_handle : axes to make current before plotting (optional)

    Post Condition
    --------------
    Plot created on current axes
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    mu = np.asarray(mu)[:2]
    Sigma = np.asarray(Sigma)[:2, :2]

    # A covariance matrix is symmetric, so eigh is the appropriate solver:
    # it always returns real eigenvalues and orthonormal eigenvectors.
    D, V = np.linalg.eigh(Sigma)
    # Round-off can make a zero eigenvalue slightly negative;
    # clip so the square root below cannot produce NaN.
    D = np.maximum(D, 0)
    sqrtSigma = np.dot(V, np.diag(np.sqrt(D)))

    # Prep for plotting elliptical contours
    # by creating grid of (x,y) points along perfect circle
    ts = np.arange(-np.pi, np.pi, 0.03)
    Zcirc = np.vstack([np.sin(ts), np.cos(ts)])

    # Warp circle into ellipse defined by Sigma's eigenvectors
    Zellipse = np.dot(sqrtSigma, Zcirc)

    # plot contour lines across several radius lengths
    # TODO: instead, choose radius by percentage of prob mass contained within
    for r in radiusLengths:
        Z = r * Zellipse + mu[:, np.newaxis]
        pylab.plot(
            Z[0], Z[1], '.',
            markersize=markersize,
            markerfacecolor=color,
            markeredgecolor=color)
def makePlot(muVals=((0.01, 0), (0.1, 0), (1, 0), (10, 0)), doCorrection=1):
    ''' Plot calcBregDiv_Gauss1D over a grid of x values in a new figure.

    Args
    ----
    muVals : iterable of (mu1, mu2) pairs
        One curve is drawn per pair.
    doCorrection : int/bool
        Forwarded to calcBregDiv_Gauss1D; also selects the y-axis label.

    Post Condition
    --------------
    New pylab figure created. Minimum of each curve printed to stdout.
    '''
    pylab.figure()
    xgrid = np.linspace(-8, 8, 2000)
    # hold() was removed in matplotlib 3.0 (holding is always on there).
    # Only call it on older versions that still provide it.
    if hasattr(pylab, 'hold'):
        pylab.hold('on')
    pylab.plot(xgrid, np.zeros_like(xgrid), ':', alpha=0.2)
    for mu1, mu2 in muVals:
        ygrid = calcBregDiv_Gauss1D(
            xgrid, mu1, mu2, doCorrection=doCorrection)
        # Single-argument call form works under both Python 2 and 3
        print(ygrid.min())
        pylab.plot(
            xgrid, ygrid,
            label='mu1=% 6.2f mu2=% 6.2f' % (mu1, mu2))
    pylab.legend(loc='lower right')
    pylab.xlim([xgrid.min(), xgrid.max()])
    # y-limits deliberately reuse the x-range, as in the original
    pylab.ylim([xgrid.min(), xgrid.max()])
    pylab.xlabel('x')
    # Raw strings: same runtime text, without the invalid "\m" escape
    if doCorrection:
        pylab.ylabel(r'D(x, \mu) + correction')
    else:
        pylab.ylabel(r'D(x, \mu)')
def makePlot(muVals=(0.01, 0.1, 1, 10), B=1e-10, nu=2, justMahalTerm=0):
    ''' Plot calcBregDiv_ZeroMean over a grid of x values in a new figure.

    Args
    ----
    muVals : iterable of scalars
        One curve is drawn per value.
    B, nu, justMahalTerm :
        Forwarded unchanged to calcBregDiv_ZeroMean.
        B and nu are also shown in the figure title.

    Post Condition
    --------------
    New pylab figure created.
    '''
    pylab.figure()
    xgrid = np.linspace(0, 8, 2000)
    # hold() was removed in matplotlib 3.0 (holding is always on there).
    # Only call it on older versions that still provide it.
    if hasattr(pylab, 'hold'):
        pylab.hold('on')
    for mu in muVals:
        ygrid = calcBregDiv_ZeroMean(
            xgrid, mu, B=B, nu=nu, justMahalTerm=justMahalTerm)
        # Raw string: same runtime text, without the invalid "\m" escape
        pylab.plot(
            xgrid, ygrid,
            linewidth=2,
            label=r'\mu=%6.2f' % (mu))
    pylab.legend(loc='upper right')
    pylab.xlim([-0.1, xgrid.max()])
    # y-limits deliberately reuse the x-range, as in the original
    pylab.ylim([-0.1, xgrid.max()])
    pylab.xlabel('x')
    pylab.ylabel(r'D(x, \mu)')
    pylab.title('B=%s nu=%s' % (str(B), str(nu)))
def plotELBOtermsForProposal(
        curLdict, propLdictList,
        xs=None,
        ymin=-0.5,
        ymax=0.5,
        savefilename=None,
        **kwargs):
    ''' Create trace plot of ELBO gain/loss relative to current model.

    Args
    ----
    curLdict : dict
        Maps each ELBO term name to its value under the current model.
        Only keys without an underscore are plotted.
    propLdictList : list of dict
        One dict per proposal step, with the same keys as curLdict.
    xs : array-like or None
        x-coordinates for the steps. Defaults to 0, 1, ... L-1.
    ymin, ymax : float
        Accepted for interface compatibility; not used by this function.
    savefilename : str or None
        If not None, the figure is written to this path
        and then all open figures are closed.

    Post Condition
    --------------
    New pylab figure created, with one line per plotted ELBO term.
    '''
    pylab.figure()
    L = len(propLdictList)
    if xs is None:
        xs = np.arange(0, L)

    legendKeys = [key for key in curLdict if key.count('_') == 0]
    for key in legendKeys:
        # Emphasize the total; draw individual terms faint and dashed
        if key.count('total'):
            linewidth = 4
            alpha = 1
            style = '-'
        else:
            linewidth = 3
            alpha = 0.5
            style = '--'
        # Out-of-place subtraction: an in-place "-=" raises a casting error
        # when the proposal values are integers and the baseline is a float.
        ys = np.asarray(
            [propLdict[key] for propLdict in propLdictList]) - curLdict[key]
        pylab.plot(
            xs, ys, style,
            color=_getLineColorFromELBOKey(key),
            linewidth=linewidth,
            alpha=alpha,
            label=key)

    # Use big chunk of left-hand side of plot for legend display
    L = L + 1
    xlims = np.asarray([-0.75*L, L-0.5])
    pylab.xlim(xlims)
    pylab.xticks(xs)
    # Dotted reference line at zero gain
    pylab.plot(xlims, np.zeros_like(xlims), 'k:')
    pylab.xlabel('num proposal steps')
    pylab.ylabel('L gain (prop - current)')
    pylab.legend(loc='lower left', fontsize=12)
    pylab.subplots_adjust(left=0.2)
    if savefilename is not None:
        pylab.savefig(savefilename, pad_inches=0)
        pylab.close('all')
def plotSingleLineAcrossJobsByXVar(jpathPattern,
                                   label='',
                                   xvar=None,
                                   xvals=None,
                                   xlabel=None,
                                   yvar='evidence',
                                   lineStyle='.-',
                                   taskids='all',
                                   lineID=0,
                                   lvar='',
                                   **kwargs):
    ''' Create line plot in current figure for job matching the pattern

    Iterates over each xval in provided list of values.
    Each one corresponds to a single saved job.

    Every task of every job is drawn as an isolated point; the value
    loaded for the '.best' task of each job is drawn as a connected line.

    Raises
    ------
    ValueError
        If any job path built from the pattern does not exist on disk.

    Post Condition
    --------------
    Current axes have one line added.
    '''
    def _applyOverrides(args):
        # Caller-supplied kwargs win over defaults, for known keys only;
        # marker edges always follow the line color.
        for name in args:
            if name in kwargs:
                args[name] = kwargs[name]
        args['markeredgecolor'] = args['color']

    pathParts = jpathPattern.split(os.path.sep)
    prefixfilepath = os.path.sep.join(pathParts[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if xvals is None:
        xvals = PPListMap[xvar]

    nVals = len(xvals)
    xs = np.zeros(nVals)
    ys = np.zeros(nVals)
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=xvar,
        vals=xvals,
        **kwargs)

    # Plot all tasks as faint points with no connections
    pointArgs = copy.deepcopy(DefaultLinePlotKwArgs)
    for pos, jobpath in enumerate(jpathList):
        if not os.path.exists(jobpath):
            raise ValueError("PATH NOT FOUND: %s" % (jobpath))
        x = float(xvals[pos])
        _applyOverrides(pointArgs)
        for tid in BNPYArgParser.parse_task_ids(jobpath, taskids):
            y = loadYValFromDisk(jobpath, tid, yvar=yvar)
            pylab.plot(x, y, '.', **pointArgs)

    # Plot top-ranked tasks as solid points connected by line
    for pos, jobpath in enumerate(jpathList):
        rankTasksForSingleJobOnDisk(os.path.join(jobpath))
        x = float(xvals[pos])
        y = loadYValFromDisk(jobpath, '.best', yvar=yvar)
        assert isinstance(x, float)
        assert isinstance(y, float)
        xs[pos] = x
        ys[pos] = y

    lineArgs = copy.deepcopy(DefaultLinePlotKwArgs)
    _applyOverrides(lineArgs)
    lineArgs['label'] = label
    pylab.plot(xs, ys, lineStyle, **lineArgs)

    # Only the first line drawn on the axes sets the axis labels
    if lineID == 0:
        pylab.xlabel(xvar if xlabel is None else xlabel)
        pylab.ylabel(LabelMap[yvar])
def plotGauss2DFromHModel(hmodel,
                          compListToPlot=None,
                          compsToHighlight=None,
                          activeCompIDs=None,
                          MaxKToDisplay=50,
                          proba_thr=0.0001,
                          ax_handle=None,
                          dataset=None,
                          Colors=Colors,
                          **kwargs):
    ''' Plot 2D contours for components in hmodel in current pylab figure

    Args
    ----
    hmodel : bnpy HModel object
    compListToPlot : array-like of integer IDs of components within hmodel
        Defaults to all K components of the observation model.
    compsToHighlight : int or array-like integer IDs to highlight
        if None, all components get unique colors
        if not None, only highlighted components get colors;
        all other plotted components are drawn in black.
    activeCompIDs : array-like of integer IDs
        The position of an ID within this array is the index used
        to look up that component's weight, mean and covariance.
        Defaults to 0, 1, ... K-1.
    MaxKToDisplay : int
        Plotting stops once this many components have been processed.
    proba_thr : float
        Minimum weight assigned to component in order to be plotted.
        All components with weight below proba_thr are ignored,
        unless they are highlighted.
    ax_handle : axes to make current before plotting (optional)
    dataset : object or None
        If it has an attribute X, the first two columns of X are drawn
        as a gray scatter underneath the contours.
    Colors : sequence of colors, cycled through by position in compListToPlot

    Post Condition
    --------------
    Contours added to current axes. Status messages printed to stdout.
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    if compsToHighlight is not None:
        compsToHighlight = np.asarray(compsToHighlight)
        if compsToHighlight.ndim == 0:
            compsToHighlight = np.asarray([compsToHighlight])
    else:
        compsToHighlight = list()
    if compListToPlot is None:
        compListToPlot = np.arange(0, hmodel.obsModel.K)
    if activeCompIDs is None:
        activeCompIDs = np.arange(0, hmodel.obsModel.K)
    # The elementwise "==" below needs an array; a plain list would
    # compare as a whole and never match any compID.
    activeCompIDs = np.asarray(activeCompIDs)

    # Load appearance probabilities as single vector
    if hmodel.allocModel.K == hmodel.obsModel.K:
        w = hmodel.allocModel.get_active_comp_probs()
    else:
        w = np.ones(hmodel.obsModel.K)

    if dataset is not None and hasattr(dataset, 'X'):
        pylab.plot(
            dataset.X[:, 0], dataset.X[:, 1], '.',
            color=(.3, .3, .3), alpha=0.5)

    nSkip = 0
    nGood = 0
    for ii, compID in enumerate(compListToPlot):
        if compID not in activeCompIDs:
            continue

        # Translate the component ID into its position within the model
        kk = np.flatnonzero(activeCompIDs == compID)
        assert kk.size == 1
        kk = kk[0]

        if w[kk] < proba_thr and compID not in compsToHighlight:
            nSkip += 1
            continue

        mu = hmodel.obsModel.get_mean_for_comp(kk)
        Sigma = hmodel.obsModel.get_covar_mat_for_comp(kk)

        if len(compsToHighlight) == 0 or compID in compsToHighlight:
            color = Colors[ii % len(Colors)]
            plotGauss2DContour(mu, Sigma, color=color)
        else:
            # Not highlighted: draw in black. (Previously this branch tested
            # the positional index kk against the ID list, which could
            # silently drop a component that was still counted as shown.)
            plotGauss2DContour(mu, Sigma, color='k')

        nGood += 1
        if nGood >= MaxKToDisplay:
            print('DISPLAY LIMIT EXCEEDED. Showing %d/%d components'
                  % (nGood, len(activeCompIDs)))
            break
    if nSkip > 0:
        # %g instead of %.2f, so small thresholds are not printed as 0.00
        print('SKIPPED %d comps with size below %g' % (nSkip, proba_thr))
def tryDeleteProposalForSpecificTarget_HDPTopicModel(
        Data,
        hmodel,
        LPkwargs=None,
        ktarget=0,
        kabsorbList=[1],
        verbose=True,
        doPlotComps=True,
        doPlotELBO=True,
        doPlotDocTopicCount=False,
        nELBOSteps=3,
        nUpdateSteps=5,
        d_initTargetDocTopicCount='warm_start',
        d_initWordCounts='none',
        **kwargs):
    ''' Execute delete proposal for a specific target cluster on whole dataset

    The mass of cluster ktarget is reassigned among the clusters in
    kabsorbList via repeated restricted local steps, and the resulting
    proposed model is scored against the current model.

    Args
    ----
    Data : dataset object
    hmodel : HModel
        Copied internally; the provided object is not updated here.
    LPkwargs : dict or None
        Keyword args for local step. None means no extra args.
    ktarget : int
        Index of the cluster proposed for deletion.
    kabsorbList : list of int
        Passed through parse_list_of_absorbing_comps before use.
        The default list is never mutated here; it is rebound immediately.
    verbose : bool
        If True, print progress and a summary of the result.
    doPlotComps, doPlotELBO, doPlotDocTopicCount : bool
        Select which diagnostic figures are shown.
    nELBOSteps : int
        Number of restricted local steps, each followed by an ELBO eval.
    nUpdateSteps : int
        Forwarded to the restricted local step.
    d_initTargetDocTopicCount : str
        Forwarded to the restricted local step.
    d_initWordCounts : str
        If it contains 'bycorr' (and the obs model name contains 'Mult'),
        target word counts are added to correlated absorbing clusters
        when initializing.

    Returns
    -------
    propModel : HModel
    propSS : SuffStatBag
    propLscoreList : list of scalar real
        ELBO score of proposed model after each of the nELBOSteps steps
    curModel : HModel
    curSS : SuffStatBag
    curLscore : scalar real
        ELBO score of current model
    '''
    if LPkwargs is None:
        LPkwargs = dict()
    kabsorbList = parse_list_of_absorbing_comps(
        kabsorbList, ktarget, hmodel.obsModel.K)

    from bnpy.allocmodel.topics.HDPTopicRestrictedLocalStep2 \
        import summarizeRestrictedLocalStep_HDPTopicModel

    curModel = hmodel.copy()
    propModel = hmodel.copy()

    # Update current model
    if verbose:
        print("")
        print("Loading model from disk and performing local step...")
    starttime = time.time()
    curLP = curModel.calc_local_params(Data, **LPkwargs)
    curSS = curModel.get_global_suff_stats(Data, curLP, doPrecompEntropy=1)
    curModel.update_global_params(curSS)
    curLdict = curModel.calc_evidence(SS=curSS, todict=1)
    curLscore = curLdict['Ltotal']
    if verbose:
        print("%5.1f sec to obtain current model, LP, and SS" % (
            time.time() - starttime))

    # Find docs that use the target, sorted from largest usage to smallest.
    # Only the top 5 are used for printed and plotted diagnostics.
    nontrivialdocIDs = np.flatnonzero(
        curLP['DocTopicCount'][:, ktarget] > .01)
    sort_mask = np.argsort(
        -1*curLP['DocTopicCount'][nontrivialdocIDs, ktarget])
    nontrivialdocIDs = nontrivialdocIDs[sort_mask]
    docIDs = nontrivialdocIDs[:5]
    if verbose:
        print("")
        print("Proposing deletion of cluster %d" % (ktarget))
        print(" total mass N_k = %.1f" % (curSS.getCountVec()[ktarget]))
        print(" %d docs with non-trivial mass" % (nontrivialdocIDs.size))
        print("")
        print("Absorbing into %d/%d remaining clusters" % (
            len(kabsorbList), curSS.K-1))
        print(" ".join(['%3d' % (kk) for kk in kabsorbList]))
        print("")

    # Create init observation model for absorbing states
    xObsModel = propModel.obsModel.copy()
    xinitSS = curSS.copy(includeELBOTerms=False, includeMergeTerms=False)
    # Remove in reverse order, so indices of remaining comps stay valid
    for k in reversed(np.arange(xObsModel.K)):
        if k not in kabsorbList:
            xinitSS.removeComp(k)
    # Find clusters correlated in appearance with the target
    if curModel.getObsModelName().count('Mult') and \
            d_initWordCounts.count('bycorr'):
        corrVec = calcCorrelationFromTargetToAbsorbingSet(
            curLP['DocTopicCount'], ktarget, kabsorbList)
        bestAbsorbIDs = np.flatnonzero(corrVec >= .001)
        print("absorbIDs with best correlation:")
        print(bestAbsorbIDs)
        for k in bestAbsorbIDs:
            xinitSS.WordCounts[k, :] += curSS.WordCounts[ktarget, :]
    xObsModel.update_global_params(xinitSS)

    # Create init pi vector for absorbing states
    # Rescaled so absorbing set also carries the target's probability mass
    curPiVec = propModel.allocModel.get_active_comp_probs()
    xPiVec = curPiVec[kabsorbList].copy()
    xPiVec /= xPiVec.sum()
    xPiVec *= (curPiVec[kabsorbList].sum() + curPiVec[ktarget])
    assert np.allclose(np.sum(xPiVec),
                       curPiVec[ktarget] + np.sum(curPiVec[kabsorbList]))

    if verbose:
        print("Reassigning target mass among absorbing set...")
    starttime = time.time()
    propLscoreList = list()
    for ELBOstep in range(nELBOSteps):
        xSS, Info = summarizeRestrictedLocalStep_HDPTopicModel(
            Dslice=Data,
            curModel=curModel,
            curLPslice=curLP,
            ktarget=ktarget,
            kabsorbList=kabsorbList,
            curPiVec=curPiVec,
            xPiVec=xPiVec,
            xObsModel=xObsModel,
            nUpdateSteps=nUpdateSteps,
            d_initTargetDocTopicCount=d_initTargetDocTopicCount,
            LPkwargs=LPkwargs)

        if ELBOstep < nELBOSteps - 1:
            # Update the xObsModel
            xObsModel.update_global_params(xSS)
            # TODO: update xPiVec???

        print(" completed step %d/%d after %5.1f sec" % (
            ELBOstep+1, nELBOSteps, time.time() - starttime))

        # Build proposed stats: absorbing comps replaced, target removed
        propSS = curSS.copy()
        propSS.replaceCompsWithContraction(
            replaceSS=xSS,
            replaceUIDs=[curSS.uids[k] for k in kabsorbList],
            removeUIDs=[curSS.uids[ktarget]],
            )
        # Total mass must be conserved by the proposal
        assert np.allclose(propSS.getCountVec().sum(),
                           curSS.getCountVec().sum(),
                           atol=0.01,
                           rtol=0)
        propModel.update_global_params(propSS)
        propLdict = propModel.calc_evidence(SS=propSS, todict=1)
        propLscore = propModel.calc_evidence(SS=propSS)
        propLscoreList.append(propLscore)

    if verbose:
        print("")
        print("Proposal result:")
        if propLscore - curLscore > 0:
            print(" ACCEPTED")
        else:
            print(" REJECTED")
        print("%.4e cur ELBO score" % (curLscore))
        print("%.4e prop ELBO score" % (propLscore))
        print("% .4e change in ELBO score" % (propLscore - curLscore))
        print("")
        for key in sorted(curLdict.keys()):
            if key.count('_') or key.count('total'):
                continue
            print(" gain %8s % .3e" % (
                key, propLdict[key] - curLdict[key]))
        print("")
        if docIDs.size > 0:
            np.set_printoptions(suppress=1, precision=2, linewidth=120)
            xLPslice = Info['xLPslice']
            print("BEFORE")
            print("-----")
            print(np.hstack([
                curLP['DocTopicCount'][docIDs, :][:, kabsorbList],
                curLP['DocTopicCount'][docIDs, :][:, ktarget][:, np.newaxis]
                ]))
            print("AFTER")
            print("-----")
            print(xLPslice['DocTopicCount'][docIDs, :])

    if doPlotELBO:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)
        iters = np.arange(len(propLscoreList))
        pylab.plot(iters, propLscoreList, 'b-')
        pylab.plot(iters, curLscore*np.ones_like(iters), 'k--')
        pylab.show()

    if doPlotDocTopicCount:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)

        kplotList = [x for x in kabsorbList]
        kplotList.append(ktarget)
        for d in docIDs:
            curDTClabels = ['%.1f' % (x) for x in
                            curLP['DocTopicCount'][d, kplotList]]
            bnpy.viz.PlotComps.plotCompsFromHModel(
                curModel,
                compListToPlot=kplotList,
                compsToHighlight=[ktarget],
                xlabels=curDTClabels,
                vmin=0,
                vmax=.01)
            fig = pylab.gcf()
            fig.canvas.set_window_title('doc %d BEFORE' % (d))

            propLP = Info['xLPslice']
            propDTClabels = ['%.1f' % (x) for x in
                             propLP['DocTopicCount'][d, :]]
            bnpy.viz.PlotComps.plotCompsFromHModel(
                propModel,
                xlabels=propDTClabels,
                vmin=0,
                vmax=.01)
            fig = pylab.gcf()
            fig.canvas.set_window_title('doc %d AFTER' % (d))
            pylab.show(block=False)

        # Plot docs
        # First docIDs.size rows hold each doc's word counts; the next
        # docIDs.size rows hold the part of each doc assigned to the target.
        # NOTE(review): 900 looks like a hard-coded vocabulary size
        # -- confirm against the dataset in use.
        dIm = np.zeros((docIDs.size*2, 900))
        dImLabels = list()
        tImLabels = list()
        for row, d in enumerate(docIDs):
            start = Data.doc_range[d]
            stop = Data.doc_range[d+1]
            wid = Data.word_id[start:stop]
            wct = Data.word_count[start:stop]
            dIm[row, wid] = wct
            dImLabels.append('doc %d' % (d))

            tmask = np.flatnonzero(curLP['resp'][start:stop, ktarget] > .01)
            dIm[row+docIDs.size, wid[tmask]] = wct[tmask] \
                * curLP['resp'][start + tmask, ktarget]
            tImLabels.append('trgt doc %d' % (d))

        # list.extend returns None, so the labels must be concatenated
        # explicitly; otherwise xlabels would always be None.
        bnpy.viz.BarsViz.showTopicsAsSquareImages(
            dIm,
            ncols=2,
            vmin=0,
            vmax=1,
            xlabels=dImLabels + tImLabels,
            cmap='jet')
        pylab.show()

    if doPlotComps:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)
        bnpy.viz.PlotComps.plotCompsFromSS(
            curModel, curSS, compsToHighlight=[ktarget],
            vmin=0, vmax=.01)
        fig = pylab.gcf()
        fig.canvas.set_window_title('BEFORE')
        bnpy.viz.PlotComps.plotCompsFromSS(
            propModel, propSS,
            vmin=0, vmax=.01)
        fig = pylab.gcf()
        fig.canvas.set_window_title('AFTER')
        pylab.show()
    return (
        propModel,
        propSS,
        propLscoreList,
        curModel,
        curSS,
        curLscore)
def plot_all_tasks_for_job(jobpath, label, taskids=None,
                           lineType='.-',
                           spreadLineType='--',
                           color=None,
                           yvar='avgLikScore',
                           xvar='laps',
                           markersize=10,
                           linewidth=2,
                           minLap=0,
                           showFinalPt=0,
                           fileSuffix='PredLik.mat',
                           xjitter=None,
                           prefix='predlik',
                           colorID=0,
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath

    Data for each task is read from text files named "<prefix>-*.txt"
    when any "predlik-*.txt" file exists in the task directory,
    otherwise from .mat files matching "*<fileSuffix>".

    Args
    ----
    jobpath : str
        Directory containing one sub-directory per task.
    label : str
        Legend label, attached to the first task's line only.
    taskids : passed to BNPYArgParser.parse_task_ids
    lineType : format string for the main line
    spreadLineType : format string for the 10th/90th percentile lines
    color : line color. If None, chosen from Colors using colorID.
    yvar : str
        Normalized to start with 'avg' and end with 'Score',
        unless it contains 'Kactive'.
    xvar : str
        Must contain 'lap', 'K' or 'time' when reading text files.
    minLap : scalar
        If > 0 and the task path contains 'fix', only points
        recorded after this lap are shown.
    showFinalPt : int/bool
        If true, the final point is drawn again as an isolated marker.
    xjitter : scalar or None
        If not None, added to all x values.
    kwargs : extra keyword args, merged into the pylab.plot arguments

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If xvar is not recognized, or no data is found for a task.

    Post Condition
    --------------
    Current axes have one line added per task, and axis labels set.
    If jobpath does not exist, a message is printed and nothing is drawn.
    '''
    if not os.path.exists(jobpath):
        print('PATH NOT FOUND', jobpath)
        return None
    if not yvar.startswith('avg') and yvar.count('Kactive') == 0:
        yvar = 'avg' + yvar
    if not yvar.endswith('Score') and yvar.count('Kactive') == 0:
        yvar = yvar + 'Score'

    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    for tt, taskid in enumerate(taskids):
        taskoutpath = os.path.join(jobpath, taskid)
        hpaths = glob.glob(os.path.join(taskoutpath, '*' + fileSuffix))
        # NOTE(review): this check hard-codes 'predlik' while the loads
        # below use `prefix` -- confirm whether that is intentional.
        txtpaths = glob.glob(os.path.join(taskoutpath, 'predlik-*.txt'))
        ys_hi = None
        ys_lo = None
        if len(txtpaths) > 0:
            if fileSuffix.endswith('.txt'):
                suffix = '-' + fileSuffix
            else:
                suffix = '.txt'
            lapfile = os.path.join(taskoutpath, prefix + '-lapTrain.txt')
            if xvar.count('lap'):
                xs = np.loadtxt(lapfile)
            elif xvar.count('K'):
                xs = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-K.txt'))
            elif xvar.count('time'):
                xs = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-timeTrain.txt'))
            else:
                raise ValueError("Unrecognized xvar: " + xvar)

            if yvar.count('Kactive') and not yvar.count('Percentile'):
                # Median as the main line, 10th/90th percentiles as spread
                ys = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-' + yvar + 'Percentile50.txt'))
                ys_lo = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-' + yvar + 'Percentile10.txt'))
                ys_hi = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-' + yvar + 'Percentile90.txt'))
            else:
                ys = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-' + yvar + suffix))

            if minLap > 0 and taskoutpath.count('fix'):
                # The lap values were previously read from an undefined
                # name. Reuse xs when it already holds laps, otherwise
                # load them from the lap file.
                if xvar.count('lap'):
                    laps = xs
                else:
                    laps = np.loadtxt(lapfile)
                mask = laps > minLap
                xs = xs[mask]
                ys = ys[mask]
                # Keep the percentile bands aligned with the main line
                if ys_lo is not None:
                    ys_lo = ys_lo[mask]
                    ys_hi = ys_hi[mask]
        elif len(hpaths) > 0:
            hpaths.sort()
            basenames = [os.path.basename(x) for x in hpaths]
            # Characters 3-10 of each file name encode the x value
            xs = np.asarray([float(x[3:11]) for x in basenames])
            ys = np.zeros_like(xs)
            for ii, hpath in enumerate(hpaths):
                MatVars = scipy.io.loadmat(hpath)
                # loadmat returns arrays; squeeze to a scalar before float()
                ys[ii] = float(np.squeeze(MatVars['avgPredLL']))
        else:
            raise ValueError(
                'Pred Lik data unavailable for job\n' + taskoutpath)

        plotargs = dict(
            markersize=markersize,
            linewidth=linewidth,
            label=None,
            color=color,
            markeredgecolor=color,
            )
        plotargs.update(kwargs)
        # Only the first task gets a legend entry
        if tt == 0:
            plotargs['label'] = label
        if xjitter is not None:
            xs = xs + xjitter
        pylab.plot(xs, ys, lineType, **plotargs)
        if ys_lo is not None:
            # Spread lines must not add extra legend entries
            plotargs.pop('label', None)
            pylab.plot(xs, ys_lo, spreadLineType, **plotargs)
            pylab.plot(xs, ys_hi, spreadLineType, **plotargs)

        if showFinalPt:
            pylab.plot(xs[-1], ys[-1], '.', **plotargs)
    pylab.xlabel(XLabelMap[xvar])
    pylab.ylabel(YLabelMap[yvar])
def plot_all_tasks_for_job(jobpath, label, taskids=None,
                           color=None,
                           colorID=0,
                           density=2,
                           yvar='evidence',
                           markersize=10,
                           linewidth=2,
                           linestyle='-',
                           drawLineToXMax=None,
                           showOnlyAfterLap=0,
                           xvar='laps',
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath

    Args
    ----
    jobpath : str
        Directory containing one sub-directory per task. If it does not
        exist and is not absolute, it is looked up under $BNPYOUTDIR.
    label : str
        Legend label, attached to the first task's line only.
    taskids : passed to BNPYArgParser.parse_task_ids
    color : line color. If None, chosen from Colors using colorID.
    density : scalar
        Max number of points per lap kept after the first 10 points
        (applies only when xvar is 'laps').
    yvar : str
        Name of the y variable; read from "<yvar>.txt" or,
        if that is missing, "<yvar>-saved-params.txt".
    xvar : str
        Name of the x variable; read from "<xvar>.txt".
    drawLineToXMax : scalar or None
        If set, a dashed horizontal line extends the final y value
        out to this x position.
    showOnlyAfterLap : scalar
        If nonzero, only points with lap >= this value are shown.
    kwargs : extra keyword args
        Keys that match plot arguments override them;
        all are also forwarded to loadKeffForTask.

    Raises
    ------
    ValueError
        If a relative jobpath is not found under $BNPYOUTDIR either,
        or if x and y data cannot be aligned to the same length.
    IOError
        If the text files are missing and the fallback loaders fail.

    Post Condition
    --------------
    Current axes have one line added per task, and axis labels set.
    '''
    if not os.path.exists(jobpath):
        if not jobpath.startswith(os.path.sep):
            jobpath_tmp = os.path.join(os.environ['BNPYOUTDIR'], jobpath)
            if not os.path.exists(jobpath_tmp):
                raise ValueError("PATH NOT FOUND: %s" % (jobpath))
            jobpath = jobpath_tmp
    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    if yvar == 'hamming-distance':
        # Distances may only exist at laps where params were saved;
        # if so, pair them with the laps recorded at those checkpoints.
        yspfile = os.path.join(jobpath, taskids[0], yvar + '-saved-params.txt')
        if xvar == 'laps' and os.path.isfile(yspfile):
            xvar = 'laps-saved-params'

    for tt, taskid in enumerate(taskids):
        xs = None
        ys = None
        laps = None

        try:
            var_ext = ''
            ytxtfile = os.path.join(jobpath, taskid, yvar + '.txt')
            if not os.path.isfile(ytxtfile):
                var_ext = '-saved-params'
                ytxtfile = os.path.join(
                    jobpath, taskid, yvar + var_ext + '.txt')
            ys = np.loadtxt(ytxtfile)

            if ytxtfile.count('saved-params'):
                laptxtfile = os.path.join(
                    jobpath, taskid, 'laps-saved-params.txt')
            else:
                laptxtfile = os.path.join(jobpath, taskid, 'laps.txt')

            # xs was previously never loaded on this path, so every later
            # use of xs.size failed on None.
            # NOTE(review): the "<xvar>.txt" naming is inferred from the
            # alignment logic below, which expects xs to match laps.txt
            # unless xvar is 'laps-saved-params' -- confirm.
            xs = np.loadtxt(os.path.join(jobpath, taskid, xvar + '.txt'))
        except IOError as e:
            # TODO: when is this code needed?
            # xs, ys = loadXYFromTopicModelFiles(jobpath, taskid)
            try:
                if isinstance(xs, np.ndarray) and yvar.count('Keff'):
                    ys = loadKeffForTask(
                        os.path.join(jobpath, taskid), **kwargs)
                    assert xs.size == ys.size
                else:
                    # Heldout metrics
                    xs, ys = loadXYFromTopicModelSummaryFiles(
                        jobpath, taskid, xvar=xvar, yvar=yvar)
                    if showOnlyAfterLap and showOnlyAfterLap > 0:
                        laps, _ = loadXYFromTopicModelSummaryFiles(
                            jobpath, taskid, xvar='laps', yvar=yvar)
            except ValueError:
                try:
                    xs, ys = loadXYFromTopicModelSummaryFiles(jobpath, taskid)
                except ValueError:
                    # Report the original file error, not the fallback's
                    raise e

        if yvar == 'hamming-distance' or yvar == 'Keff':
            if xvar == 'laps-saved-params':
                # fix off-by-one error, if we save an extra dist on final lap
                if xs.size == ys.size - 1:
                    ys = ys[:-1]
                elif ys.size == xs.size - 1:
                    # fix off-by-one error, if we quit early
                    xs = xs[:-1]
            elif xs.size != ys.size:
                # Try to subsample both time series at laps where they
                # intersect
                laps_x = np.loadtxt(os.path.join(jobpath, taskid, 'laps.txt'))
                laps_y = np.loadtxt(os.path.join(
                    jobpath, taskid, 'laps-saved-params.txt'))
                assert xs.size == laps_x.size
                if ys.size == laps_y.size - 1:
                    laps_y = laps_y[:-1]
                xs = xs[np.in1d(laps_x, laps_y)]
                ys = ys[np.in1d(laps_y, laps_x)]

        if xs.size != ys.size:
            raise ValueError('Dimension mismatch. len(xs)=%d, len(ys)=%d'
                             % (xs.size, ys.size))

        # Cleanup laps data. Verify that it is sorted, with no collisions.
        if xvar == 'laps':
            diff = xs[1:] - xs[:-1]
            goodIDs = np.flatnonzero(diff >= 0)
            if len(goodIDs) < xs.size - 1:
                print('WARNING: looks like multiple runs writing to this file!')
                print(jobpath)
                print('Task: ', taskid)
                print(len(goodIDs), xs.size - 1)
                # Keep only points followed by a non-decreasing lap,
                # plus the final point. Done only when needed, so empty
                # data never reaches xs[-1].
                xs = np.hstack([xs[goodIDs], xs[-1]])
                ys = np.hstack([ys[goodIDs], ys[-1]])

        if xvar == 'laps' and yvar == 'evidence':
            mask = xs >= 1.0
            xs = xs[mask]
            ys = ys[mask]
        elif showOnlyAfterLap:
            # print "Filtering for data recorded at lap >= %s" % (
            #    showOnlyAfterLap)
            if laps is None:
                laps = np.loadtxt(laptxtfile)
            mask = laps >= showOnlyAfterLap
            xs = xs[mask]
            ys = ys[mask]

        # Force plot density (data points per lap) to desired specification
        # This avoids making plots that have huge file sizes,
        # due to too much content in the given display space
        if xvar == 'laps' and xs.size > 20 and np.sum(xs > 5) > 10:
            if (xs[-1] - xs[9]) != 0:
                curDensity = (xs.size - 10) / (xs[-1] - xs[9])
            else:
                curDensity = density
            while curDensity > density and xs.size > 11:
                # Thin xs and ys data by a factor of 2
                # while preserving the first 10 data points
                xs = np.hstack([xs[:10], xs[10::2]])
                ys = np.hstack([ys[:10], ys[10::2]])
                curDensity = (xs.size - 10) / (xs[-1] - xs[9])

        plotargs = dict(
            markersize=markersize,
            linewidth=linewidth,
            linestyle=linestyle,
            label=None,
            color=color,
            markeredgecolor=color)
        # Only known plot arguments may be overridden via kwargs
        for key in kwargs:
            if key in plotargs:
                plotargs[key] = kwargs[key]
        # Only the first task gets a legend entry
        if tt == 0:
            plotargs['label'] = label

        pylab.plot(xs, ys, **plotargs)
        if drawLineToXMax:
            xs_dashed = np.asarray([xs[-1], drawLineToXMax])
            ys_dashed = np.asarray([ys[-1], ys[-1]])
            plotargs['label'] = None
            pylab.plot(xs_dashed, ys_dashed, '--', **plotargs)

    pylab.xlabel(LabelMap[xvar])
    if yvar in LabelMap:
        yLabelStr = LabelMap[yvar]
        if yvar == 'Keff' and 'effCountThr' in kwargs:
            effCountThr = float(kwargs['effCountThr'])
            yLabelStr = yLabelStr + ' > %s' % (str(effCountThr))
        pylab.ylabel(yLabelStr)