def plotDataAsSquareImages(Data, unitIDsToPlot=None, figID=None,
                           nPlots=16, doShowNow=False, seed=0,
                           randstate=np.random.RandomState(0), **kwargs):
    ''' Plot selected rows of Data.X as square images in a subplot grid.

    Parameters
    ----------
    Data : dataset object with attributes .X (nObs x dim array),
        .dim (must be a perfect square), and .nObs
    unitIDsToPlot : list of int, optional
        Row indices of Data.X to display. If None, up to nPlots rows
        are chosen at random.
    figID : figure handle, optional
        If None, a new pylab figure is created.
    nPlots : int
        Maximum number of images to show when sampling randomly.
    doShowNow : bool
        If True, call pylab.show() before returning.
    seed : int or None
        If not None, overrides randstate with a fresh RandomState(seed).
    randstate : np.random.RandomState
        Random source for choosing rows; only used when seed is None.
        NOTE(review): mutable default shared across calls — harmless here
        because it is only read, never mutated in place.

    Post Condition
    --------------
    Current pylab figure contains a grid of images, one per chosen row.
    '''
    if seed is not None:
        randstate = np.random.RandomState(seed)
    if figID is None:
        pylab.figure()
    V = Data.dim
    # Each row must reshape into a sqrtV x sqrtV image
    assert isPerfectSquare(V)
    sqrtV = int(np.sqrt(V))
    if unitIDsToPlot is not None:
        nPlots = len(unitIDsToPlot)
    else:
        size = np.minimum(Data.nObs, nPlots)
        unitIDsToPlot = randstate.choice(Data.nObs, size=size, replace=False)
    nRows = np.floor(np.sqrt(nPlots))
    nCols = np.ceil(nPlots / nRows)
    for plotPos, unitID in enumerate(unitIDsToPlot):
        squareIm = np.reshape(Data.X[unitID], (sqrtV, sqrtV))
        pylab.subplot(nRows, nCols, plotPos + 1)
        # NOTE(review): imshow uses the module-level global `imshowArgs`
        # (set by plotBarsFromHModel), NOT the **kwargs passed to this
        # function, which are silently ignored — confirm this is intended.
        pylab.imshow(squareIm, **imshowArgs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
    pylab.tight_layout()
    if doShowNow:
        pylab.show()
def plotBarsFromHModel(
        hmodel, Data=None, doShowNow=False, figH=None,
        doSquare=1,
        xlabels=[],
        compsToHighlight=None, compListToPlot=None,
        activeCompIDs=None, Kmax=50,
        width=6, height=3, vmax=None,
        block=0,  # unused
        jobname='',  # unused
        **kwargs):
    ''' Plot topic-word parameters of hmodel as bars images.

    Parameters
    ----------
    hmodel : bnpy HModel
        Topic-word probabilities are taken from obsModel.Post.lam
        (normalized per row) if a posterior exists, otherwise from
        obsModel.EstParams.phi.
    doSquare : int
        If truthy, each topic is shown as its own square image;
        otherwise all topics are stacked in one wide image.
    vmax : float, optional
        Upper intensity limit for imshow; if None, a scale is derived
        from the 95th percentile of the topic values.

    Returns
    -------
    figH : figure handle holding the plots.

    Post Condition
    --------------
    Mutates the module-level global `imshowArgs` (its 'vmax' entry),
    which other plotting helpers in this module read.
    '''
    if vmax is not None:
        kwargs['vmax'] = vmax
    if hasattr(hmodel.obsModel, 'Post'):
        # Posterior mean: normalize Dirichlet params to probabilities
        lam = hmodel.obsModel.Post.lam
        topics = lam / lam.sum(axis=1)[:, np.newaxis]
    else:
        topics = hmodel.obsModel.EstParams.phi.copy()
    # Determine intensity scale for topic-word image
    global imshowArgs
    if vmax is not None:
        imshowArgs['vmax'] = vmax
    else:
        imshowArgs['vmax'] = 1.5 * np.percentile(topics, 95)
    if doSquare:
        figH = showTopicsAsSquareImages(topics,
                                        activeCompIDs=activeCompIDs,
                                        compsToHighlight=compsToHighlight,
                                        compListToPlot=compListToPlot,
                                        Kmax=Kmax,
                                        figH=figH,
                                        xlabels=xlabels,
                                        **kwargs)
    else:
        if figH is None:
            figH = pylab.figure(figsize=(width, height))
        else:
            pylab.axes(figH)
        showAllTopicsInSingleImage(topics, compsToHighlight, **kwargs)
    if doShowNow:
        pylab.show()
    return figH
def plotJobs(jpaths, legNames, styles=None, fileSuffix='PredLik.mat',
             xvar='laps', yvar='avgLikScore',
             loc='upper right', minLap=0,
             showFinalPt=0,
             prefix='predlik',
             taskids=None, savefilename=None, tickfontsize=None,
             xjitter=None, bbox_to_anchor=None, **kwargs):
    ''' Create line plots for provided jobs '''
    # Need at least one legend entry per job path
    assert len(jpaths) <= len(legNames)
    # Evenly-spaced horizontal offsets, one per job, used when jitter is on
    jitterVals = np.linspace(-.5, .5, len(jpaths))
    for ii, jpath in enumerate(jpaths):
        lineStyle = dict(colorID=ii) if styles is None else styles[ii]
        if xjitter is not None:
            # Replace requested jitter flag/value with this job's offset
            xjitter = jitterVals[ii]
        plot_all_tasks_for_job(jpath, legNames[ii],
                               minLap=minLap,
                               xvar=xvar, yvar=yvar,
                               fileSuffix=fileSuffix,
                               showFinalPt=showFinalPt,
                               prefix=prefix,
                               taskids=taskids,
                               xjitter=xjitter,
                               **lineStyle)
    # Legend only makes sense when comparing more than one job
    if loc is not None and len(jpaths) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major',
                          labelsize=tickfontsize)
    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
def plotCompsFromHModel(
        hmodel, doShowNow=False,
        block=0,  # unused
        jobname='',  # unused
        vocabList=None,  # catchall
        **kwargs):
    ''' Plot learned cluster parameters of hmodel.

    Square-image layout is used when the parameter dimension is a
    perfect square larger than 9; otherwise one row per component.

    Returns
    -------
    figH : figure handle holding the plots.
    '''
    if hasattr(hmodel.obsModel, 'Post'):
        # Convert posterior to point estimates before plotting
        hmodel.obsModel.setEstParamsFromPost()
    phi = hmodel.obsModel.EstParams.phi.copy()
    dim = phi.shape[1]
    useSquareLayout = dim > 9 and isPerfectSquare(dim)
    if useSquareLayout:
        figH = plotCompsAsSquareImages(phi, **kwargs)
    else:
        figH = plotCompsAsRowsInSingleImage(phi, **kwargs)
    if doShowNow:
        pylab.show()
    return figH
def plotCompsForJob(jobpath='', taskids=[1], lap=None, **kwargs):
    ''' Show plot of learned clusters from run(s) saved results on disk '''
    # Verify given absolute path is valid.
    requestedPath = jobpath
    if not os.path.isdir(jobpath):
        # Fallback: interpret jobpath as a "shortcut" name under BNPYOUTDIR
        jobpath = os.path.join(os.environ['BNPYOUTDIR'], jobpath)
        if not os.path.isdir(jobpath):
            raise ValueError('Not valid path: ' + requestedPath)
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    for position, taskid in enumerate(taskids):
        # A leading-dot task spec (e.g. '.best') requires ranking tasks
        # on disk first; only needs to happen once.
        if position == 0 and isinstance(taskid, str) \
                and taskid.startswith('.'):
            rankTasksForSingleJobOnDisk(jobpath)
        plotCompsForTask(os.path.join(jobpath, str(taskid)),
                         lap=lap, **kwargs)
    if 'block' in kwargs:
        pylab.show(block=kwargs['block'])
def main():
    ''' Debug/scratch driver: compare two saved runs on toy bars data.

    Loads a K=10 run (A) and a K=50 run (B) of the same DP mixture
    experiment, aligns their hard assignments, then drops into an
    IPython shell.  Everything after the first `return` is unreachable
    exploratory code kept for reference.
    '''
    nDoc = 100
    Data = BarsK10V900.get_data(nDocTotal=nDoc, nWordsPerDoc=500, seed=12345)
    pathA = "$BNPYOUTDIR/BarsD100K10/seattle-alg=bnpyDPbirthmerge-lik=Mult-lam=0.1-K=10-initname=randexamples-nBatch=1/1/"
    pathB = "$BNPYOUTDIR/BarsD100K10/seattle-alg=bnpyDPbirthmerge-lik=Mult-lam=0.1-K=50-initname=randexamples-nBatch=1/1/"
    aM, aLP, aSS, aLscore = loadModelWithLPSSandLscore(Data, pathA, 'A')
    bM, bLP, bSS, bLscore = loadModelWithLPSSandLscore(Data, pathB, 'B')
    # Hard assignments for each run, then align A's labels to B's
    bZ = bLP['resp'].argmax(axis=1)
    aZ = aLP['resp'].argmax(axis=1)
    aaZ = alignEstimatedStateSeqToTruth(aZ, bZ)
    # Documents where the two runs disagree after alignment
    offDocIDs = np.flatnonzero(aaZ != bZ)
    from IPython import embed
    embed()
    return
    # ---- UNREACHABLE below this point (kept as exploratory scratch) ----
    #for doc in offDocIDs:
    #    ktarget = aaZ[doc]
    #    targetUID = bSS.uids[ktarget]
    #    cLP = copy.deepcopy(bLP)
    #    cLP['resp'][doc,:] = 1e-40
    #    cLP['resp'][doc,ktarget] = 1.0
    #    calcLscoreFromModel(Data, bM, cLP, 'B swap doc %d' % (doc))
    #    bnpy.viz.BarsViz.plotExampleBarsDocs(Data, docIDsToPlot=[doc], vmax=10)
    # Swap ALL disagreeing docs at once to A's (aligned) label, rescore B
    cLP = copy.deepcopy(bLP)
    for doc in offDocIDs:
        ktarget = aaZ[doc]
        targetUID = bSS.uids[ktarget]
        cLP['resp'][doc, :] = 1e-40
        cLP['resp'][doc, ktarget] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B swap both')
    # Alternative: give each disagreeing doc its own brand-new cluster
    for doc in offDocIDs:
        cLP = dict(resp=np.hstack([bLP['resp'], 1e-40 * np.ones((nDoc, 1))]))
        cLP['resp'][doc, :] = 1e-40
        cLP['resp'][doc, -1] = 1.0
        calcLscoreFromModel(Data, bM, cLP, 'B new doc %d' % (doc))
    pylab.show(block=0)
    pylab.draw()
    from IPython import embed
    embed()
    return
    #cLP = copy.deepcopy(bLP)
    #cLP['resp'][32,:] = 1e-40
    #cLP['resp'][32,10] = 1.0
    #calcLscoreFromModel(Data, bM, cLP, 'B swap 32')
    return
    # Hand-picked experiments for docs 10 and 32
    cLP = copy.deepcopy(bLP)
    cLP['resp'][32, :] = 1e-40
    cLP['resp'][32, 10] = 1.0
    cLP['resp'][10, :] = 1e-40
    cLP['resp'][10, 10] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B swap 10&32')
    cLP = dict(resp=np.hstack([bLP['resp'], 1e-40 * np.ones((40, 1))]))
    cLP['resp'][32, :] = 1e-40
    cLP['resp'][32, -1] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B new 32')
    cLP = dict(resp=np.hstack([bLP['resp'], 1e-40 * np.ones((40, 1))]))
    cLP['resp'][10, :] = 1e-40
    cLP['resp'][10, -1] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B new 10')
    bnpy.viz.BarsViz.plotExampleBarsDocs(Data, docIDsToPlot=[32], vmax=10)
    pylab.title('Doc 32')
    bnpy.viz.BarsViz.plotExampleBarsDocs(Data, docIDsToPlot=[10], vmax=10)
    pylab.title('Doc 10')
    # NOTE(review): `docIDsInClusterWith32` is not defined anywhere in
    # this chunk — this line would raise NameError if ever reached.
    bnpy.viz.BarsViz.plotExampleBarsDocs(Data,
                                         docIDsToPlot=docIDsInClusterWith32,
                                         vmax=10)
    pylab.title('Cluster 0')
    pylab.show(block=0)
    raw_input("Press any key to continue >>")
    for doc in [1, 2, 3, 10, 32]:
        ktarget = bZ[doc]
        targetUID = bSS.uids[ktarget]
        try:
            # NOTE(review): `b_kwargs` is also undefined in this chunk.
            propXSS, Info = createSplitStats(
                Data, bM, bLP, curSSwhole=bSS,
                targetUID=targetUID,
                newUIDs=np.arange(100, 110),
                lapFrac=doc,
                b_cleanupMaxNumMergeIters=10,
                b_cleanupMaxNumAcceptPerIter=2,
                **b_kwargs)
        except BirthProposalError as e:
            print e
def plotManyPanelsByPVar(jpathPattern='/tmp/',
                         pvar=None, pvals=None,
                         W=5, H=4,
                         savefilename=None, doShowNow=False,
                         **kwargs):
    ''' Create line plots for jobs matching pattern and provided kwargs

    One subplot panel is created per value of the "panel variable" pvar;
    within each panel, plotMultipleLinesByLVar draws one line per value
    of its line variable.

    Parameters
    ----------
    jpathPattern : str
        Glob-like pattern identifying job output directories.
    pvar : str or None
        Name of the job parameter that varies across panels.
        If None, a single panel is made for jpathPattern itself.
    pvals : list, optional
        Values of pvar to show; defaults to all values found on disk.
    W, H : numeric
        Width/height (inches) per panel.

    Returns
    -------
    Info : dict with fields nrows, ncols.
    '''
    if pvar is None:
        jpathList = [jpathPattern]
        pvar = None
        pvals = [None]
    else:
        prefixfilepath = os.path.sep.join(
            jpathPattern.split(os.path.sep)[:-1])
        PPListMap = makePPListMapFromJPattern(jpathPattern)
        if pvals is None:
            pvals = PPListMap[pvar]
        else:
            # Keep only requested values that actually exist on disk
            pvals = [p for p in pvals if p in PPListMap[pvar]]
        jpathList = makeListOfJPatternsWithSpecificVals(
            PPListMap,
            prefixfilepath=prefixfilepath,
            key=pvar,
            vals=pvals,
            **kwargs)
    nrows = 1
    ncols = len(pvals)
    pylab.subplots(nrows=nrows, ncols=ncols,
                   figsize=(ncols * W, nrows * H))
    # Panels share both axes so curves are visually comparable
    axH = None
    for panelID, panel_jobPattern in enumerate(jpathList):
        axH = pylab.subplot(nrows, ncols, panelID + 1,
                            sharey=axH, sharex=axH)
        # Only show legend on first plot
        if panelID > 0 and 'loc' in kwargs:
            kwargs['loc'] = None
        # Defer any show() until all panels are drawn
        kwargs['doShowNow'] = False
        plotMultipleLinesByLVar(panel_jobPattern, **kwargs)
        if pvar is not None:
            pylab.title('%s=%s' % (pvar, pvals[panelID]))
    pylab.subplots_adjust(bottom=0.15, wspace=0.5)
    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    elif doShowNow:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
    Info = dict(
        nrows=nrows,
        ncols=ncols,
        )
    return Info
def plotMultipleLinesByLVar(jpathPattern,
                            lvar=None, lvals=None,
                            ColorMap=DefaultColorList,
                            loc=None, bbox_to_anchor=None,
                            savefilename=None,
                            tickfontsize=None,
                            doShowNow=False,
                            **kwargs):
    ''' Create line plots for provided jobs.

    Draws one line per value of the "line variable" lvar found among
    jobs matching jpathPattern.

    Parameters
    ----------
    jpathPattern : str
        Glob-like pattern identifying job output directories.
    lvar : str
        Name of the job parameter distinguishing the lines.
    lvals : list or scalar, optional
        Values of lvar to plot; defaults to every value found on disk.
    ColorMap : dict or list
        Either a mapping from line label / job pattern to a color, or
        a list indexed by line position.
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if lvals is None:
        lvals = PPListMap[lvar]
    elif not isinstance(lvals, list):
        lvals = [lvals]
    # Make sure all lval values are street legal (aka exist on disk)
    lvals = [ll for ll in lvals if ll == '.best' or ll in PPListMap[lvar]]
    # Do ranking in advance for each relevant job
    # NOTE(review): the block below is deliberately disabled (kept as a
    # string literal, not executed).
    '''if lvals[0] == '.best':
        xvar = kwargs['xvar']
        if 'xvals' in kwargs:
            xvals = kwargs['xvals']
        else:
            xvals = PPListMap[xvar]
        for xval in xvals:
            keyValDict = dict()
            keyValDict[xvar] = xval
            jpatternForXVal = makeJPatternWithSpecificVals(
                PPListMap,
                prefixfilepath=prefixfilepath,
                **keyValDict)
            TaskRanker.markBestAmongJobPatternOnDisk(jpatternForXVal)
    '''
    # Create list of jobs with corresponding pattern
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=lvar,
        vals=lvals,
        **kwargs)
    for lineID, line_jobPattern in enumerate(jpathList):
        line_label = '%s=%s' % (lvar, lvals[lineID])
        if isinstance(ColorMap, dict):
            # Try label then pattern as keys; fall back to default list.
            # NOTE(review): when the first lookup succeeds, the second
            # iteration overwrites it — the job-pattern key wins if both
            # are present; confirm this precedence is intended.
            for label in [line_label, line_jobPattern]:
                try:
                    line_color = ColorMap[label]
                except KeyError:
                    line_color = DefaultColorList[lineID]
        else:
            # Access next elt in ColorMap list
            line_color = ColorMap[lineID]
        plotSingleLineAcrossJobsByXVar(line_jobPattern,
                                       label=line_label,
                                       color=line_color,
                                       lineID=lineID,
                                       lvar=lvar,
                                       **kwargs)
    if loc is not None and len(jpathList) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major',
                          labelsize=tickfontsize)
    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    elif doShowNow:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
def tryDeleteProposalForSpecificTarget_HDPTopicModel( Data, hmodel, LPkwargs=dict(), ktarget=0, kabsorbList=[1], verbose=True, doPlotComps=True, doPlotELBO=True, doPlotDocTopicCount=False, nELBOSteps=3, nUpdateSteps=5, d_initTargetDocTopicCount='warm_start', d_initWordCounts='none', **kwargs): ''' Execute merge for specific whole dataset Returns ------- propModel : HModel propSS : SuffStatBag propLscore : scalar real ELBO score of proposed model curModel : HModel curSS : SuffStatBag curLscore : scalar real ELBO score of current model ''' kabsorbList = parse_list_of_absorbing_comps( kabsorbList, ktarget, hmodel.obsModel.K) from bnpy.allocmodel.topics.HDPTopicRestrictedLocalStep2 \ import summarizeRestrictedLocalStep_HDPTopicModel curModel = hmodel.copy() propModel = hmodel.copy() # Update current model if verbose: print "" print "Loading model from disk and performing local step..." starttime = time.time() curLP = curModel.calc_local_params(Data, **LPkwargs) curSS = curModel.get_global_suff_stats(Data, curLP, doPrecompEntropy=1) curModel.update_global_params(curSS) curLdict = curModel.calc_evidence(SS=curSS, todict=1) curLscore = curLdict['Ltotal'] if verbose: print "%5.1f sec to obtain current model, LP, and SS" % ( time.time() - starttime) nontrivialdocIDs = np.flatnonzero(curLP['DocTopicCount'][:, ktarget] > .01) sort_mask = np.argsort(-1*curLP['DocTopicCount'][nontrivialdocIDs, ktarget]) nontrivialdocIDs = nontrivialdocIDs[sort_mask] docIDs = nontrivialdocIDs[:5] if verbose: print "" print "Proposing deletion of cluster %d" % (ktarget) print " total mass N_k = %.1f" % (curSS.getCountVec()[ktarget]) print " %d docs with non-trivial mass" % (nontrivialdocIDs.size) print "" print "Absorbing into %d/%d remaining clusters" % ( len(kabsorbList), curSS.K-1) print " ".join(['%3d' % (kk) for kk in kabsorbList]) print "" # Create init observation model for absorbing states xObsModel = propModel.obsModel.copy() xinitSS = curSS.copy(includeELBOTerms=False, 
includeMergeTerms=False) for k in reversed(np.arange(xObsModel.K)): if k not in kabsorbList: xinitSS.removeComp(k) # Find clusters correlated in appearance with the target if curModel.getObsModelName().count('Mult') and d_initWordCounts.count('bycorr'): corrVec = calcCorrelationFromTargetToAbsorbingSet( curLP['DocTopicCount'], ktarget, kabsorbList) bestAbsorbIDs = np.flatnonzero(corrVec >= .001) print "absorbIDs with best correlation:" print bestAbsorbIDs for k in bestAbsorbIDs: xinitSS.WordCounts[k,:] += curSS.WordCounts[ktarget,:] xObsModel.update_global_params(xinitSS) # Create init pi vector for absorbing states curPiVec = propModel.allocModel.get_active_comp_probs() xPiVec = curPiVec[kabsorbList].copy() xPiVec /= xPiVec.sum() xPiVec *= (curPiVec[kabsorbList].sum() + curPiVec[ktarget]) assert np.allclose(np.sum(xPiVec), curPiVec[ktarget] + np.sum(curPiVec[kabsorbList])) if verbose: print "Reassigning target mass among absorbing set..." starttime = time.time() propLscoreList = list() for ELBOstep in range(nELBOSteps): xSS, Info = summarizeRestrictedLocalStep_HDPTopicModel( Dslice=Data, curModel=curModel, curLPslice=curLP, ktarget=ktarget, kabsorbList=kabsorbList, curPiVec=curPiVec, xPiVec=xPiVec, xObsModel=xObsModel, nUpdateSteps=nUpdateSteps, d_initTargetDocTopicCount=d_initTargetDocTopicCount, LPkwargs=LPkwargs) if ELBOstep < nELBOSteps - 1: # Update the xObsModel xObsModel.update_global_params(xSS) # TODO: update xPiVec??? 
print " completed step %d/%d after %5.1f sec" % ( ELBOstep+1, nELBOSteps, time.time() - starttime) propSS = curSS.copy() propSS.replaceCompsWithContraction( replaceSS=xSS, replaceUIDs=[curSS.uids[k] for k in kabsorbList], removeUIDs=[curSS.uids[ktarget]], ) assert np.allclose(propSS.getCountVec().sum(), curSS.getCountVec().sum(), atol=0.01, rtol=0) propModel.update_global_params(propSS) propLdict = propModel.calc_evidence(SS=propSS, todict=1) propLscore = propModel.calc_evidence(SS=propSS) propLscoreList.append(propLscore) if verbose: print "" print "Proposal result:" if propLscore - curLscore > 0: print " ACCEPTED" else: print " REJECTED" print "%.4e cur ELBO score" % (curLscore) print "%.4e prop ELBO score" % (propLscore) print "% .4e change in ELBO score" % (propLscore - curLscore) print "" for key in sorted(curLdict.keys()): if key.count('_') or key.count('total'): continue print " gain %8s % .3e" % ( key, propLdict[key] - curLdict[key]) print "" if docIDs.size > 0: np.set_printoptions(suppress=1, precision=2, linewidth=120) xLPslice = Info['xLPslice'] print "BEFORE" print "-----" print np.hstack([ curLP['DocTopicCount'][docIDs,:][:,kabsorbList], curLP['DocTopicCount'][docIDs,:][:,ktarget][:,np.newaxis] ]) print "AFTER" print "-----" print xLPslice['DocTopicCount'][docIDs,:] if doPlotELBO: import bnpy.viz from bnpy.viz.PlotUtil import pylab bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab) iters = np.arange(len(propLscoreList)) pylab.plot(iters, propLscoreList, 'b-') pylab.plot(iters, curLscore*np.ones_like(iters), 'k--') pylab.show() if doPlotDocTopicCount: import bnpy.viz from bnpy.viz.PlotUtil import pylab bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab) kplotList = [x for x in kabsorbList] kplotList.append(ktarget) for d in docIDs: curDTClabels = ['%.1f' % (x) for x in curLP['DocTopicCount'][d, kplotList]] bnpy.viz.PlotComps.plotCompsFromHModel( curModel, compListToPlot=kplotList, compsToHighlight=[ktarget], xlabels=curDTClabels, vmin=0, vmax=.01) fig = pylab.gcf() 
fig.canvas.set_window_title('doc %d BEFORE' % (d)) propLP = Info['xLPslice'] propDTClabels = ['%.1f' % (x) for x in propLP['DocTopicCount'][d, :]] bnpy.viz.PlotComps.plotCompsFromHModel( propModel, xlabels=propDTClabels, vmin=0, vmax=.01) fig = pylab.gcf() fig.canvas.set_window_title('doc %d AFTER' % (d)) pylab.show(block=False) # Plot docs dIm = np.zeros((docIDs.size*2, 900)) dImLabels = list() tImLabels = list() row = 0 for ii,d in enumerate(docIDs): start = Data.doc_range[d] stop = Data.doc_range[d+1] wid = Data.word_id[start:stop] wct = Data.word_count[start:stop] dIm[row, wid] = wct dImLabels.append('doc %d' % (d)) tmask = np.flatnonzero(curLP['resp'][start:stop, ktarget] > .01) targetDoc = np.zeros(900) dIm[row+docIDs.size, wid[tmask]] = wct[tmask] \ * curLP['resp'][start + 1*tmask, ktarget] tImLabels.append('trgt doc %d' % (d)) row += 1 bnpy.viz.BarsViz.showTopicsAsSquareImages( dIm, ncols=2, vmin=0, vmax=1, xlabels=dImLabels.extend(tImLabels), cmap='jet') pylab.show() if doPlotComps: import bnpy.viz from bnpy.viz.PlotUtil import pylab bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab) bnpy.viz.PlotComps.plotCompsFromSS( curModel, curSS, compsToHighlight=[ktarget], vmin=0, vmax=.01) fig = pylab.gcf() fig.canvas.set_window_title('BEFORE') bnpy.viz.PlotComps.plotCompsFromSS( propModel, propSS, vmin=0, vmax=.01) fig = pylab.gcf() fig.canvas.set_window_title('AFTER') pylab.show() return ( propModel, propSS, propLscoreList, curModel, curSS, curLscore)
def makePlot(muVals=[(0.01, 0), (0.1, 0), (1, 0), (10, 0)], doCorrection=1):
    ''' Plot 1D-Gaussian Bregman divergence curves D(x, mu) over x.

    Parameters
    ----------
    muVals : list of (mu1, mu2) tuples
        Parameter pairs passed to calcBregDiv_Gauss1D; one curve each.
    doCorrection : int
        Forwarded to calcBregDiv_Gauss1D; also switches the y-axis label.
    '''
    pylab.figure()
    xgrid = np.linspace(-8, 8, 2000)
    # NOTE(review): pylab.hold was removed in matplotlib >= 3.0;
    # this call only works on older matplotlib installs.
    pylab.hold('on')
    # Faint zero line for reference
    pylab.plot(xgrid, np.zeros_like(xgrid), ':', alpha=0.2)
    for mu1, mu2 in muVals:
        ygrid = calcBregDiv_Gauss1D(xgrid, mu1, mu2,
                                    doCorrection=doCorrection)
        print ygrid.min()
        pylab.plot(xgrid, ygrid,
                   label='mu1=% 6.2f mu2=% 6.2f' % (mu1, mu2))
    pylab.legend(loc='lower right')
    pylab.xlim([xgrid.min(), xgrid.max()])
    pylab.ylim([xgrid.min(), xgrid.max()])
    pylab.xlabel('x')
    if doCorrection:
        pylab.ylabel('D(x, \mu) + correction')
    else:
        pylab.ylabel('D(x, \mu)')


if __name__ == "__main__":
    # Save one EPS figure per scenario (fixed mean / fixed var / general)
    for doC in [1]:
        makePlot(muVals=[(0.01, 0), (0.1, 0), (1, 0), (10, 0)],
                 doCorrection=doC)
        pylab.savefig('BregDivGauss1D_fixedMean_doC=%d.eps' % (doC),
                      pad_inches=0, bbox_inches='tight')
        makePlot(muVals=[(4 + 1, -2), (1, 0), (4 + 1, 2), (16 + 1, 4)],
                 doCorrection=doC)
        pylab.savefig('BregDivGauss1D_fixedVar_doC=%d.eps' % (doC),
                      pad_inches=0, bbox_inches='tight')
        makePlot(muVals=[(10, 0), (4 + 1, 2), (25 + 0.01, 5)],
                 doCorrection=doC)
        pylab.savefig('BregDivGauss1D_general_doC=%d.eps' % (doC),
                      pad_inches=0, bbox_inches='tight')
    pylab.show(block=True)
# NOTE(review): the three statements below appear to be the tail of a
# plotting function whose definition lies outside this chunk; their true
# enclosing scope (and `outfilepath`) cannot be determined from here.
pylab.savefig(outfilepath)
pylab.close('all')
print('Wrote: %s' % (outfilepath))


def parseArgs(**kwargs):
    ''' Read args from stdin into defined dict fields '''
    parser = argparse.ArgumentParser()
    parser.add_argument('task_output_path')
    parser.add_argument('--lap', default=None, type=float)
    parser.add_argument('--taskids', type=str, default=None,
                        help=taskidsHelpMsg)
    parser.add_argument('--vocabfile', type=str, default=None)
    args = parser.parse_args()
    arg_dict = vars(args)
    # Optionally load vocabulary (one word per line) from plain-text file
    if args.vocabfile is not None:
        with open(args.vocabfile, 'r') as f:
            arg_dict['vocabList'] = list(map(str.strip, f.readlines()))
    return arg_dict


if __name__ == "__main__":
    arg_dict = parseArgs()
    #plotCompsForJob(block=1, **argDict)
    # When --taskids is given, do nothing here (multi-task path is the
    # commented-out plotCompsForJob call above); otherwise plot one task.
    if 'taskids' in arg_dict and arg_dict['taskids'] is not None:
        pass
    else:
        plotCompsForTask(**arg_dict)
        pylab.show()
def plotComps(tmpmodel, Data, compsToHighlight=None):
    ''' Display learned clusters of given model, then show the figure. '''
    bnpy.viz.PlotComps.plotCompsFromHModel(
        tmpmodel,
        Data=Data,
        compsToHighlight=compsToHighlight)
    pylab.show()
def plotJobs(jpaths, legNames, styles=None, density=2,
             xvar='laps', yvar='evidence', loc='upper right',
             xmin=None, xmax=None,
             taskids=None, savefilename=None, tickfontsize=None,
             bbox_to_anchor=None, **kwargs):
    ''' Create line plots for provided jobs.

    Parameters
    ----------
    jpaths : list of str
        Job output directories, one line group per job.
    legNames : list of str
        Legend labels, parallel to jpaths.
    styles : list of dict, optional
        Per-job style kwargs; defaults to colorID=position.
    xvar, yvar : str
        Quantities plotted on x/y axes; when ('laps', 'evidence'),
        axis limits are auto-tuned (see below).
    '''
    nLines = len(jpaths)
    if nLines == 0:
        raise ValueError('Empty job list. Nothing to plot.')
    nLeg = len(legNames)
    for lineID in range(nLines):
        if styles is None:
            curStyle = dict(colorID=lineID)
        else:
            curStyle = styles[lineID]
        task_kwargs = dict(**kwargs)
        task_kwargs.update(curStyle)
        plot_all_tasks_for_job(jpaths[lineID], legNames[lineID],
                               xvar=xvar, yvar=yvar,
                               taskids=taskids, density=density,
                               **task_kwargs)
    # Y-axis limit determination
    # If we have "enough" data about the run beyond two full passes of
    # dataset, we zoom in on the region of data beyond lap 2
    if xvar == 'laps' and yvar == 'evidence':
        xmax = 0
        ymin = np.inf
        ymin2 = np.inf
        ymax = -np.inf
        allRunsHaveXBeyond1 = True
        # Scan every drawn line to find y-range at lap >= 1 and lap >= 2
        for line in pylab.gca().get_lines():
            xd = line.get_xdata()
            yd = line.get_ydata()
            if xd.size < 3:
                allRunsHaveXBeyond1 = False
                continue
            posLap1 = np.searchsorted(xd, 1.0)
            posLap2 = np.searchsorted(xd, 2.0)
            if posLap1 < xd.size:
                ymin = np.minimum(ymin, yd[posLap1])
                ymax = np.maximum(ymax, yd[posLap1:].max())
            if posLap2 < xd.size:
                ymin2 = np.minimum(ymin2, yd[posLap2])
            xmax = np.maximum(xmax, xd.max())
            if xd.max() <= 1:
                allRunsHaveXBeyond1 = False
        if allRunsHaveXBeyond1 and xmax > 1.5:
            # If all relevant curves extend beyond x=1, only show that part
            xmin = 1.0 - 1e-5
        else:
            xmin = 0
        if allRunsHaveXBeyond1 and ymin2 < ymax:
            range1 = ymax - ymin
            range2 = ymax - ymin2
            if 10 * range2 < range1:
                # Y values jump from lap1 to lap2 is enormous,
                # so let's just show y values from lap2 onward...
                ymin = ymin2
        if (not np.allclose(ymax, ymin)) and allRunsHaveXBeyond1:
            pylab.ylim([ymin, ymax + 0.1 * (ymax - ymin)])
        pylab.xlim([xmin, xmax + .05 * (xmax - xmin)])
    if loc is not None and len(jpaths) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major',
                          labelsize=tickfontsize)
    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
def plotExampleBarsDocs(Data, docIDsToPlot=None, figID=None,
                        vmax=None, nDocToPlot=16, doShowNow=False,
                        seed=0,
                        randstate=np.random.RandomState(0),
                        xlabels=None,
                        W=1, H=1,
                        **kwargs):
    ''' Plot example documents of a toy-bars dataset as square images.

    Parameters
    ----------
    Data : bag-of-words dataset with .vocab_size (a perfect square),
        .nDoc, .doc_range, .word_id, .word_count
    docIDsToPlot : list of int, optional
        Documents to display; if None, up to nDocToPlot are sampled.
    vmax : numeric, optional
        Upper color limit; defaults to the 98th percentile of per-type
        counts across documents.
    seed : int or None
        If not None, overrides randstate with RandomState(seed).
    xlabels : list of str, optional
        One x-axis label per plotted document.
    W, H : numeric
        Width/height (inches) per subplot panel.
    '''
    kwargs['vmin'] = 0
    kwargs['interpolation'] = 'nearest'
    if vmax is not None:
        kwargs['vmax'] = vmax
    if seed is not None:
        randstate = np.random.RandomState(seed)
    V = Data.vocab_size
    sqrtV = int(np.sqrt(V))
    # Vocab must reshape into a square image
    assert np.allclose(sqrtV * sqrtV, V)
    if docIDsToPlot is not None:
        nDocToPlot = len(docIDsToPlot)
    else:
        size = np.minimum(Data.nDoc, nDocToPlot)
        docIDsToPlot = randstate.choice(Data.nDoc, size=size, replace=False)
    ncols = 5
    nrows = int(np.ceil(nDocToPlot / float(ncols)))
    if vmax is None:
        # Shared color scale across docs: 98th percentile of counts
        DocWordArr = Data.getDocTypeCountMatrix()
        vmax = int(np.max(np.percentile(DocWordArr, 98, axis=0)))
        # NOTE(review): this vmax is never stored into kwargs, so it is
        # not actually passed to imshow — confirm whether intended.
    if figID is None:
        figH, ha = pylab.subplots(nrows=nrows, ncols=ncols,
                                  figsize=(ncols * W, nrows * H))
    for plotPos, docID in enumerate(docIDsToPlot):
        # Densify this doc's sparse word counts into a full V-vector
        start = Data.doc_range[docID]
        stop = Data.doc_range[docID + 1]
        wIDs = Data.word_id[start:stop]
        wCts = Data.word_count[start:stop]
        docWordHist = np.zeros(V)
        docWordHist[wIDs] = wCts
        squareIm = np.reshape(docWordHist, (sqrtV, sqrtV))
        pylab.subplot(nrows, ncols, plotPos + 1)
        pylab.imshow(squareIm, **kwargs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
        if xlabels is not None:
            pylab.xlabel(xlabels[plotPos])
    # Disable empty plots!
    # NOTE(review): if docIDsToPlot is empty, plotPos is undefined here
    # and this raises NameError — confirm callers never pass empty lists.
    for kdel in range(plotPos + 2, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')
    # Fix margins between subplots
    pylab.subplots_adjust(wspace=0.04, hspace=0.04,
                          left=0.01, right=0.99,
                          top=0.99, bottom=0.01)
    if doShowNow:
        pylab.show()