def plotDocUsageForProposal(docUsageByUID, savefilename=None, **kwargs):
    ''' Draw one trace line of doc usage for every entry of docUsageByUID.

    Parameters
    ----------
    docUsageByUID : dict-like
        Maps each uid to a sequence of usage values.
        Value at position t is drawn at x = t.
        Only the first six uids (in iteration order) get a legend label.
    savefilename : str, optional
        If provided, the figure is written to this path and afterwards
        all open figures are closed.
    **kwargs : accepted for call compatibility, not used here.

    Post Condition
    --------------
    A new figure exists with one line per uid, unless it was saved
    to disk (in which case every figure has been closed).
    '''
    pylab.figure()
    nLabeled = 6  # only a few labels fit well on a legend
    maxLen = 0
    maxVal = 0
    for pos, uid in enumerate(docUsageByUID):
        usage = np.asarray(docUsageByUID[uid])
        steps = np.arange(0, usage.size)
        lineArgs = dict(label=uid) if pos < nLabeled else dict()
        pylab.plot(steps, usage, **lineArgs)
        maxLen = np.maximum(maxLen, usage.size)
        maxVal = np.maximum(maxVal, usage.max())

    # Use big chunk of left-hand side of plot for legend display
    pylab.xlim(np.asarray([-0.75 * maxLen, maxLen - 0.5]))
    pylab.xticks(np.arange(1, maxLen))
    pylab.ylim([0, 1.1 * maxVal])
    pylab.xlabel('num proposal steps')
    pylab.ylabel('num docs using each comp')
    pylab.legend(loc='upper left', fontsize=12)
    pylab.subplots_adjust(left=0.2)

    if savefilename is None:
        return
    pylab.savefig(savefilename, pad_inches=0)
    pylab.close('all')
def plotExampleBarsDocs(Data, docIDsToPlot=None, figID=None,
                        vmax=None, nDocToPlot=16, doShowNow=False,
                        seed=0, randstate=np.random.RandomState(0),
                        xlabels=None,
                        W=1, H=1,
                        **kwargs):
    ''' Show word-count histograms of several docs as square images.

    Parameters
    ----------
    Data : dataset object
        Must provide vocab_size, nDoc, doc_range, word_id, word_count
        and getDocTypeCountMatrix().
        NOTE(review): presumably a bnpy bag-of-words dataset -- confirm.
        vocab_size must be a perfect square.
    docIDsToPlot : array-like of int, optional
        Docs to display. If None, docs are sampled without replacement.
    figID : optional
        If None, a new figure is created. Otherwise no figure is created
        and drawing goes to the current figure; figID is not used further.
    vmax : number, optional
        Upper limit of the color scale shared by every image.
        If None, the largest per-word 98th percentile of the doc-by-word
        count matrix is used, provided it is positive.
    nDocToPlot : int
        Number of docs to sample when docIDsToPlot is None.
    doShowNow : bool
        If true, pylab.show() is called at the end.
    seed : int, optional
        If not None, a fresh RandomState(seed) replaces randstate.
    randstate : numpy RandomState
        Used for sampling only when seed is None. The default instance is
        created once at definition time and is shared across calls.
    xlabels : list of str, optional
        xlabels[i] is written under the i-th plotted doc.
    W, H : number
        Width and height of one grid cell, used for figsize.
    **kwargs : forwarded to pylab.imshow.
        'vmin' and 'interpolation' are always overwritten.
    '''
    kwargs['vmin'] = 0
    kwargs['interpolation'] = 'nearest'
    if seed is not None:
        randstate = np.random.RandomState(seed)

    V = Data.vocab_size
    sqrtV = int(np.sqrt(V))
    assert np.allclose(sqrtV * sqrtV, V)

    if docIDsToPlot is not None:
        nDocToPlot = len(docIDsToPlot)
    else:
        # Keep the count in sync with what is actually drawn, so the grid
        # is not padded with blank rows when the dataset is small.
        nDocToPlot = int(np.minimum(Data.nDoc, nDocToPlot))
        docIDsToPlot = randstate.choice(
            Data.nDoc, size=nDocToPlot, replace=False)
    ncols = 5
    nrows = max(1, int(np.ceil(nDocToPlot / float(ncols))))

    if vmax is not None:
        kwargs['vmax'] = vmax
    else:
        # Previously this value was computed but never handed to imshow.
        DocWordArr = Data.getDocTypeCountMatrix()
        autoVmax = int(np.max(np.percentile(DocWordArr, 98, axis=0)))
        if autoVmax > 0:
            # A zero upper limit would collapse the color range (vmin=0).
            kwargs['vmax'] = autoVmax

    if figID is None:
        figH, ha = pylab.subplots(nrows=nrows, ncols=ncols,
                                  figsize=(ncols * W, nrows * H))

    for plotPos, docID in enumerate(docIDsToPlot):
        start = Data.doc_range[docID]
        stop = Data.doc_range[docID + 1]
        wIDs = Data.word_id[start:stop]
        wCts = Data.word_count[start:stop]
        docWordHist = np.zeros(V)
        docWordHist[wIDs] = wCts
        squareIm = np.reshape(docWordHist, (sqrtV, sqrtV))

        pylab.subplot(nrows, ncols, plotPos + 1)
        pylab.imshow(squareIm, **kwargs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
        if xlabels is not None:
            pylab.xlabel(xlabels[plotPos])

    # Disable empty plots!
    # Counting from nDocToPlot (not the loop variable) also works
    # when there were no docs to draw.
    for kdel in range(nDocToPlot + 1, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')

    # Fix margins between subplots
    pylab.subplots_adjust(wspace=0.04, hspace=0.04, left=0.01, right=0.99,
                          top=0.99, bottom=0.01)
    if doShowNow:
        pylab.show()
def plotELBOtermsForProposal(
        curLdict, propLdictList,
        xs=None,
        ymin=-0.5,
        ymax=0.5,
        savefilename=None,
        **kwargs):
    ''' Create trace plot of ELBO gain/loss relative to current model.

    Parameters
    ----------
    curLdict : dict
        Maps term name to its value under the current model.
        Only keys without an underscore are plotted.
    propLdictList : list of dict
        One dict per proposal step, indexed by the same keys as curLdict.
    xs : array-like, optional
        x coordinate of each step. Defaults to 0, 1, ... L-1
        where L = len(propLdictList).
    ymin, ymax : float
        Accepted for call compatibility. The body does not apply them.
    savefilename : str, optional
        If provided, the figure is written to this path and afterwards
        all open figures are closed.
    **kwargs : accepted for call compatibility, not used here.

    Post Condition
    --------------
    A new figure shows, for each plotted key, the proposal value minus the
    current value at every step, plus a dotted horizontal line at zero.
    '''
    pylab.figure()
    L = len(propLdictList)
    if xs is None:
        xs = np.arange(0, L)

    legendKeys = [key for key in curLdict if key.count('_') == 0]
    for key in legendKeys:
        if key.count('total'):
            # Emphasize the total with a thick, opaque, solid line
            linewidth = 4
            alpha = 1
            style = '-'
        else:
            linewidth = 3
            alpha = 0.5
            style = '--'
        propVals = np.asarray([propLdict[key] for propLdict in propLdictList])
        # Out-of-place subtraction: an in-place "-=" fails with a casting
        # error when propVals is an integer array and the baseline is float.
        ys = propVals - curLdict[key]
        pylab.plot(xs, ys, style,
                   color=_getLineColorFromELBOKey(key),
                   linewidth=linewidth,
                   alpha=alpha,
                   label=key)

    # Use big chunk of left-hand side of plot for legend display
    L = L + 1
    xlims = np.asarray([-0.75 * L, L - 0.5])
    pylab.xlim(xlims)
    pylab.xticks(xs)
    pylab.plot(xlims, np.zeros_like(xlims), 'k:')
    pylab.xlabel('num proposal steps')
    pylab.ylabel('L gain (prop - current)')
    pylab.legend(loc='lower left', fontsize=12)
    pylab.subplots_adjust(left=0.2)
    if savefilename is not None:
        pylab.savefig(savefilename, pad_inches=0)
        pylab.close('all')
def showTopicsAsSquareImages(topics,
                             activeCompIDs=None,
                             compsToHighlight=None,
                             compListToPlot=None,
                             xlabels=[],
                             Kmax=50,
                             ncols=5,
                             W=1, H=1, figH=None,
                             **kwargs):
    ''' Show each row of topics as a square image on a grid of subplots.

    Parameters
    ----------
    topics : 2D array, K x V
        V must be a perfect square. Row kk is reshaped to sqrt(V) x sqrt(V).
    activeCompIDs : array-like of int, optional
        activeCompIDs[kk] is the ID of the component stored in row kk.
        Defaults to 0, 1, ... K-1.
    compsToHighlight : int or array-like of int, optional
        IDs whose image gets a thick green border.
    compListToPlot : array-like of int, optional
        IDs to show, in grid order. Defaults to 0, 1, ... K-1.
        An ID absent from activeCompIDs leaves its grid cell blank.
    xlabels : list of str or None
        If non-empty, xlabels[i] is written under the i-th grid cell and
        each cell is made 1.5 times taller.
    Kmax : int
        At most this many grid cells are filled; a notice is printed when
        compListToPlot is longer.
    ncols : int
        Number of grid columns.
    W, H : number
        Width and height of one grid cell, used for figsize.
    figH : figure handle, optional
        If provided, its number is reused instead of opening a new figure.
    **kwargs :
        Only keys already present in the module-level imshowArgs dict
        are used; they override those defaults for pylab.imshow.

    Returns
    -------
    figH : handle of the figure that was drawn into.
    '''
    # Start from the module-level defaults, then apply caller overrides
    local_imshowArgs = dict(**imshowArgs)
    for key in local_imshowArgs:
        if key in kwargs:
            local_imshowArgs[key] = kwargs[key]

    # Treat None like "no labels" everywhere, not only inside the loop
    hasLabels = xlabels is not None and len(xlabels) > 0
    if hasLabels:
        H = 1.5 * H

    K, V = topics.shape
    sqrtV = int(np.sqrt(V))
    assert np.allclose(sqrtV, np.sqrt(V))

    if compListToPlot is None:
        compListToPlot = np.arange(0, K)
    if activeCompIDs is None:
        activeCompIDs = np.arange(0, K)
    else:
        # Array form is required by the elementwise == lookup below;
        # a plain list would compare as a single False.
        activeCompIDs = np.atleast_1d(np.asarray(activeCompIDs))
    if compsToHighlight is None:
        compsToHighlight = np.zeros(0, dtype=int)
    else:
        compsToHighlight = np.atleast_1d(np.asarray(compsToHighlight))

    # Create Figure
    Kplot = int(np.minimum(len(compListToPlot), Kmax))
    nrows = max(1, int(np.ceil(Kplot / float(ncols))))
    if figH is None:
        # Make a new figure
        figH, ha = pylab.subplots(nrows=nrows, ncols=ncols,
                                  figsize=(ncols * W, nrows * H))
    else:
        # Use existing figure
        # TODO: Find a way to make this call actually change the figsize
        figH, ha = pylab.subplots(nrows=nrows, ncols=ncols,
                                  figsize=(ncols * W, nrows * H),
                                  num=figH.number)

    for plotID, compID in enumerate(compListToPlot):
        if plotID >= Kmax:
            print('DISPLAY LIMIT EXCEEDED. Showing %d/%d components'
                  % (plotID, len(activeCompIDs)))
            break

        if compID not in activeCompIDs:
            aH = pylab.subplot(nrows, ncols, plotID + 1)
            aH.axis('off')
            continue

        kk = np.flatnonzero(compID == activeCompIDs)[0]
        topicIm = np.reshape(topics[kk, :], (sqrtV, sqrtV))
        ax = pylab.subplot(nrows, ncols, plotID + 1)
        pylab.imshow(topicIm, **local_imshowArgs)
        pylab.xticks([])
        pylab.yticks([])

        # Draw colored border around highlighted topics
        if compID in compsToHighlight:
            for spine in ax.spines.values():
                spine.set_color('green')
                spine.set_linewidth(3)

        if hasLabels:
            pylab.xlabel(xlabels[plotID], fontsize=11)

    # Disable empty plots!
    # Start right after the last filled cell. Using the loop variable here
    # skipped one cell after a Kmax break and failed on an empty list.
    for kdel in range(Kplot + 1, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')

    # Fix margins between subplots
    pylab.subplots_adjust(
        wspace=0.1,
        hspace=0.1 * nrows,
        left=0.001, right=0.999,
        bottom=0.1, top=0.999)
    return figH
def plotSingleLineAcrossJobsByXVar(jpathPattern,
                                   label='',
                                   xvar=None,
                                   xvals=None,
                                   xlabel=None,
                                   yvar='evidence',
                                   lineStyle='.-',
                                   taskids='all',
                                   lineID=0,
                                   lvar='',
                                   **kwargs):
    ''' Add one line to the current figure, with one point per saved job.

    The job path for each x value is derived from jpathPattern by the
    project helpers makePPListMapFromJPattern and
    makeListOfJPatternsWithSpecificVals.

    Parameters
    ----------
    jpathPattern : str
        Path pattern describing the family of jobs.
    label : str
        Legend label of the connected line.
    xvar : str
        Name of the variable that differs across jobs.
    xvals : list, optional
        Values of xvar to visit. Defaults to the values found for xvar
        in the map built from jpathPattern.
    xlabel : str, optional
        x-axis label. Defaults to xvar.
    yvar : str
        Name of the quantity loaded from disk for the y-axis.
    lineStyle : str
        Format string for the connected line.
    taskids : forwarded to BNPYArgParser.parse_task_ids.
    lineID : int
        Axis labels are written only when this equals 0.
    lvar : str
        Accepted for call compatibility, not used here.
    **kwargs :
        Forwarded to makeListOfJPatternsWithSpecificVals. Keys that also
        exist in DefaultLinePlotKwArgs override those plot defaults.

    Raises
    ------
    ValueError
        If any derived job path does not exist on disk.

    Post Condition
    --------------
    Current axes have one line added.
    '''
    def _makePlotArgs():
        # Defaults first, then caller overrides for keys the defaults know
        args = copy.deepcopy(DefaultLinePlotKwArgs)
        for name in args:
            if name in kwargs:
                args[name] = kwargs[name]
        args['markeredgecolor'] = args['color']
        return args

    pathParts = jpathPattern.split(os.path.sep)
    prefixfilepath = os.path.sep.join(pathParts[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if xvals is None:
        xvals = PPListMap[xvar]

    nVals = len(xvals)
    bestXs = np.zeros(nVals)
    bestYs = np.zeros(nVals)
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=xvar,
        vals=xvals,
        **kwargs)

    # First pass: every task of every job as an unconnected point
    for pos, jobpath in enumerate(jpathList):
        if not os.path.exists(jobpath):
            raise ValueError("PATH NOT FOUND: %s" % (jobpath))
        xval = float(xvals[pos])
        pointArgs = _makePlotArgs()
        for tid in BNPYArgParser.parse_task_ids(jobpath, taskids):
            yval = loadYValFromDisk(jobpath, tid, yvar=yvar)
            pylab.plot(xval, yval, '.', **pointArgs)

    # Second pass: top-ranked task of each job, joined by a line
    for pos, jobpath in enumerate(jpathList):
        rankTasksForSingleJobOnDisk(os.path.join(jobpath))
        xval = float(xvals[pos])
        yval = loadYValFromDisk(jobpath, '.best', yvar=yvar)
        assert isinstance(xval, float)
        assert isinstance(yval, float)
        bestXs[pos] = xval
        bestYs[pos] = yval

    lineArgs = _makePlotArgs()
    lineArgs['label'] = label
    pylab.plot(bestXs, bestYs, lineStyle, **lineArgs)

    if lineID == 0:
        pylab.xlabel(xvar if xlabel is None else xlabel)
        pylab.ylabel(LabelMap[yvar])
def plot_all_tasks_for_job(jobpath, label, taskids=None,
                           lineType='.-',
                           spreadLineType='--',
                           color=None,
                           yvar='avgLikScore',
                           xvar='laps',
                           markersize=10,
                           linewidth=2,
                           minLap=0,
                           showFinalPt=0,
                           fileSuffix='PredLik.mat',
                           xjitter=None,
                           prefix='predlik',
                           colorID=0,
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath

    For every task, data comes from text files when any file matching
    'predlik-*.txt' exists in the task directory; otherwise from the
    .mat files matching '*' + fileSuffix.

    Parameters
    ----------
    jobpath : str
        Directory holding one subdirectory per task.
    label : str
        Legend label, attached to the line of the first task only.
    taskids : forwarded to BNPYArgParser.parse_task_ids.
    lineType : str
        Format string of the main line.
    spreadLineType : str
        Format string of the 10th/90th percentile lines.
    color : optional
        Line color. Defaults to Colors[colorID % len(Colors)].
    yvar : str
        Unless it contains 'Kactive', it is normalized to start with
        'avg' and end with 'Score'. With 'Kactive' (and no 'Percentile'),
        the Percentile50 file gives the main line and Percentile10/90
        give the spread lines.
    xvar : str
        Must contain 'lap', 'K' or 'time' when text files are used.
    markersize, linewidth : number
        Default plot style, overridable through kwargs.
    minLap : number
        If positive and the task path contains 'fix', only checkpoints
        with lap > minLap are kept (text files only).
    showFinalPt : bool
        If true, the last point of each line is drawn again as '.'.
    fileSuffix : str
        Suffix of the .mat files; if it ends with '.txt' it also forms the
        suffix of the yvar text file.
    xjitter : number, optional
        Offset added to the x values before plotting.
    prefix : str
        Filename prefix of the text files that are loaded.
    colorID : int
        Index into Colors, used only when color is None.
    **kwargs : extra keyword args for pylab.plot, overriding the defaults.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If xvar is not recognized, or a task has neither text files
        nor .mat files.
    '''
    if not os.path.exists(jobpath):
        print('PATH NOT FOUND %s' % (jobpath,))
        return None
    if not yvar.startswith('avg') and yvar.count('Kactive') == 0:
        yvar = 'avg' + yvar
    if not yvar.endswith('Score') and yvar.count('Kactive') == 0:
        yvar = yvar + 'Score'

    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    for tt, taskid in enumerate(taskids):
        taskoutpath = os.path.join(jobpath, taskid)
        hpaths = glob.glob(os.path.join(taskoutpath, '*' + fileSuffix))
        txtpaths = glob.glob(os.path.join(taskoutpath, 'predlik-*.txt'))
        ys_hi = None
        ys_lo = None
        if len(txtpaths) > 0:
            if fileSuffix.endswith('.txt'):
                suffix = '-' + fileSuffix
            else:
                suffix = '.txt'
            lapFile = os.path.join(taskoutpath, prefix + '-lapTrain.txt')
            if xvar.count('lap'):
                xs = np.loadtxt(lapFile)
            elif xvar.count('K'):
                xs = np.loadtxt(os.path.join(taskoutpath, prefix + '-K.txt'))
            elif xvar.count('time'):
                xs = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-timeTrain.txt'))
            else:
                raise ValueError("Unrecognized xvar: " + xvar)
            if yvar.count('Kactive') and not yvar.count('Percentile'):
                ys = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-' + yvar + 'Percentile50.txt'))
                ys_lo = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-' + yvar + 'Percentile10.txt'))
                ys_hi = np.loadtxt(os.path.join(
                    taskoutpath, prefix + '-' + yvar + 'Percentile90.txt'))
            else:
                ys = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-' + yvar + suffix))

            if minLap > 0 and taskoutpath.count('fix'):
                # The lap values were previously read from an undefined
                # name. Take them from the x axis when it already holds
                # laps, otherwise from the lap file.
                if xvar.count('lap'):
                    laps = xs
                else:
                    laps = np.loadtxt(lapFile)
                mask = laps > minLap
                xs = xs[mask]
                ys = ys[mask]
                if ys_lo is not None:
                    # Keep percentile bands aligned with the masked xs
                    ys_lo = ys_lo[mask]
                    ys_hi = ys_hi[mask]
        elif len(hpaths) > 0:
            hpaths.sort()
            basenames = [x.split(os.path.sep)[-1] for x in hpaths]
            # Characters 3..10 of each filename are parsed as the x value
            # NOTE(review): presumably a lap count in the name -- confirm
            xs = np.asarray([float(x[3:11]) for x in basenames])
            ys = np.zeros_like(xs)
            for ii, hpath in enumerate(hpaths):
                MatVars = scipy.io.loadmat(hpath)
                # loadmat yields a 2D array; squeeze before float()
                ys[ii] = float(np.squeeze(MatVars['avgPredLL']))
        else:
            raise ValueError(
                'Pred Lik data unavailable for job\n' + taskoutpath)

        plotargs = dict(
            markersize=markersize,
            linewidth=linewidth,
            label=None,
            color=color,
            markeredgecolor=color,
        )
        plotargs.update(kwargs)
        if tt == 0:
            plotargs['label'] = label
        if xjitter is not None:
            xs = xs + xjitter
        pylab.plot(xs, ys, lineType, **plotargs)

        # Auxiliary artists must not repeat the legend entry of the line
        plotargs.pop('label', None)
        if ys_lo is not None:
            pylab.plot(xs, ys_lo, spreadLineType, **plotargs)
            pylab.plot(xs, ys_hi, spreadLineType, **plotargs)
        if showFinalPt:
            pylab.plot(xs[-1], ys[-1], '.', **plotargs)

    pylab.xlabel(XLabelMap[xvar])
    pylab.ylabel(YLabelMap[yvar])
def plotCovMatFromHModel(hmodel,
                         compListToPlot=None,
                         compsToHighlight=None,
                         proba_thr=0.001,
                         ax_handle=None,
                         **kwargs):
    ''' Plot square image of covariance matrix for each component.

    Images are laid out on a grid with 2 rows and ceil(K / 2) columns,
    where K = hmodel.obsModel.K.

    Parameters
    -------
    hmodel : bnpy HModel object
    compListToPlot : array-like of integer IDs of components within hmodel
        Defaults to all K components.
    compsToHighlight : int or array-like
        integer IDs to highlight
        A highlighted component is always drawn with its covariance,
        even below proba_thr, and its label reads '***'.
    proba_thr : float
        Minimum weight assigned to component in order to be plotted.
        A component below proba_thr that is not highlighted is shown as
        the placeholder image from getEmptyCompSigmaImage.
    ax_handle : figure handle, optional
        If provided, its number attribute selects the figure to reuse.
        If None, a new figure is created.
    **kwargs : accepted for call compatibility, not used here.

    Post Condition
    --------------
    Each plotted cell is labeled with the component weight, or '***' if
    highlighted. Weights are all 1 when allocModel.K != obsModel.K.
    '''
    nRow = 2
    nCol = int(np.ceil(hmodel.obsModel.K / 2.0))
    if ax_handle is None:
        # Return value is (figure, axes); nothing below needs it
        pylab.subplots(nrows=nRow, ncols=nCol, figsize=(nCol * 2, nRow * 2))
    else:
        pylab.subplots(nrows=nRow, ncols=nCol, num=ax_handle.number)

    if compsToHighlight is not None:
        compsToHighlight = np.asarray(compsToHighlight)
        if compsToHighlight.ndim == 0:
            compsToHighlight = np.asarray([compsToHighlight])
    else:
        compsToHighlight = list()
    if compListToPlot is None:
        compListToPlot = np.arange(0, hmodel.obsModel.K)

    if hmodel.allocModel.K == hmodel.obsModel.K:
        w = hmodel.allocModel.get_active_comp_probs()
    else:
        w = np.ones(hmodel.obsModel.K)

    nPlotted = 0
    for plotID, kk in enumerate(compListToPlot):
        if w[kk] < proba_thr and kk not in compsToHighlight:
            Sigma = getEmptyCompSigmaImage(hmodel.obsModel.D)
            clim = [0, 1]
        else:
            Sigma = hmodel.obsModel.get_covar_mat_for_comp(kk)
            clim = [-.25, 1]
        pylab.subplot(nRow, nCol, plotID + 1)
        pylab.imshow(Sigma, interpolation='nearest', cmap='hot', clim=clim)
        pylab.xticks([])
        pylab.yticks([])
        pylab.xlabel('%.2f' % (w[kk]))
        if kk in compsToHighlight:
            pylab.xlabel('***')
        nPlotted = plotID + 1

    # Blank out the unused grid cells. Counting plotted cells explicitly
    # keeps this working when compListToPlot is empty.
    for emptyID in range(nPlotted, nRow * nCol):
        aH = pylab.subplot(nRow, nCol, emptyID + 1)
        aH.axis('off')