Ejemplo n.º 1
0
def plotDataAsSquareImages(Data,
                           unitIDsToPlot=None,
                           figID=None,
                           nPlots=16,
                           doShowNow=False,
                           seed=0,
                           randstate=np.random.RandomState(0),
                           **kwargs):
    ''' Show rows of Data.X as square images arranged in a grid of subplots.

    Args
    ----
    Data : object with attributes dim, nObs and X.
        Data.dim must be a perfect square; each plotted row Data.X[i]
        is reshaped to (sqrt(dim), sqrt(dim)).
    unitIDsToPlot : optional sequence of row indices into Data.X.
        If None, min(Data.nObs, nPlots) rows are drawn at random
        without replacement.
    figID : if None, a new figure is created first.
        Otherwise no figure call is made and drawing goes to whatever
        figure pylab currently considers active.
    nPlots : number of rows to sample when unitIDsToPlot is None.
        Overridden by len(unitIDsToPlot) when that is provided.
    doShowNow : if True, call pylab.show() before returning.
    seed : if not None, a fresh RandomState(seed) replaces randstate.
    randstate : random state used for sampling only when seed is None.
        NOTE: the default instance is created once at definition time,
        so it is shared across calls that pass seed=None.
    kwargs : accepted but unused; imshow options come from the
        module-level imshowArgs dict.

    Returns
    -------
    None
    '''
    if seed is not None:
        randstate = np.random.RandomState(seed)
    if figID is None:
        pylab.figure()

    V = Data.dim
    assert isPerfectSquare(V)
    sqrtV = int(np.sqrt(V))
    if unitIDsToPlot is not None:
        nPlots = len(unitIDsToPlot)
    else:
        size = np.minimum(Data.nObs, nPlots)
        unitIDsToPlot = randstate.choice(Data.nObs, size=size, replace=False)

    # Grid dimensions must be plain ints: pylab.subplot rejects floats.
    # Keep at least one row so the column count never divides by zero.
    nRows = max(1, int(np.floor(np.sqrt(nPlots))))
    # float() keeps true division under Python 2 now that nRows is an int.
    nCols = int(np.ceil(nPlots / float(nRows)))

    for plotPos, unitID in enumerate(unitIDsToPlot):
        squareIm = np.reshape(Data.X[unitID], (sqrtV, sqrtV))
        pylab.subplot(nRows, nCols, plotPos + 1)
        pylab.imshow(squareIm, **imshowArgs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
    pylab.tight_layout()
    if doShowNow:
        pylab.show()
Ejemplo n.º 2
0
def plotExampleBarsDocs(Data, docIDsToPlot=None, figID=None,
                        vmax=None, nDocToPlot=16, doShowNow=False,
                        seed=0, randstate=np.random.RandomState(0),
                        xlabels=None,
                        W=1, H=1,
                        **kwargs):
    ''' Show word-count histograms of selected documents as square images.

    Args
    ----
    Data : object with attributes vocab_size, nDoc, doc_range, word_id,
        word_count and method getDocTypeCountMatrix().
        vocab_size must be a perfect square.
    docIDsToPlot : optional sequence of document indices.
        If None, min(Data.nDoc, nDocToPlot) docs are drawn at random
        without replacement.
    figID : if None, a new figure with an nrows x 5 grid is created.
        Otherwise no figure is created here.
    vmax : upper limit of the shared color scale.
        If None, it is estimated as the largest per-word 98th percentile
        of the doc-by-word count matrix; if that estimate is not positive
        the images fall back to matplotlib's automatic scaling.
    nDocToPlot : number of docs to sample when docIDsToPlot is None.
    doShowNow : if True, call pylab.show() before returning.
    seed : if not None, a fresh RandomState(seed) replaces randstate.
    randstate : random state used for sampling only when seed is None.
        NOTE: the default instance is created once at definition time.
    xlabels : optional sequence of x-axis labels, indexed by plot position.
    W, H : width and height of each panel, used for figsize.
    kwargs : forwarded to pylab.imshow.
        'vmin' and 'interpolation' are always overwritten here.

    Returns
    -------
    None
    '''
    kwargs['vmin'] = 0
    kwargs['interpolation'] = 'nearest'
    if seed is not None:
        randstate = np.random.RandomState(seed)
    V = Data.vocab_size
    sqrtV = int(np.sqrt(V))
    assert np.allclose(sqrtV * sqrtV, V)
    if docIDsToPlot is not None:
        nDocToPlot = len(docIDsToPlot)
    else:
        size = np.minimum(Data.nDoc, nDocToPlot)
        docIDsToPlot = randstate.choice(Data.nDoc, size=size, replace=False)
    ncols = 5
    # Keep at least one row so the subplot grid is always valid.
    nrows = max(1, int(np.ceil(nDocToPlot / float(ncols))))

    if vmax is not None:
        kwargs['vmax'] = vmax
    else:
        # Estimate a shared intensity ceiling and actually hand it to imshow.
        DocWordArr = Data.getDocTypeCountMatrix()
        vmaxEstimate = int(np.max(np.percentile(DocWordArr, 98, axis=0)))
        # A zero ceiling would flatten every image, so skip it in that case.
        if vmaxEstimate > 0:
            kwargs['vmax'] = vmaxEstimate

    if figID is None:
        pylab.subplots(nrows=nrows, ncols=ncols,
                       figsize=(ncols * W, nrows * H))

    for plotPos, docID in enumerate(docIDsToPlot):
        start = Data.doc_range[docID]
        stop = Data.doc_range[docID + 1]
        wIDs = Data.word_id[start:stop]
        wCts = Data.word_count[start:stop]
        docWordHist = np.zeros(V)
        docWordHist[wIDs] = wCts
        squareIm = np.reshape(docWordHist, (sqrtV, sqrtV))
        pylab.subplot(nrows, ncols, plotPos + 1)
        pylab.imshow(squareIm, **kwargs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
        if xlabels is not None:
            pylab.xlabel(xlabels[plotPos])

    # Disable empty plots!
    # Count drawn panels from the ID list so an empty list cannot leave
    # the loop variable unbound.
    nShown = len(docIDsToPlot)
    for kdel in range(nShown + 1, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')

    # Fix margins between subplots
    pylab.subplots_adjust(wspace=0.04, hspace=0.04, left=0.01, right=0.99,
                          top=0.99, bottom=0.01)
    if doShowNow:
        pylab.show()
Ejemplo n.º 3
0
def plotBarsFromHModel(
        hmodel,
        Data=None,
        doShowNow=False,
        figH=None,
        doSquare=1,
        xlabels=None,
        compsToHighlight=None,
        compListToPlot=None,
        activeCompIDs=None,
        Kmax=50,
        width=6,
        height=3,
        vmax=None,
        block=0,  # unused
        jobname='',  # unused
        **kwargs):
    ''' Plot the topic-word parameters of a model as bar images.

    Args
    ----
    hmodel : model whose obsModel holds either Post.lam or EstParams.phi.
        When Post exists, topics are the rows of lam normalized to sum
        to one; otherwise a copy of EstParams.phi is used.
    Data : unused.
    doShowNow : if True, call pylab.show() before returning.
    figH : existing handle to draw into, or None to create one.
    doSquare : if truthy, draw each topic via showTopicsAsSquareImages;
        otherwise draw all topics via showAllTopicsInSingleImage.
    xlabels : labels forwarded to showTopicsAsSquareImages.
        None (the default) is treated as an empty list.
    compsToHighlight, compListToPlot, activeCompIDs, Kmax :
        forwarded to the drawing helper (doSquare branch); only
        compsToHighlight is used by the single-image branch.
    width, height : figure size used when a new figure is created in
        the single-image branch.
    vmax : upper limit of the intensity scale.
        If None, 1.5 times the 95th percentile of topics is used.
    kwargs : forwarded to the drawing helper.

    Side effects
    ------------
    Overwrites the 'vmax' entry of the module-level imshowArgs dict.

    Returns
    -------
    figH : the handle returned by showTopicsAsSquareImages, or the
        figure used in the single-image branch.
    '''
    # Fresh list per call instead of a list shared through the signature.
    if xlabels is None:
        xlabels = []
    if vmax is not None:
        kwargs['vmax'] = vmax
    if hasattr(hmodel.obsModel, 'Post'):
        lam = hmodel.obsModel.Post.lam
        topics = lam / lam.sum(axis=1)[:, np.newaxis]
    else:
        topics = hmodel.obsModel.EstParams.phi.copy()

    # Determine intensity scale for topic-word image
    global imshowArgs
    if vmax is not None:
        imshowArgs['vmax'] = vmax
    else:
        imshowArgs['vmax'] = 1.5 * np.percentile(topics, 95)

    if doSquare:
        figH = showTopicsAsSquareImages(topics,
                                        activeCompIDs=activeCompIDs,
                                        compsToHighlight=compsToHighlight,
                                        compListToPlot=compListToPlot,
                                        Kmax=Kmax,
                                        figH=figH,
                                        xlabels=xlabels,
                                        **kwargs)
    else:
        if figH is None:
            figH = pylab.figure(figsize=(width, height))
        else:
            # NOTE(review): figH is passed to pylab.axes as-is; confirm
            # callers supply a handle that pylab.axes accepts.
            pylab.axes(figH)
        showAllTopicsInSingleImage(topics, compsToHighlight, **kwargs)
    if doShowNow:
        pylab.show()
    return figH
Ejemplo n.º 4
0
def plotJobs(jpaths, legNames, styles=None, fileSuffix='PredLik.mat',
             xvar='laps', yvar='avgLikScore', loc='upper right',
             minLap=0, showFinalPt=0,
             prefix='predlik',
             taskids=None, savefilename=None, tickfontsize=None,
             xjitter=None, bbox_to_anchor=None, **kwargs):
    ''' Create line plots for provided jobs

    Args
    ----
    jpaths : sequence of job paths, one line group per entry.
    legNames : sequence of legend names; needs at least len(jpaths) entries.
    styles : optional sequence of dicts of style keyword args, one per job.
        If None, each job gets dict(colorID=<its index>).
    fileSuffix, xvar, yvar, minLap, showFinalPt, prefix, taskids :
        forwarded unchanged to plot_all_tasks_for_job.
    loc, bbox_to_anchor : legend placement; the legend is drawn only if
        loc is not None and more than one job is given.
    tickfontsize : if not None, label size for major ticks on both axes.
    xjitter : if not None, each job gets its own horizontal offset taken
        from an even spacing of [-0.5, 0.5] across jobs. Only whether the
        value is None matters; the value itself is not used.
    savefilename : if not None, the figure is shown without blocking and
        saved to this path; otherwise the figure is shown and blocks.
    kwargs : accepted but not forwarded anywhere.

    Returns
    -------
    None
    '''
    nLines = len(jpaths)
    nLeg = len(legNames)
    assert nLines <= nLeg

    jitterByJob = np.linspace(-.5, .5, len(jpaths))

    # range (not xrange) so this loop runs on Python 2 and Python 3 alike.
    for lineID in range(nLines):
        if styles is None:
            curStyle = dict(colorID=lineID)
        else:
            curStyle = styles[lineID]

        # Keep the caller's on/off flag separate from the per-job offset.
        if xjitter is None:
            curJitter = None
        else:
            curJitter = jitterByJob[lineID]
        plot_all_tasks_for_job(jpaths[lineID], legNames[lineID], minLap=minLap,
                               xvar=xvar, yvar=yvar, fileSuffix=fileSuffix,
                               showFinalPt=showFinalPt,
                               prefix=prefix,
                               taskids=taskids, xjitter=curJitter, **curStyle)

    if loc is not None and len(jpaths) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)

    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    else:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
Ejemplo n.º 5
0
def plotCompsFromHModel(
        hmodel,
        doShowNow=False,
        block=0,  # unused
        jobname='',  # unused
        vocabList=None,  # catchall
        **kwargs):
    ''' Plot the estimated component parameters (phi) of a model.

    If hmodel.obsModel has a Post attribute, its EstParams are first
    refreshed via setEstParamsFromPost(). A copy of EstParams.phi is then
    drawn either as one square image per component (when the number of
    columns exceeds 9 and is a perfect square) or as rows of one image.

    Args
    ----
    hmodel : model exposing obsModel.EstParams.phi.
    doShowNow : if True, call pylab.show() before returning.
    block, jobname, vocabList : accepted and ignored.
    kwargs : forwarded to the chosen plotting helper.

    Returns
    -------
    figH : whatever the chosen plotting helper returns.
    '''
    obsModel = hmodel.obsModel
    if hasattr(obsModel, 'Post'):
        obsModel.setEstParamsFromPost()
    compParams = obsModel.EstParams.phi.copy()

    nDim = compParams.shape[1]
    looksLikeSquareImage = nDim > 9 and isPerfectSquare(nDim)
    if not looksLikeSquareImage:
        figH = plotCompsAsRowsInSingleImage(compParams, **kwargs)
    else:
        figH = plotCompsAsSquareImages(compParams, **kwargs)

    if doShowNow:
        pylab.show()
    return figH
Ejemplo n.º 6
0
def plotCompsForJob(jobpath='', taskids=[1], lap=None,
                    **kwargs):
    ''' Show plot of learned clusters from run(s) saved results on disk

    Args
    ----
    jobpath : path to a job directory.
        If it is not an existing directory, it is retried relative to the
        BNPYOUTDIR environment variable (when that variable is set).
    taskids : task specification handed to BNPYArgParser.parse_task_ids.
        The default list is only read here, never modified.
    lap : forwarded to plotCompsForTask.
    kwargs : forwarded to plotCompsForTask. If it contains 'block',
        pylab.show(block=...) is called after all tasks are plotted.

    Raises
    ------
    ValueError : if no existing directory is found for jobpath, including
        the case where BNPYOUTDIR is not set.

    Returns
    -------
    None
    '''

    # Verify given absolute path is valid.
    jobpath_originalarg = jobpath
    if not os.path.isdir(jobpath):
        # Fallback: try to prepend BNPYOUTDIR to handle "shortcut" names.
        # A missing variable should surface as the "Not valid path" error
        # below, not as a bare KeyError.
        outdir = os.environ.get('BNPYOUTDIR')
        if outdir is not None:
            jobpath = os.path.join(outdir, jobpath)
    if not os.path.isdir(jobpath):
        raise ValueError('Not valid path: ' + jobpath_originalarg)
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    for tt, taskid in enumerate(taskids):
        # String ids starting with '.' trigger a ranking pass over the job
        # before the first task is plotted.
        if tt == 0 and isinstance(taskid, str):
            if taskid.startswith('.'):
                rankTasksForSingleJobOnDisk(jobpath)
        taskpath = os.path.join(jobpath, str(taskid))
        plotCompsForTask(taskpath, lap=lap, **kwargs)
    if 'block' in kwargs:
        pylab.show(block=kwargs['block'])
Ejemplo n.º 7
0
        pylab.savefig(outfilepath)
        pylab.close('all')
        print 'Wrote: %s' % (outfilepath)

def parseArgs(**kwargs):
    ''' Read args from the command line into a dict

    Recognized arguments: positional task_output_path, plus optional
    --lap (float), --taskids (str) and --vocabfile (str).

    Args
    ----
    kwargs : accepted but unused.

    Returns
    -------
    arg_dict : dict with keys task_output_path, lap, taskids, vocabfile.
        When --vocabfile is given, it also has key vocabList: a list with
        one whitespace-stripped string per line of that file.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('task_output_path')
    parser.add_argument('--lap', default=None, type=float)
    parser.add_argument('--taskids',
                        type=str, default=None,
                        help=taskidsHelpMsg)
    parser.add_argument('--vocabfile',
                        type=str, default=None)
    args = parser.parse_args()
    arg_dict = vars(args)
    if args.vocabfile is not None:
        with open(args.vocabfile, 'r') as f:
            # Build a real list: map() would be a one-shot iterator on
            # Python 3.
            arg_dict['vocabList'] = [line.strip() for line in f]
    return arg_dict

if __name__ == "__main__":
    # Script entry point: parse command-line args, plot, then show.
    arg_dict = parseArgs()
    # When task ids were supplied, nothing is plotted here (the multi-task
    # call to plotCompsForJob is disabled); only the final show() runs.
    if arg_dict.get('taskids') is None:
        plotCompsForTask(**arg_dict)
    pylab.show()
Ejemplo n.º 8
0
def plotManyPanelsByPVar(jpathPattern='/tmp/',
                         pvar=None,
                         pvals=None,
                         W=5,
                         H=4,
                         savefilename=None,
                         doShowNow=False,
                         **kwargs):
    ''' Draw a single row of panels, one per value of the panel variable.

    Args
    ----
    jpathPattern : job path pattern.
        Used directly as the only panel when pvar is None.
    pvar : name of the variable whose values define the panels, or None.
    pvals : optional values of pvar to show.
        If None, all values found for pvar are used; otherwise only the
        given values that were also found are kept.
    W, H : width and height of each panel, used for figsize.
    savefilename : if not None, the figure is shown without blocking and
        then saved to this path.
    doShowNow : if True and savefilename is None, show and block.
    kwargs : forwarded to makeListOfJPatternsWithSpecificVals (when pvar
        is given) and to plotMultipleLinesByLVar for each panel.
        'doShowNow' is forced to False, and an existing 'loc' entry is set
        to None for every panel after the first.

    Returns
    -------
    Info : dict with keys 'nrows' and 'ncols'.
    '''
    if pvar is not None:
        pathPieces = jpathPattern.split(os.path.sep)
        prefixfilepath = os.path.sep.join(pathPieces[:-1])
        PPListMap = makePPListMapFromJPattern(jpathPattern)
        if pvals is not None:
            # Keep only requested values that were actually found
            pvals = [val for val in pvals if val in PPListMap[pvar]]
        else:
            pvals = PPListMap[pvar]
        jpathList = makeListOfJPatternsWithSpecificVals(
            PPListMap,
            prefixfilepath=prefixfilepath,
            key=pvar,
            vals=pvals,
            **kwargs)
    else:
        # No panel variable: a single panel for the pattern as given
        jpathList = [jpathPattern]
        pvals = [None]

    nrows, ncols = 1, len(pvals)
    pylab.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * W, nrows * H))

    # Every panel shares its axes with the one created before it
    sharedAxH = None
    for panelID, panel_jobPattern in enumerate(jpathList):
        sharedAxH = pylab.subplot(
            nrows, ncols, panelID + 1, sharey=sharedAxH, sharex=sharedAxH)
        # Only show legend on first plot
        if 'loc' in kwargs and panelID > 0:
            kwargs['loc'] = None
        kwargs['doShowNow'] = False
        plotMultipleLinesByLVar(panel_jobPattern, **kwargs)
        if pvar is not None:
            pylab.title('%s=%s' % (pvar, pvals[panelID]))

    pylab.subplots_adjust(bottom=0.15, wspace=0.5)

    if savefilename is None:
        if doShowNow:
            try:
                pylab.show(block=True)
            except TypeError:
                pass  # when using IPython notebook
    else:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)

    return {'nrows': nrows, 'ncols': ncols}
Ejemplo n.º 9
0
def plotMultipleLinesByLVar(jpathPattern,
                            lvar=None,
                            lvals=None,
                            ColorMap=DefaultColorList,
                            loc=None,
                            bbox_to_anchor=None,
                            savefilename=None,
                            tickfontsize=None,
                            doShowNow=False,
                            **kwargs):
    ''' Create line plots for provided jobs.

    One line is drawn per value of the line variable lvar, each via
    plotSingleLineAcrossJobsByXVar.

    Args
    ----
    jpathPattern : job path pattern.
    lvar : name of the variable whose values define the lines.
        It is used as a key into the map built from jpathPattern.
    lvals : optional value or list of values of lvar to show.
        If None, all values found for lvar are used. Values are kept only
        if they equal '.best' or were found for lvar.
    ColorMap : either a dict or an indexable sequence of colors.
        A dict is queried first with the line label '<lvar>=<lval>' and
        then with the line's job pattern; the first key found wins, and
        DefaultColorList is used if neither is present. A sequence is
        indexed by line position. Sequences are cycled when there are
        more lines than colors.
    loc, bbox_to_anchor : legend placement; the legend is drawn only if
        loc is not None and more than one line is plotted.
    tickfontsize : if not None, label size for major ticks on both axes.
    savefilename : if not None, the figure is shown without blocking and
        then saved to this path.
    doShowNow : if True and savefilename is None, show and block.
    kwargs : forwarded to makeListOfJPatternsWithSpecificVals and to
        plotSingleLineAcrossJobsByXVar.

    Returns
    -------
    None
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if lvals is None:
        lvals = PPListMap[lvar]
    elif not isinstance(lvals, list):
        lvals = [lvals]
    # Make sure all lval values are street legal (aka exist on disk)
    lvals = [ll for ll in lvals if ll == '.best' or ll in PPListMap[lvar]]

    # NOTE: no ranking pass is performed here for '.best' values.

    # Create list of jobs with corresponding pattern
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=lvar,
        vals=lvals,
        **kwargs)
    for lineID, line_jobPattern in enumerate(jpathList):
        line_label = '%s=%s' % (lvar, lvals[lineID])
        if isinstance(ColorMap, dict):
            # Stop at the first key that is present, so a match on the
            # label is not overwritten when the job pattern is missing.
            for colorKey in (line_label, line_jobPattern):
                try:
                    line_color = ColorMap[colorKey]
                    break
                except KeyError:
                    continue
            else:
                line_color = DefaultColorList[lineID % len(DefaultColorList)]
        else:
            # Access next elt in ColorMap list, wrapping around at the end
            line_color = ColorMap[lineID % len(ColorMap)]
        plotSingleLineAcrossJobsByXVar(line_jobPattern,
                                       label=line_label,
                                       color=line_color,
                                       lineID=lineID,
                                       lvar=lvar,
                                       **kwargs)

    if loc is not None and len(jpathList) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    elif doShowNow:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook