コード例 #1
0
ファイル: BernViz.py プロジェクト: vishalbelsare/bnpy
def plotDataAsSquareImages(Data,
                           unitIDsToPlot=None,
                           figID=None,
                           nPlots=16,
                           doShowNow=False,
                           seed=0,
                           randstate=np.random.RandomState(0),
                           **kwargs):
    ''' Show selected observations of Data as a grid of square images.

    Each plotted row of Data.X is reshaped to (sqrt(V), sqrt(V)),
    where V = Data.dim must be a perfect square.

    Args
    ----
    Data : object with attributes dim, nObs, and X
    unitIDsToPlot : optional sequence of row indices into Data.X
        If None, min(Data.nObs, nPlots) rows are sampled without replacement.
    figID : if None, a new figure is created
        NOTE(review): a non-None figID is not otherwise used here.
    nPlots : int, number of rows to sample when unitIDsToPlot is None
    doShowNow : if true, call pylab.show() before returning
    seed : if not None, a fresh RandomState(seed) replaces randstate
    randstate : random state used for sampling when seed is None
    kwargs : accepted but unused

    Post Condition
    --------------
    Images are drawn on the current figure using the imshowArgs settings
    found in the enclosing module scope.
    '''
    if seed is not None:
        randstate = np.random.RandomState(seed)
    if figID is None:
        pylab.figure()

    V = Data.dim
    assert isPerfectSquare(V)
    sqrtV = int(np.sqrt(V))
    if unitIDsToPlot is not None:
        nPlots = len(unitIDsToPlot)
    else:
        # Size the grid by the number of images actually drawn,
        # which may be fewer than requested when Data is small.
        nPlots = int(np.minimum(Data.nObs, nPlots))
        unitIDsToPlot = randstate.choice(
            Data.nObs, size=nPlots, replace=False)

    # Subplot grid dimensions must be integers.
    # Keep at least one row so an empty selection cannot divide by zero.
    nRows = max(1, int(np.floor(np.sqrt(nPlots))))
    nCols = int(np.ceil(nPlots / float(nRows)))

    for plotPos, unitID in enumerate(unitIDsToPlot):
        squareIm = np.reshape(Data.X[unitID], (sqrtV, sqrtV))
        pylab.subplot(nRows, nCols, plotPos + 1)
        pylab.imshow(squareIm, **imshowArgs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
    pylab.tight_layout()
    if doShowNow:
        pylab.show()
コード例 #2
0
ファイル: BarsViz.py プロジェクト: vishalbelsare/bnpy
def plotBarsFromHModel(
        hmodel,
        Data=None,
        doShowNow=False,
        figH=None,
        doSquare=1,
        xlabels=None,
        compsToHighlight=None,
        compListToPlot=None,
        activeCompIDs=None,
        Kmax=50,
        width=6,
        height=3,
        vmax=None,
        block=0,  # unused
        jobname='',  # unused
        **kwargs):
    ''' Plot the topic-word parameters of a model as images.

    Topics come from hmodel.obsModel: if it has a 'Post' attribute,
    rows of Post.lam are normalized to sum to one; otherwise a copy of
    EstParams.phi is used.

    Args
    ----
    hmodel : model whose obsModel provides Post.lam or EstParams.phi
    Data : accepted but unused
    doShowNow : if true, call pylab.show() before returning
    figH : existing handle to draw into, or None to create one
    doSquare : if true, delegate to showTopicsAsSquareImages;
        otherwise draw all topics via showAllTopicsInSingleImage
    xlabels : optional list of labels forwarded to the square-image plot
        None (the default) is treated as an empty list.
    compsToHighlight, compListToPlot, activeCompIDs, Kmax :
        forwarded to the square-image plot
    width, height : figure size used when doSquare is false and figH is None
    vmax : optional upper intensity limit
    kwargs : forwarded to the selected plotting helper

    Returns
    -------
    figH : handle of the figure that was drawn into

    Post Condition
    --------------
    The module-level imshowArgs['vmax'] entry is overwritten.
    '''
    # Fresh list per call; avoids sharing one mutable default object.
    if xlabels is None:
        xlabels = []
    if vmax is not None:
        kwargs['vmax'] = vmax
    if hasattr(hmodel.obsModel, 'Post'):
        lam = hmodel.obsModel.Post.lam
        topics = lam / lam.sum(axis=1)[:, np.newaxis]
    else:
        topics = hmodel.obsModel.EstParams.phi.copy()

    # Determine intensity scale for topic-word image
    global imshowArgs
    if vmax is not None:
        imshowArgs['vmax'] = vmax
    else:
        imshowArgs['vmax'] = 1.5 * np.percentile(topics, 95)

    if doSquare:
        figH = showTopicsAsSquareImages(topics,
                                        activeCompIDs=activeCompIDs,
                                        compsToHighlight=compsToHighlight,
                                        compListToPlot=compListToPlot,
                                        Kmax=Kmax,
                                        figH=figH,
                                        xlabels=xlabels,
                                        **kwargs)
    else:
        if figH is None:
            figH = pylab.figure(figsize=(width, height))
        else:
            # NOTE(review): figH is handed to pylab.axes here, so this
            # branch presumably expects an axes-like handle -- confirm.
            pylab.axes(figH)
        showAllTopicsInSingleImage(topics, compsToHighlight, **kwargs)
    if doShowNow:
        pylab.show()
    return figH
コード例 #3
0
ファイル: PlotHeldoutLik.py プロジェクト: vishalbelsare/bnpy
def plotJobs(jpaths, legNames, styles=None, fileSuffix='PredLik.mat',
             xvar='laps', yvar='avgLikScore', loc='upper right',
             minLap=0, showFinalPt=0,
             prefix='predlik',
             taskids=None, savefilename=None, tickfontsize=None,
             xjitter=None, bbox_to_anchor=None, **kwargs):
    ''' Draw one set of lines per job, then save or show the figure.

    Each job path is handed to plot_all_tasks_for_job with its legend name.
    When xjitter is not None, each job receives its own horizontal offset,
    spread evenly over [-0.5, 0.5].

    Post Condition
    --------------
    If savefilename is given, the figure is written to that file;
    otherwise a blocking pylab.show() is attempted.
    '''
    nJobs = len(jpaths)
    assert nJobs <= len(legNames)

    jitterOffsets = np.linspace(-.5, .5, nJobs)
    # Any non-None xjitter switches on per-job offsets.
    useJitter = xjitter is not None

    for jobID, jpath in enumerate(jpaths):
        jobStyle = dict(colorID=jobID) if styles is None else styles[jobID]
        jobJitter = jitterOffsets[jobID] if useJitter else None
        plot_all_tasks_for_job(jpath, legNames[jobID], minLap=minLap,
                               xvar=xvar, yvar=yvar, fileSuffix=fileSuffix,
                               showFinalPt=showFinalPt,
                               prefix=prefix,
                               taskids=taskids, xjitter=jobJitter,
                               **jobStyle)

    if loc is not None and nJobs > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)

    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    doSave = savefilename is not None
    try:
        # Non-blocking when saving to file, blocking otherwise.
        pylab.show(block=not doSave)
    except TypeError:
        pass  # when using IPython notebook
    if doSave:
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
コード例 #4
0
ファイル: BernViz.py プロジェクト: vishalbelsare/bnpy
def plotCompsFromHModel(
        hmodel,
        doShowNow=False,
        block=0,  # unused
        jobname='',  # unused
        vocabList=None,  # catchall
        **kwargs):
    ''' Plot the phi parameters of a model's observation component.

    If the obsModel has a 'Post' attribute, setEstParamsFromPost() is
    called first so that EstParams.phi is available.
    Models whose dimension is a perfect square larger than 9 are drawn
    as square images; all others are drawn as rows of a single image.

    Returns
    -------
    figH : handle returned by the chosen plotting helper
    '''
    obsModel = hmodel.obsModel
    if hasattr(obsModel, 'Post'):
        obsModel.setEstParamsFromPost()
    phi = obsModel.EstParams.phi.copy()

    nDims = phi.shape[1]
    drawAsSquares = nDims > 9 and isPerfectSquare(nDims)
    if drawAsSquares:
        plotFunc = plotCompsAsSquareImages
    else:
        plotFunc = plotCompsAsRowsInSingleImage
    figH = plotFunc(phi, **kwargs)

    if doShowNow:
        pylab.show()
    return figH
コード例 #5
0
def plotCompsForJob(jobpath='', taskids=[1], lap=None,
                    **kwargs):
    ''' Show plot of learned clusters from run(s) saved results on disk

    Args
    ----
    jobpath : str
        Path to a job directory. If it is not an existing directory,
        it is retried relative to the BNPYOUTDIR environment variable
        (when that variable is set).
    taskids : task id specification, passed to
        BNPYArgParser.parse_task_ids
    lap : forwarded to plotCompsForTask
    kwargs : forwarded to plotCompsForTask
        If 'block' is present, pylab.show(block=...) is called at the end.

    Raises
    ------
    ValueError
        If jobpath cannot be resolved to an existing directory.
    '''
    # Verify given absolute path is valid.
    jobpath_originalarg = jobpath
    if not os.path.isdir(jobpath):
        # Fallback: try to prepend BNPYOUTDIR to handle "shortcut" names.
        # An unset BNPYOUTDIR falls through to the ValueError below
        # instead of surfacing as an unrelated KeyError.
        outdir = os.environ.get('BNPYOUTDIR')
        if outdir is not None:
            jobpath = os.path.join(outdir, jobpath)
    if not os.path.isdir(jobpath):
        raise ValueError('Not valid path: ' + jobpath_originalarg)
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    for tt, taskid in enumerate(taskids):
        # A first task id starting with '.' triggers ranking of the
        # job's tasks before anything is plotted.
        if tt == 0 and isinstance(taskid, str):
            if taskid.startswith('.'):
                rankTasksForSingleJobOnDisk(jobpath)
        taskpath = os.path.join(jobpath, str(taskid))
        plotCompsForTask(taskpath, lap=lap, **kwargs)
    if 'block' in kwargs:
        pylab.show(block=kwargs['block'])
コード例 #6
0
def main():
    ''' Scratch driver comparing two saved runs on the Bars toy dataset.

    Loads a 100-document dataset and two models from the hard-coded
    paths pathA and pathB, computes each model's hard assignment per
    document (argmax over the 'resp' local parameter), aligns A's
    assignments to B's, and finds the documents where they disagree.
    It then opens an interactive IPython shell and returns.

    NOTE: every statement after the first bare `return` is unreachable.
    The later sections are kept as experiments that can be re-enabled by
    moving or deleting the early returns; they use Python 2 syntax
    (`print e`, `raw_input`) and reference names not assigned in this
    function (docIDsInClusterWith32, b_kwargs).
    '''
    nDoc = 100
    Data = BarsK10V900.get_data(nDocTotal=nDoc, nWordsPerDoc=500, seed=12345)
    pathA = "$BNPYOUTDIR/BarsD100K10/seattle-alg=bnpyDPbirthmerge-lik=Mult-lam=0.1-K=10-initname=randexamples-nBatch=1/1/"
    pathB = "$BNPYOUTDIR/BarsD100K10/seattle-alg=bnpyDPbirthmerge-lik=Mult-lam=0.1-K=50-initname=randexamples-nBatch=1/1/"
    aM, aLP, aSS, aLscore = loadModelWithLPSSandLscore(Data, pathA, 'A')
    bM, bLP, bSS, bLscore = loadModelWithLPSSandLscore(Data, pathB, 'B')

    # Hard assignment for each row of resp under each model
    bZ = bLP['resp'].argmax(axis=1)
    aZ = aLP['resp'].argmax(axis=1)

    # Relabel A's assignments to match B's labels, then find disagreements
    aaZ = alignEstimatedStateSeqToTruth(aZ, bZ)
    offDocIDs = np.flatnonzero(aaZ != bZ)

    # Drop into an interactive shell for manual inspection
    from IPython import embed
    embed()

    # Early exit: nothing below this line currently runs.
    return
    #for doc in offDocIDs:
    #  ktarget = aaZ[doc]
    #  targetUID = bSS.uids[ktarget]
    #  cLP = copy.deepcopy(bLP)
    #  cLP['resp'][doc,:] = 1e-40
    #  cLP['resp'][doc,ktarget] = 1.0
    #  calcLscoreFromModel(Data, bM, cLP, 'B swap doc %d' % (doc))
    #  bnpy.viz.BarsViz.plotExampleBarsDocs(Data, docIDsToPlot=[doc], vmax=10)

    # (unreachable) Experiment: move every disagreeing doc to the cluster
    # A assigned it to, then score the modified local parameters once.
    cLP = copy.deepcopy(bLP)
    for doc in offDocIDs:
        ktarget = aaZ[doc]
        targetUID = bSS.uids[ktarget]
        cLP['resp'][doc, :] = 1e-40
        cLP['resp'][doc, ktarget] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B swap both')

    # (unreachable) Experiment: give each disagreeing doc, one at a time,
    # a brand-new extra cluster column and score the result.
    for doc in offDocIDs:
        cLP = dict(resp=np.hstack([bLP['resp'], 1e-40 * np.ones((nDoc, 1))]))
        cLP['resp'][doc, :] = 1e-40
        cLP['resp'][doc, -1] = 1.0
        calcLscoreFromModel(Data, bM, cLP, 'B new doc %d' % (doc))

    pylab.show(block=0)
    pylab.draw()
    from IPython import embed
    embed()
    return
    #cLP = copy.deepcopy(bLP)
    #cLP['resp'][32,:] = 1e-40
    #cLP['resp'][32,10] = 1.0
    #calcLscoreFromModel(Data, bM, cLP, 'B swap 32')

    return
    # (unreachable) Older experiments hard-coded to docs 10 and 32.
    cLP = copy.deepcopy(bLP)
    cLP['resp'][32, :] = 1e-40
    cLP['resp'][32, 10] = 1.0
    cLP['resp'][10, :] = 1e-40
    cLP['resp'][10, 10] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B swap 10&32')

    # NOTE(review): the extra column below has 40 rows while nDoc is 100;
    # np.hstack would reject this unless resp also has 40 rows -- confirm.
    cLP = dict(resp=np.hstack([bLP['resp'], 1e-40 * np.ones((40, 1))]))
    cLP['resp'][32, :] = 1e-40
    cLP['resp'][32, -1] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B new 32')

    cLP = dict(resp=np.hstack([bLP['resp'], 1e-40 * np.ones((40, 1))]))
    cLP['resp'][10, :] = 1e-40
    cLP['resp'][10, -1] = 1.0
    calcLscoreFromModel(Data, bM, cLP, 'B new 10')

    bnpy.viz.BarsViz.plotExampleBarsDocs(Data, docIDsToPlot=[32], vmax=10)
    pylab.title('Doc 32')

    bnpy.viz.BarsViz.plotExampleBarsDocs(Data, docIDsToPlot=[10], vmax=10)
    pylab.title('Doc 10')

    # NOTE(review): docIDsInClusterWith32 is not assigned in this function.
    bnpy.viz.BarsViz.plotExampleBarsDocs(Data,
                                         docIDsToPlot=docIDsInClusterWith32,
                                         vmax=10)
    pylab.title('Cluster 0')
    pylab.show(block=0)

    raw_input("Press any key to continue >>")

    # (unreachable) Try a split proposal on the cluster owning each doc.
    for doc in [1, 2, 3, 10, 32]:
        ktarget = bZ[doc]
        targetUID = bSS.uids[ktarget]
        try:
            # NOTE(review): b_kwargs is not assigned in this function.
            propXSS, Info = createSplitStats(Data,
                                             bM,
                                             bLP,
                                             curSSwhole=bSS,
                                             targetUID=targetUID,
                                             newUIDs=np.arange(100, 110),
                                             lapFrac=doc,
                                             b_cleanupMaxNumMergeIters=10,
                                             b_cleanupMaxNumAcceptPerIter=2,
                                             **b_kwargs)
        except BirthProposalError as e:
            print e
コード例 #7
0
def plotManyPanelsByPVar(jpathPattern='/tmp/',
                         pvar=None,
                         pvals=None,
                         W=5,
                         H=4,
                         savefilename=None,
                         doShowNow=False,
                         **kwargs):
    ''' Draw a single row of panels, one per value of the panel variable.

    When pvar is None, a single panel is drawn for jpathPattern itself.
    Otherwise one job pattern is built per requested value of pvar; when
    pvals is given, only the values present in the pattern's map are kept.
    Each panel is filled by plotMultipleLinesByLVar, and only the first
    panel keeps a legend location.

    Returns
    -------
    Info : dict with fields nrows and ncols
    '''
    if pvar is not None:
        sep = os.path.sep
        prefixfilepath = sep.join(jpathPattern.split(sep)[:-1])
        PPListMap = makePPListMapFromJPattern(jpathPattern)
        if pvals is not None:
            pvals = [p for p in pvals if p in PPListMap[pvar]]
        else:
            pvals = PPListMap[pvar]
        jpathList = makeListOfJPatternsWithSpecificVals(
            PPListMap,
            prefixfilepath=prefixfilepath,
            key=pvar,
            vals=pvals,
            **kwargs)
    else:
        jpathList = [jpathPattern]
        pvals = [None]

    nrows = 1
    ncols = len(pvals)
    pylab.subplots(nrows=nrows, ncols=ncols, figsize=(ncols * W, nrows * H))

    # Every panel shares its x and y axes with the previously drawn panel
    prevAx = None
    for panelID, panelPattern in enumerate(jpathList):
        prevAx = pylab.subplot(
            nrows, ncols, panelID + 1, sharey=prevAx, sharex=prevAx)
        isFirstPanel = panelID == 0
        if not isFirstPanel and 'loc' in kwargs:
            # Only show legend on first plot
            kwargs['loc'] = None
        kwargs['doShowNow'] = False
        plotMultipleLinesByLVar(panelPattern, **kwargs)
        if pvar is not None:
            pylab.title('%s=%s' % (pvar, pvals[panelID]))

    pylab.subplots_adjust(bottom=0.15, wspace=0.5)

    doSave = savefilename is not None
    if doSave or doShowNow:
        try:
            # Non-blocking when saving to file, blocking otherwise.
            pylab.show(block=not doSave)
        except TypeError:
            pass  # when using IPython notebook
    if doSave:
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)

    return {'nrows': nrows, 'ncols': ncols}
コード例 #8
0
def plotMultipleLinesByLVar(jpathPattern,
                            lvar=None,
                            lvals=None,
                            ColorMap=DefaultColorList,
                            loc=None,
                            bbox_to_anchor=None,
                            savefilename=None,
                            tickfontsize=None,
                            doShowNow=False,
                            **kwargs):
    ''' Create line plots for provided jobs.

    One line is drawn per value of the line variable lvar.

    Args
    ----
    jpathPattern : str, job path pattern
    lvar : name of the variable that distinguishes the lines
    lvals : value or list of values of lvar to plot
        If None, all values found for lvar in the pattern's map are used.
        Values not found there are dropped, except the special '.best'.
    ColorMap : dict or list
        If dict, the color is looked up first by the line's label
        ('<lvar>=<lval>') and then by its job pattern; if neither key
        exists, DefaultColorList[lineID] is used.
        If list, the color is ColorMap[lineID].
    loc, bbox_to_anchor : legend placement; legend drawn only if loc is
        not None and more than one line exists
    savefilename : if not None, figure is saved to this path
    tickfontsize : optional font size for major tick labels
    doShowNow : if true (and not saving), attempt a blocking show
    kwargs : forwarded to the job-pattern builder and the line plotter
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if lvals is None:
        lvals = PPListMap[lvar]
    elif not isinstance(lvals, list):
        lvals = [lvals]
    # Make sure all lval values are street legal (aka exist on disk)
    lvals = [ll for ll in lvals if ll == '.best' or ll in PPListMap[lvar]]

    # Do ranking in advance for each relevant job
    '''if lvals[0] == '.best':
        xvar = kwargs['xvar']
        if 'xvals' in kwargs:
            xvals = kwargs['xvals']
        else:
            xvals = PPListMap[xvar]
        for xval in xvals:
            keyValDict = dict()
            keyValDict[xvar] = xval
            jpatternForXVal = makeJPatternWithSpecificVals(
                PPListMap,
                prefixfilepath=prefixfilepath, **keyValDict)
            TaskRanker.markBestAmongJobPatternOnDisk(jpatternForXVal)
    '''
    # Create list of jobs with corresponding pattern
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=lvar,
        vals=lvals,
        **kwargs)
    for lineID, line_jobPattern in enumerate(jpathList):
        line_label = '%s=%s' % (lvar, lvals[lineID])
        if isinstance(ColorMap, dict):
            # Use the first key that matches. Without the break, a hit
            # on the label would be overwritten by the second lookup.
            for label in (line_label, line_jobPattern):
                if label in ColorMap:
                    line_color = ColorMap[label]
                    break
            else:
                line_color = DefaultColorList[lineID]
        else:
            # Access next elt in ColorMap list
            line_color = ColorMap[lineID]
        plotSingleLineAcrossJobsByXVar(line_jobPattern,
                                       label=line_label,
                                       color=line_color,
                                       lineID=lineID,
                                       lvar=lvar,
                                       **kwargs)

    if loc is not None and len(jpathList) > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    if savefilename is not None:
        try:
            pylab.show(block=False)
        except TypeError:
            pass  # when using IPython notebook
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
    elif doShowNow:
        try:
            pylab.show(block=True)
        except TypeError:
            pass  # when using IPython notebook
コード例 #9
0
def tryDeleteProposalForSpecificTarget_HDPTopicModel(
        Data,
        hmodel,
        LPkwargs=dict(),
        ktarget=0,
        kabsorbList=[1],
        verbose=True,
        doPlotComps=True,
        doPlotELBO=True,
        doPlotDocTopicCount=False,
        nELBOSteps=3,
        nUpdateSteps=5,
        d_initTargetDocTopicCount='warm_start',
        d_initWordCounts='none',
        **kwargs):
    ''' Try deleting one cluster by giving its mass to absorbing clusters.

    Runs a local/summary/global step on a copy of hmodel to get the
    current ELBO score, then runs nELBOSteps restricted local steps that
    reassign the mass of cluster ktarget among the clusters in
    kabsorbList, scoring the proposed model after each step.

    Args
    ----
    Data : dataset passed to the local and summary steps
    hmodel : HModel; copied, never modified in place here
    LPkwargs : dict of keyword args for the local step
    ktarget : int index of the cluster to delete
    kabsorbList : spec of absorbing clusters, normalized by
        parse_list_of_absorbing_comps
    verbose : if true, print progress and a before/after summary
    doPlotComps, doPlotELBO, doPlotDocTopicCount : enable diagnostic plots
    nELBOSteps : number of restricted steps to score
    nUpdateSteps : forwarded to the restricted local step
    d_initTargetDocTopicCount : forwarded to the restricted local step
    d_initWordCounts : if it contains 'bycorr' (and the obs model name
        contains 'Mult'), the target's word counts are added to absorbing
        clusters whose correlation with the target is at least .001
    kwargs : accepted but unused

    Returns
    -------
    propModel : HModel
    propSS : SuffStatBag
    propLscoreList : list of scalar real
        ELBO score of proposed model after each of the nELBOSteps steps
    curModel : HModel
    curSS : SuffStatBag
    curLscore : scalar real
        ELBO score of current model
    '''
    kabsorbList = parse_list_of_absorbing_comps(
        kabsorbList, ktarget, hmodel.obsModel.K)

    from bnpy.allocmodel.topics.HDPTopicRestrictedLocalStep2 \
        import summarizeRestrictedLocalStep_HDPTopicModel
    curModel = hmodel.copy()
    propModel = hmodel.copy()

    # Update current model
    if verbose:
        print("")
        print("Loading model from disk and performing local step...")
    starttime = time.time()
    curLP = curModel.calc_local_params(Data, **LPkwargs)
    curSS = curModel.get_global_suff_stats(Data, curLP, doPrecompEntropy=1)
    curModel.update_global_params(curSS)
    curLdict = curModel.calc_evidence(SS=curSS, todict=1)
    curLscore = curLdict['Ltotal']
    if verbose:
        print("%5.1f sec to obtain current model, LP, and SS" % (
            time.time() - starttime))

    # Docs with non-trivial target mass, sorted from largest to smallest.
    # Only the top 5 are used for the printed and plotted diagnostics.
    nontrivialdocIDs = np.flatnonzero(curLP['DocTopicCount'][:, ktarget] > .01)
    sort_mask = np.argsort(-1*curLP['DocTopicCount'][nontrivialdocIDs, ktarget])
    nontrivialdocIDs = nontrivialdocIDs[sort_mask]
    docIDs = nontrivialdocIDs[:5]
    if verbose:
        print("")
        print("Proposing deletion of cluster %d" % (ktarget))
        print("    total mass N_k = %.1f" % (curSS.getCountVec()[ktarget]))
        print("    %d docs with non-trivial mass" % (nontrivialdocIDs.size))
        print("")
        print("Absorbing into %d/%d remaining clusters" % (
            len(kabsorbList), curSS.K-1))
        print(" ".join(['%3d' % (kk) for kk in kabsorbList]))
        print("")

    # Create init observation model for absorbing states
    xObsModel = propModel.obsModel.copy()
    xinitSS = curSS.copy(includeELBOTerms=False, includeMergeTerms=False)
    # Remove in reverse order so earlier indices stay valid
    for k in reversed(np.arange(xObsModel.K)):
        if k not in kabsorbList:
            xinitSS.removeComp(k)
    # Find clusters correlated in appearance with the target
    if curModel.getObsModelName().count('Mult') and d_initWordCounts.count('bycorr'):
        corrVec = calcCorrelationFromTargetToAbsorbingSet(
            curLP['DocTopicCount'], ktarget, kabsorbList)
        bestAbsorbIDs = np.flatnonzero(corrVec >= .001)
        print("absorbIDs with best correlation:")
        print(bestAbsorbIDs)
        for k in bestAbsorbIDs:
            xinitSS.WordCounts[k,:] += curSS.WordCounts[ktarget,:]
    xObsModel.update_global_params(xinitSS)

    # Create init pi vector for absorbing states:
    # rescale absorbing probabilities so they also carry the target's mass
    curPiVec = propModel.allocModel.get_active_comp_probs()
    xPiVec = curPiVec[kabsorbList].copy()
    xPiVec /= xPiVec.sum()
    xPiVec *= (curPiVec[kabsorbList].sum() +  curPiVec[ktarget])
    assert np.allclose(np.sum(xPiVec),
        curPiVec[ktarget] + np.sum(curPiVec[kabsorbList]))

    if verbose:
        print("Reassigning target mass among absorbing set...")
    starttime = time.time()
    propLscoreList = list()
    for ELBOstep in range(nELBOSteps):
        xSS, Info = summarizeRestrictedLocalStep_HDPTopicModel(
            Dslice=Data,
            curModel=curModel,
            curLPslice=curLP,
            ktarget=ktarget,
            kabsorbList=kabsorbList,
            curPiVec=curPiVec,
            xPiVec=xPiVec,
            xObsModel=xObsModel,
            nUpdateSteps=nUpdateSteps,
            d_initTargetDocTopicCount=d_initTargetDocTopicCount,
            LPkwargs=LPkwargs)

        if ELBOstep < nELBOSteps - 1:
            # Update the xObsModel
            xObsModel.update_global_params(xSS)
            # TODO: update xPiVec???

        print(" completed step %d/%d after %5.1f sec" % (
            ELBOstep+1, nELBOSteps, time.time() - starttime))

        propSS = curSS.copy()
        propSS.replaceCompsWithContraction(
            replaceSS=xSS,
            replaceUIDs=[curSS.uids[k] for k in kabsorbList],
            removeUIDs=[curSS.uids[ktarget]],
            )
        # Total count mass must be preserved by the deletion
        assert np.allclose(propSS.getCountVec().sum(),
            curSS.getCountVec().sum(),
            atol=0.01,
            rtol=0)
        propModel.update_global_params(propSS)
        propLdict = propModel.calc_evidence(SS=propSS, todict=1)
        propLscore = propModel.calc_evidence(SS=propSS)
        propLscoreList.append(propLscore)

    if verbose:
        print("")
        print("Proposal result:")
        if propLscore - curLscore > 0:
            print("  ACCEPTED")
        else:
            print("  REJECTED")
        print("%.4e  cur ELBO score" % (curLscore))
        print("%.4e prop ELBO score" % (propLscore))
        print("% .4e change in ELBO score" % (propLscore - curLscore))
        print("")
        for key in sorted(curLdict.keys()):
            if key.count('_') or key.count('total'):
                continue
            print("  gain %8s % .3e" % (
                key, propLdict[key] - curLdict[key]))
        print("")
        if docIDs.size > 0:
            np.set_printoptions(suppress=1, precision=2, linewidth=120)
            xLPslice = Info['xLPslice']

            print("BEFORE")
            print("-----")
            print(np.hstack([
                curLP['DocTopicCount'][docIDs,:][:,kabsorbList],
                curLP['DocTopicCount'][docIDs,:][:,ktarget][:,np.newaxis]
                ]))
            print("AFTER")
            print("-----")
            print(xLPslice['DocTopicCount'][docIDs,:])

    if doPlotELBO:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)
        iters = np.arange(len(propLscoreList))
        pylab.plot(iters, propLscoreList, 'b-')
        pylab.plot(iters, curLscore*np.ones_like(iters), 'k--')
        pylab.show()

    if doPlotDocTopicCount:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)

        kplotList = [x for x in kabsorbList]
        kplotList.append(ktarget)
        for d in docIDs:
            curDTClabels = ['%.1f' % (x) for x in
                curLP['DocTopicCount'][d, kplotList]]
            bnpy.viz.PlotComps.plotCompsFromHModel(
                curModel,
                compListToPlot=kplotList,
                compsToHighlight=[ktarget],
                xlabels=curDTClabels,
                vmin=0,
                vmax=.01)
            fig = pylab.gcf()
            fig.canvas.set_window_title('doc %d BEFORE' % (d))

            propLP = Info['xLPslice']
            propDTClabels = ['%.1f' % (x) for x in
                propLP['DocTopicCount'][d, :]]
            bnpy.viz.PlotComps.plotCompsFromHModel(
                propModel,
                xlabels=propDTClabels,
                vmin=0,
                vmax=.01)
            fig = pylab.gcf()
            fig.canvas.set_window_title('doc %d AFTER' % (d))
            pylab.show(block=False)

        # Plot docs: first docIDs.size rows hold each doc's word counts,
        # the next docIDs.size rows hold the part assigned to the target.
        # NOTE(review): 900 is hard-coded; presumably the vocabulary size
        # of the dataset this was written for -- confirm before reuse.
        dIm = np.zeros((docIDs.size*2, 900))
        dImLabels = list()
        tImLabels = list()
        row = 0
        for ii,d in enumerate(docIDs):
            start = Data.doc_range[d]
            stop = Data.doc_range[d+1]
            wid = Data.word_id[start:stop]
            wct = Data.word_count[start:stop]
            dIm[row, wid] = wct
            dImLabels.append('doc %d' % (d))

            tmask = np.flatnonzero(curLP['resp'][start:stop, ktarget] > .01)
            dIm[row+docIDs.size, wid[tmask]] = wct[tmask] \
                * curLP['resp'][start + 1*tmask, ktarget]
            tImLabels.append('trgt doc %d' % (d))
            row += 1

        # list.extend returns None, so build the combined label list
        # explicitly: doc labels first, then target labels, matching
        # the row order of dIm.
        bnpy.viz.BarsViz.showTopicsAsSquareImages(
            dIm,
            ncols=2,
            vmin=0,
            vmax=1,
            xlabels=dImLabels + tImLabels,
            cmap='jet')
        pylab.show()

    if doPlotComps:
        import bnpy.viz
        from bnpy.viz.PlotUtil import pylab
        bnpy.viz.PlotUtil.ConfigPylabDefaults(pylab)

        bnpy.viz.PlotComps.plotCompsFromSS(
                curModel,
                curSS,
                compsToHighlight=[ktarget],
                vmin=0,
                vmax=.01)
        fig = pylab.gcf()
        fig.canvas.set_window_title('BEFORE')

        bnpy.viz.PlotComps.plotCompsFromSS(
                propModel,
                propSS,
                vmin=0,
                vmax=.01)
        fig = pylab.gcf()
        fig.canvas.set_window_title('AFTER')
        pylab.show()
    return (
        propModel,
        propSS,
        propLscoreList,
        curModel,
        curSS,
        curLscore)
コード例 #10
0
ファイル: TestBregDiv_Gauss1D.py プロジェクト: jpfeil/hydra
def makePlot(muVals=((0.01, 0), (0.1, 0), (1, 0), (10, 0)), doCorrection=1):
    ''' Plot calcBregDiv_Gauss1D over a grid of x values, one curve per pair.

    Args
    ----
    muVals : iterable of (mu1, mu2) pairs
        Each pair is passed to calcBregDiv_Gauss1D and labels one curve.
    doCorrection : forwarded to calcBregDiv_Gauss1D; also selects the
        y-axis label

    Post Condition
    --------------
    A new figure is created. The minimum of each curve is printed.
    '''
    pylab.figure()
    xgrid = np.linspace(-8, 8, 2000)
    # pylab.hold was removed in Matplotlib 3.0, where repeated plot calls
    # always add to the current axes. Call it only where it still exists.
    if hasattr(pylab, 'hold'):
        pylab.hold('on')
    pylab.plot(xgrid, np.zeros_like(xgrid), ':', alpha=0.2)
    for mu1, mu2 in muVals:
        ygrid = calcBregDiv_Gauss1D(xgrid, mu1, mu2, doCorrection=doCorrection)
        print(ygrid.min())
        pylab.plot(xgrid, ygrid, label='mu1=% 6.2f mu2=% 6.2f' % (mu1, mu2))
    pylab.legend(loc='lower right')
    pylab.xlim([xgrid.min(), xgrid.max()])
    # NOTE(review): y limits reuse the x-grid range; presumably intended
    # to give a square viewing window -- confirm.
    pylab.ylim([xgrid.min(), xgrid.max()])
    pylab.xlabel('x')
    # Raw strings keep the backslash of \mu without an invalid-escape warning
    if doCorrection:
        pylab.ylabel(r'D(x, \mu) + correction')
    else:
        pylab.ylabel(r'D(x, \mu)')

if __name__ == "__main__":
    # One entry per output figure: (filename template, (mu1, mu2) pairs)
    figureSpecs = [
        ('BregDivGauss1D_fixedMean_doC=%d.eps',
            [(0.01,0), (0.1,0), (1,0), (10,0)]),
        ('BregDivGauss1D_fixedVar_doC=%d.eps',
            [(4+1,-2), (1,0), (4+1,2), (16+1,4)]),
        ('BregDivGauss1D_general_doC=%d.eps',
            [(10,0), (4+1,2), (25+0.01,5)]),
    ]
    for doC in [1]:
        for filenameTemplate, muPairs in figureSpecs:
            makePlot(muVals=muPairs, doCorrection=doC)
            pylab.savefig(filenameTemplate % (doC),
                pad_inches=0, bbox_inches='tight')
    pylab.show(block=True)
コード例 #11
0
        pylab.savefig(outfilepath)
        pylab.close('all')
        print('Wrote: %s' % (outfilepath))

def parseArgs(**kwargs):
    ''' Parse command-line arguments into a dict of plotting options.

    Recognized arguments: positional task_output_path, plus optional
    --lap (float), --taskids (str), and --vocabfile (str).

    Returns
    -------
    arg_dict : dict of parsed values
        If --vocabfile is provided, field 'vocabList' is added, holding
        the whitespace-stripped lines of that file.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('task_output_path')
    parser.add_argument('--lap', default=None, type=float)
    parser.add_argument('--taskids', type=str, default=None,
                        help=taskidsHelpMsg)
    parser.add_argument('--vocabfile', type=str, default=None)

    parsed = parser.parse_args()
    arg_dict = vars(parsed)
    if parsed.vocabfile is None:
        return arg_dict

    with open(parsed.vocabfile, 'r') as f:
        arg_dict['vocabList'] = [line.strip() for line in f]
    return arg_dict

if __name__ == "__main__":
    arg_dict = parseArgs()
    # Previously considered: plotCompsForJob(block=1, **argDict)
    # Plot only when no --taskids value was given on the command line.
    if arg_dict.get('taskids') is None:
        plotCompsForTask(**arg_dict)
    pylab.show()
コード例 #12
0
def plotComps(tmpmodel, Data, compsToHighlight=None):
    ''' Plot the components of tmpmodel, then show the figure.

    Delegates to bnpy.viz.PlotComps.plotCompsFromHModel, forwarding Data
    and compsToHighlight as keyword arguments.
    '''
    plotKwargs = dict(Data=Data, compsToHighlight=compsToHighlight)
    bnpy.viz.PlotComps.plotCompsFromHModel(tmpmodel, **plotKwargs)
    pylab.show()
コード例 #13
0
ファイル: PlotTrace.py プロジェクト: vishalbelsare/bnpy
def plotJobs(jpaths,
             legNames,
             styles=None,
             density=2,
             xvar='laps',
             yvar='evidence',
             loc='upper right',
             xmin=None,
             xmax=None,
             taskids=None,
             savefilename=None,
             tickfontsize=None,
             bbox_to_anchor=None,
             **kwargs):
    ''' Draw trace lines for each provided job, then save or show.

    Each job path is handed to plot_all_tasks_for_job with its legend name
    and a style dict (styles[i] when given, else a per-line color id).
    When plotting evidence against laps, axis limits are chosen from the
    drawn lines so that the early part of each run can be cropped away.

    Raises
    ------
    ValueError
        If jpaths is empty.
    '''
    nJobs = len(jpaths)
    if nJobs == 0:
        raise ValueError('Empty job list. Nothing to plot.')

    nLegend = len(legNames)

    for jobID, jpath in enumerate(jpaths):
        jobStyle = dict(colorID=jobID) if styles is None else styles[jobID]
        # Style entries take precedence over caller-provided kwargs
        jobKwargs = dict(kwargs)
        jobKwargs.update(jobStyle)
        plot_all_tasks_for_job(jpath,
                               legNames[jobID],
                               xvar=xvar,
                               yvar=yvar,
                               taskids=taskids,
                               density=density,
                               **jobKwargs)

    # Axis limit determination (evidence-vs-laps plots only).
    # If every run extends beyond lap 1, only the region past lap 1 is
    # shown; the lower y limit may move to the lap-2 value if the jump
    # from lap 1 to lap 2 dwarfs everything that follows.
    if xvar == 'laps' and yvar == 'evidence':
        xHi = 0
        yLoLap1 = np.inf
        yLoLap2 = np.inf
        yHi = -np.inf
        everyRunPastLap1 = True
        for curve in pylab.gca().get_lines():
            xs = curve.get_xdata()
            ys = curve.get_ydata()
            if xs.size < 3:
                # Too few points to say anything about this run
                everyRunPastLap1 = False
                continue
            idxLap1 = np.searchsorted(xs, 1.0)
            idxLap2 = np.searchsorted(xs, 2.0)
            if idxLap1 < xs.size:
                yLoLap1 = np.minimum(yLoLap1, ys[idxLap1])
                yHi = np.maximum(yHi, ys[idxLap1:].max())
            if idxLap2 < xs.size:
                yLoLap2 = np.minimum(yLoLap2, ys[idxLap2])
            xLast = xs.max()
            xHi = np.maximum(xHi, xLast)
            if xLast <= 1:
                everyRunPastLap1 = False

        # If all relevant curves extend beyond x=1, only show that part
        xLo = 1.0 - 1e-5 if (everyRunPastLap1 and xHi > 1.5) else 0

        yLo = yLoLap1
        if everyRunPastLap1 and yLoLap2 < yHi:
            spanFromLap1 = yHi - yLoLap1
            spanFromLap2 = yHi - yLoLap2
            if 10 * spanFromLap2 < spanFromLap1:
                # Y values jump from lap1 to lap2 is enormous,
                # so let's just show y values from lap2 onward...
                yLo = yLoLap2
        if (not np.allclose(yHi, yLo)) and everyRunPastLap1:
            pylab.ylim([yLo, yHi + 0.1 * (yHi - yLo)])
        pylab.xlim([xLo, xHi + .05 * (xHi - xLo)])

    if loc is not None and nJobs > 1:
        pylab.legend(loc=loc, bbox_to_anchor=bbox_to_anchor)
    if tickfontsize is not None:
        pylab.tick_params(axis='both', which='major', labelsize=tickfontsize)

    doSave = savefilename is not None
    try:
        # Non-blocking when saving to file, blocking otherwise.
        pylab.show(block=not doSave)
    except TypeError:
        pass  # when using IPython notebook
    if doSave:
        pylab.savefig(savefilename, bbox_inches='tight', pad_inches=0)
コード例 #14
0
ファイル: BarsViz.py プロジェクト: vishalbelsare/bnpy
def plotExampleBarsDocs(Data,
                        docIDsToPlot=None,
                        figID=None,
                        vmax=None,
                        nDocToPlot=16,
                        doShowNow=False,
                        seed=0,
                        randstate=np.random.RandomState(0),
                        xlabels=None,
                        W=1,
                        H=1,
                        **kwargs):
    ''' Show word-count histograms of selected documents as square images.

    Each document's counts are placed in a length-V vector (V =
    Data.vocab_size, which must be a perfect square) and reshaped to
    (sqrt(V), sqrt(V)). Images are laid out in a grid with 5 columns.

    Args
    ----
    Data : object with vocab_size, nDoc, doc_range, word_id, word_count,
        and getDocTypeCountMatrix()
    docIDsToPlot : optional sequence of document ids
        If None, min(Data.nDoc, nDocToPlot) docs are sampled without
        replacement.
    figID : if None, a new figure with subplots is created
        NOTE(review): a non-None figID is not otherwise used here.
    vmax : optional upper intensity limit for imshow
        If None, it is set from the data: the largest per-word 98th
        percentile of the document-by-word count matrix (when positive).
    nDocToPlot : int, number of docs to sample when docIDsToPlot is None
    doShowNow : if true, call pylab.show() before returning
    seed : if not None, a fresh RandomState(seed) replaces randstate
    randstate : random state used for sampling when seed is None
    xlabels : optional sequence of per-image x-axis labels
    W, H : width and height of each panel, used for the figure size
    kwargs : extra keyword args for pylab.imshow
        Entries 'vmin' and 'interpolation' are always overwritten.
    '''
    kwargs['vmin'] = 0
    kwargs['interpolation'] = 'nearest'
    if vmax is not None:
        kwargs['vmax'] = vmax
    if seed is not None:
        randstate = np.random.RandomState(seed)
    V = Data.vocab_size
    sqrtV = int(np.sqrt(V))
    assert np.allclose(sqrtV * sqrtV, V)
    if docIDsToPlot is not None:
        nDocToPlot = len(docIDsToPlot)
    else:
        size = np.minimum(Data.nDoc, nDocToPlot)
        docIDsToPlot = randstate.choice(Data.nDoc, size=size, replace=False)
    ncols = 5
    nrows = int(np.ceil(nDocToPlot / float(ncols)))
    if vmax is None:
        DocWordArr = Data.getDocTypeCountMatrix()
        vmax = int(np.max(np.percentile(DocWordArr, 98, axis=0)))
        # Apply the data-driven limit; previously it was computed but
        # never handed to imshow. A zero limit would collapse the color
        # range onto vmin, so in that case leave scaling to imshow.
        if vmax > 0:
            kwargs['vmax'] = vmax

    if figID is None:
        figH, ha = pylab.subplots(nrows=nrows,
                                  ncols=ncols,
                                  figsize=(ncols * W, nrows * H))

    # Start at -1 so the cleanup loop below is well-defined
    # even when there are no documents to plot.
    plotPos = -1
    for plotPos, docID in enumerate(docIDsToPlot):
        start = Data.doc_range[docID]
        stop = Data.doc_range[docID + 1]
        wIDs = Data.word_id[start:stop]
        wCts = Data.word_count[start:stop]
        docWordHist = np.zeros(V)
        docWordHist[wIDs] = wCts
        squareIm = np.reshape(docWordHist, (sqrtV, sqrtV))
        pylab.subplot(nrows, ncols, plotPos + 1)
        pylab.imshow(squareIm, **kwargs)
        pylab.axis('image')
        pylab.xticks([])
        pylab.yticks([])
        if xlabels is not None:
            pylab.xlabel(xlabels[plotPos])

    # Disable empty plots!
    for kdel in range(plotPos + 2, nrows * ncols + 1):
        aH = pylab.subplot(nrows, ncols, kdel)
        aH.axis('off')

    # Fix margins between subplots
    pylab.subplots_adjust(wspace=0.04,
                          hspace=0.04,
                          left=0.01,
                          right=0.99,
                          top=0.99,
                          bottom=0.01)
    if doShowNow:
        pylab.show()