Example #1
def parse_args(xvar='laps', yvar='evidence'):
    ''' Returns Namespace of parsed arguments retrieved from command line
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('dataName', type=str, default='AsteriskK8')
    parser.add_argument('jpath', type=str, default='demo*')

    parser.add_argument('--xvar', type=str, default=xvar,
                        choices=LabelMap.keys(),
                        help="name of x axis variable to plot.")

    parser.add_argument('--yvar', type=str, default=yvar,
                        #choices=LabelMap.keys(),
                        help="name of y axis variable to plot.")

    helpMsg = "ids of trials/runs to plot from given job." + \
              " Example: '4' or '1,2,3' or '2-6'."
    parser.add_argument(
        '--taskids', type=str, default=None, help=helpMsg)
    parser.add_argument(
        '--savefilename', type=str, default=None,
        help="location where to save figure (absolute path directory)")

    args, unkList = parser.parse_known_args()

    argDict = BNPYArgParser.arglist_to_kwargs(unkList, doConvertFromStr=False)
    argDict.update(args.__dict__)
    argDict['jpathPattern'] = os.path.join(os.environ['BNPYOUTDIR'],
                                           args.dataName,
                                           args.jpath)
    del argDict['dataName']
    del argDict['jpath']
    return argDict
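The --taskids string described in helpMsg is presumably expanded later by BNPYArgParser.parse_task_ids (used in the examples below). As a rough standalone sketch of what that format implies, not bnpy's implementation:

def expand_taskids(spec):
    # Hypothetical helper: expand '4', '1,2,3', or '2-6' into task id strings.
    ids = []
    for part in spec.split(','):
        if '-' in part:
            lo, hi = part.split('-')
            ids.extend(str(i) for i in range(int(lo), int(hi) + 1))
        else:
            ids.append(part)
    return ids

print(expand_taskids('2-6'))    # ['2', '3', '4', '5', '6']
print(expand_taskids('1,2,3'))  # ['1', '2', '3']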
Example #2
def parse_args():
    ''' Returns Namespace of parsed arguments retrieved from command line
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('dataName', type=str, default='AsteriskK8')
    parser.add_argument('jpath', type=str, default='demo*')

    helpMsg = "ids of trials/runs to plot from given job." + \
              " Example: '4' or '1,2,3' or '2-6'."
    parser.add_argument('--taskids', type=str, default=None, help=helpMsg)
    parser.add_argument(
        '--savefilename',
        type=str,
        default=None,
        help="location where to save figure (absolute path directory)")
    parser.add_argument('--fileSuffix', type=str, default='PredLik.mat')
    args, unkList = parser.parse_known_args()

    argDict = BNPYArgParser.arglist_to_kwargs(unkList)
    argDict.update(args.__dict__)
    argDict['jpathPattern'] = os.path.join(os.environ['BNPYOUTDIR'],
                                           args.dataName, args.jpath)
    del argDict['dataName']
    del argDict['jpath']
    return argDict
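The returned argDict['jpathPattern'] is a glob-style pattern rooted at $BNPYOUTDIR. A minimal sketch of how such a pattern might be resolved to job directories (the root path and data name here are hypothetical):

import glob
import os

os.environ.setdefault('BNPYOUTDIR', '/tmp/bnpy-results')  # hypothetical root
jpathPattern = os.path.join(os.environ['BNPYOUTDIR'], 'AsteriskK8', 'demo*')
for jobdir in sorted(glob.glob(jpathPattern)):
    print(jobdir)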
def parse_args(**kwargs):
    ''' Returns Namespace of parsed arguments retrieved from command line
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('dataName', type=str, default='AsteriskK8')
    parser.add_argument('jpathPattern', type=str, default='demo*')
    parser.add_argument('--xvar',
                        type=str,
                        default=None,
                        help="name of x axis variable to plot.")
    parser.add_argument('--yvar',
                        type=str,
                        default='evidence',
                        choices=LabelMap.keys(),
                        help="name of y axis variable to plot.")
    parser.add_argument('--lvar',
                        type=str,
                        default=None,
                        help="quantity that varies across lines")
    parser.add_argument('--pvar',
                        type=str,
                        default=None,
                        help="quantity that varies across subplots")
    parser.add_argument(
        '--taskids',
        type=str,
        default='all',
        help="specify which task to plot (all, .best, .worst, etc)")
    parser.add_argument(
        '--savefilename',
        type=str,
        default=None,
        help="location where to save figure (absolute path directory)")
    args, unkList = parser.parse_known_args()
    argDict = BNPYArgParser.arglist_to_kwargs(unkList)
    argDict.update(args.__dict__)
    argDict.update(kwargs)
    argDict['jpathPattern'] = os.path.join(os.environ['BNPYOUTDIR'],
                                           args.dataName, args.jpathPattern)
    del argDict['dataName']
    for key in argDict:
        if key.endswith('vals'):
            if not isinstance(argDict[key], list):
                argDict[key] = argDict[key].split(',')
    return argDict
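The final loop normalizes any keyword ending in 'vals' from a comma-separated string into a list. A standalone sketch of that step with toy values:

argDict = {'xvals': '0.1,0.5,1.0', 'lvals': ['a', 'b'], 'yvar': 'evidence'}
for key in argDict:
    if key.endswith('vals') and not isinstance(argDict[key], list):
        argDict[key] = argDict[key].split(',')
print(argDict['xvals'])  # ['0.1', '0.5', '1.0']
print(argDict['lvals'])  # already a list, left unchanged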
Example #4
def plotCompsForJob(jobpath='', taskids=[1], lap=None, **kwargs):
    ''' Show plot of learned clusters from run(s) saved results on disk
    '''

    # Verify given absolute path is valid.
    jobpath_originalarg = jobpath
    if not os.path.isdir(jobpath):
        # Fallback: try to prepend BNPYOUTDIR to handle "shortcut" names
        jobpath = os.path.join(os.environ['BNPYOUTDIR'], jobpath)
    if not os.path.isdir(jobpath):
        raise ValueError('Not valid path: ' + jobpath_originalarg)
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    for tt, taskid in enumerate(taskids):
        if tt == 0 and isinstance(taskid, str):
            if taskid.startswith('.'):
                rankTasksForSingleJobOnDisk(jobpath)
        taskpath = os.path.join(jobpath, str(taskid))
        plotCompsForTask(taskpath, lap=lap, **kwargs)
    if 'block' in kwargs:
        pylab.show(block=kwargs['block'])
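The first few lines implement a "shortcut" fallback: a bare job name is accepted by prepending $BNPYOUTDIR when the raw argument is not already a directory. A standalone sketch of just that resolution step (the default root is hypothetical):

import os

def resolve_jobpath(jobpath):
    # Accept either an absolute job path or a name relative to $BNPYOUTDIR.
    if not os.path.isdir(jobpath):
        jobpath = os.path.join(
            os.environ.get('BNPYOUTDIR', '/tmp/bnpy-results'), jobpath)
    if not os.path.isdir(jobpath):
        raise ValueError('Not valid path: ' + jobpath)
    return jobpath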
Example #5
            legNames = ['%s=%s' % (plotkey, x) for x in RangeMap[plotkey]]

        # Build list of final jpaths in order of decided legend
        keepListFinal = list()
        for x in RangeMap[plotkey]:
            for jID, jdict in enumerate(keepListD):
                if jdict[plotkey] == x:
                    keepListFinal.append(keepListP[jID])
    else:
        keepListFinal = keepListP[:1]
        legNames = [None]

    if verbose:
        print('\nLegend entries for selected jobs (auto-selected)')
        for name in legNames:
            print(name)

    return keepListFinal, legNames


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('dataName', default='AsteriskK8')
    parser.add_argument('jobName', default='bm')
    args, unkList = parser.parse_known_args()
    reqDict = BNPYArgParser.arglist_to_kwargs(unkList, doConvertFromStr=False)
    jpath = os.path.join(os.environ['BNPYOUTDIR'], args.dataName, args.jobName)

    keepJobs, legNames = filterJobs(jpath, verbose=1, **reqDict)
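The loop over RangeMap[plotkey] keeps the legend entries and the selected job paths in the same order. A standalone sketch with made-up job settings (RangeMap, keepListD, and keepListP are toy stand-ins for the structures above):

plotkey = 'nBatch'
RangeMap = {'nBatch': [1, 10, 100]}
keepListD = [{'nBatch': 10}, {'nBatch': 1}, {'nBatch': 100}]
keepListP = ['job-b10', 'job-b1', 'job-b100']

legNames = ['%s=%s' % (plotkey, x) for x in RangeMap[plotkey]]
keepListFinal = []
for x in RangeMap[plotkey]:
    for jID, jdict in enumerate(keepListD):
        if jdict[plotkey] == x:
            keepListFinal.append(keepListP[jID])
print(keepListFinal)  # ['job-b1', 'job-b10', 'job-b100']
print(legNames)       # ['nBatch=1', 'nBatch=10', 'nBatch=100']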
def plotSingleLineAcrossJobsByXVar(jpathPattern,
                                   label='',
                                   xvar=None,
                                   xvals=None,
                                   xlabel=None,
                                   yvar='evidence',
                                   lineStyle='.-',
                                   taskids='all',
                                   lineID=0,
                                   lvar='',
                                   **kwargs):
    ''' Create line plot in current figure for job matching the pattern

    Iterates over each xval in provided list of values.
    Each one corresponds to a single saved job.

    Post Condition
    --------------
    Current axes have one line added.
    '''
    prefixfilepath = os.path.sep.join(jpathPattern.split(os.path.sep)[:-1])
    PPListMap = makePPListMapFromJPattern(jpathPattern)
    if xvals is None:
        xvals = PPListMap[xvar]

    xs = np.zeros(len(xvals))
    ys = np.zeros(len(xvals))
    jpathList = makeListOfJPatternsWithSpecificVals(
        PPListMap,
        prefixfilepath=prefixfilepath,
        key=xvar,
        vals=xvals,
        **kwargs)

    plotargs = copy.deepcopy(DefaultLinePlotKwArgs)
    # Plot all tasks as faint points with no connections
    for i, jobpath in enumerate(jpathList):
        if not os.path.exists(jobpath):
            raise ValueError("PATH NOT FOUND: %s" % (jobpath))
        x = float(xvals[i])

        for key in plotargs:
            if key in kwargs:
                plotargs[key] = kwargs[key]
        plotargs['markeredgecolor'] = plotargs['color']

        alltaskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
        for tid in alltaskids:
            y = loadYValFromDisk(jobpath, tid, yvar=yvar)
            pylab.plot(x, y, '.', **plotargs)

    # Plot top-ranked tasks as solid points connected by line
    for i, jobpath in enumerate(jpathList):
        rankTasksForSingleJobOnDisk(os.path.join(jobpath))
        x = float(xvals[i])
        y = loadYValFromDisk(jobpath, '.best', yvar=yvar)
        assert isinstance(x, float)
        assert isinstance(y, float)
        xs[i] = x
        ys[i] = y

    plotargs = copy.deepcopy(DefaultLinePlotKwArgs)
    for key in plotargs:
        if key in kwargs:
            plotargs[key] = kwargs[key]
    plotargs['markeredgecolor'] = plotargs['color']
    plotargs['label'] = label
    pylab.plot(xs, ys, lineStyle, **plotargs)

    if lineID == 0:
        if xlabel is None:
            xlabel = xvar
        pylab.xlabel(xlabel)
        pylab.ylabel(LabelMap[yvar])
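The kwarg-merging pattern above (copy the defaults, override only known keys, mirror the line color onto the marker edge) appears twice in this function. A standalone sketch with illustrative defaults:

import copy

DefaultLinePlotKwArgs = dict(color='b', markersize=10, linewidth=2)  # illustrative

def merge_plot_kwargs(**kwargs):
    plotargs = copy.deepcopy(DefaultLinePlotKwArgs)
    for key in plotargs:
        if key in kwargs:
            plotargs[key] = kwargs[key]
    plotargs['markeredgecolor'] = plotargs['color']
    return plotargs

print(merge_plot_kwargs(color='r', alpha=0.5))
# alpha is ignored: only keys already present in the defaults are overridden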
Example #7
def plotSingleJob(
    dataset,
    jobname,
    taskids='1',
    lap='final',
    sequences=[1],
    showELBOInTitle=False,
    dispTrue=True,
    aspectFactor=4.0,
    specialStateIDs=None,
    seqNames=None,
    cmap='Set1',
    maxT=None,
    colorManyToOne=False,
):
    '''
    Returns the array of Data corresponding to a single sequence to display

    If dispTrue = True, the true labels will be shown underneath the
      estimated labels
    '''
    # Make sequences zero-indexed
    if isinstance(sequences, str):
        sequences = np.asarray([int(x) for x in sequences.split(',')],
                               dtype=np.int32)
    sequences = np.asarray(sequences, dtype=np.int32)
    if np.min(sequences) < 1:
        raise ValueError('Sequences must be one-indexed.\n' +
                         'Valid values are 1,2,...N.')
    sequences -= 1

    # Determine the jobpath and taskids
    jobpath = os.path.join(os.path.expandvars('$BNPYOUTDIR'), dataset, jobname)
    if isinstance(taskids, str):
        if taskids.startswith('.'):
            taskids = [taskids]
        else:
            taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    elif isinstance(taskids, int):
        taskids = [str(taskids)]

    datasetPrefFile = os.path.join(jobpath, taskids[0],
                                   'args-DatasetPrefs.txt')
    datasetPrefs = dict()
    if os.path.exists(datasetPrefFile):
        with open(datasetPrefFile, 'r') as f:
            for line in f.readlines():
                fields = line.strip().split(' ')
                if len(fields) != 2:
                    continue
                datasetPrefs[fields[0]] = fields[1]

    # Load Data from its python module
    Datamod = imp.load_source(
        dataset, os.path.expandvars('$BNPYDATADIR/' + dataset + '.py'))
    if dataset == 'SpeakerDiar':
        if len(sequences) > 1:
            raise ValueError(
                'Joint modeling of several sequences makes no sense')
        Data = Datamod.get_data(meetingNum=sequences[0] + 1, **datasetPrefs)
        jobpath = jobpath.replace('SpeakerDiar',
                                  'SpeakerDiar' + str(sequences[0] + 1))
        sequences[0] = 0

    else:
        Data = Datamod.get_data(**datasetPrefs)

    # Determine the maximum length among any of the sequences to be plotted
    if maxT is None:
        Ts = Data.doc_range[sequences + 1] - Data.doc_range[sequences]
        maxT = np.max(Ts)

    # Define the number of pixels used by vertical space of figure
    NUM_STACK = int(np.ceil(maxT / float(aspectFactor)))
    if dispTrue:
        NUM_STACK //= 2  # integer division so np.tile gets an int row count

    f, axes = plt.subplots(len(sequences),
                           len(taskids),
                           sharex='col',
                           sharey='row')

    # For singleton case, make sure that axes is index-able
    if len(sequences) == 1 and len(taskids) == 1:
        axes = [axes]

    for tt, taskidstr in enumerate(taskids):
        if tt == 0 and taskidstr.startswith('.'):
            rankTasksForSingleJobOnDisk(jobpath)

        path = os.path.join(jobpath, taskidstr) + os.path.sep

        # Figure out which lap to use
        if lap == 'final':
            with open(path + 'laps-saved-params.txt') as lapsFile:
                curLap = float(lapsFile.readlines()[-1])
        else:
            curLap = int(lap)

        if showELBOInTitle:
            hdists = np.loadtxt(os.path.join(path, 'hamming-distance.txt'))
            hlaps = np.loadtxt(os.path.join(path, 'laps-saved-params.txt'))
            Keffvals = np.loadtxt(os.path.join(path, 'Keff-saved-params.txt'))
            # Determine scalar values to display
            loc = np.argmin(np.abs(hlaps - curLap))
            hdist = hdists[loc]
            Kefffinal = Keffvals[loc]

            try:
                Kvals = np.loadtxt(os.path.join(path, 'K.txt'))
                ELBOscores = np.loadtxt(os.path.join(path, 'evidence.txt'))
                laps = np.loadtxt(os.path.join(path, 'laps.txt'))

                loc = np.argmin(np.abs(laps - curLap))
                ELBO = ELBOscores[loc]
                Kfinal = Kvals[loc]
            except IOError:
                ELBO = 0.0
                Kfinal = Kefffinal

        # Load in the saved Data from $BNPYOUTDIR
        try:
            filename = 'Lap%08.3fMAPStateSeqsAligned.mat' % curLap
            zHatBySeq = scipy.io.loadmat(path + filename)
            key1 = 'zHatBySeqAligned'
            key2 = 'zHatBySeq'
            if key1 in zHatBySeq:
                zHatBySeq = convertStateSeq_MAT2list(zHatBySeq[key1])
            elif key2 in zHatBySeq:
                zHatBySeq = convertStateSeq_MAT2list(zHatBySeq[key2])
            else:
                raise IOError
        except IOError:
            filename = 'Lap%08.3fMAPStateSeqs.mat' % curLap
            zHatBySeq = scipy.io.loadmat(path + filename)
            zHatBySeq = convertStateSeq_MAT2list(zHatBySeq['zHatBySeq'])

        if specialStateIDs is not None:
            zHatBySeq = relabelAllSequences(zHatBySeq, specialStateIDs)

        # Find maximum number of states we need to display
        nSeq = len(zHatBySeq)
        Kmax = np.max([zHatBySeq[i].max() for i in range(nSeq)])
        hasGroundTruth = False

        vmin = 0
        Kignore = 0
        if hasattr(Data, 'TrueParams') and 'Z' in Data.TrueParams:
            hasGroundTruth = True
            Kmax = np.maximum(Data.TrueParams['Z'].max(), Kmax)
            uLabels = np.unique(Data.TrueParams['Z'])
            Kignore = np.sum(uLabels < 0)
            if Kignore > 0:
                for k in range(1, Kignore + 1):
                    print('ignoring state %d  Ttrue = %d' %
                          (-k, np.sum(Data.TrueParams['Z'] == -k)))

            if colorManyToOne:
                # For each state in zHat, find best true sequence
                Zflat = convertStateSeq_list2flat(zHatBySeq, Data)
                ZflatA = -1 * np.ones_like(Zflat)
                for uID in np.unique(Zflat):
                    overlap = np.zeros(uLabels.size)
                    for ii, trueID in enumerate(uLabels):
                        overlap[ii] = np.sum(
                            np.logical_and(Data.TrueParams['Z'] == trueID,
                                           Zflat == uID))
                    bestii = overlap.argmax()
                    ZflatA[Zflat == uID] = uLabels[bestii]
                zHatBySeq = convertStateSeq_flat2list(ZflatA, Data)

        # In case there's only one sequence, make sure it's index-able
        for ii, seqNum in enumerate(sequences):
            image = np.tile(zHatBySeq[seqNum], (NUM_STACK, 1))

            # Add the true labels to the image (if they exist)
            if hasGroundTruth and dispTrue:
                start = Data.doc_range[seqNum]
                stop = Data.doc_range[seqNum + 1]
                img_trueZ = np.tile(Data.TrueParams['Z'][start:stop],
                                    (NUM_STACK, 1))
                if dispTrue == 2:
                    image = img_trueZ  # Show only true labels
                else:
                    image = np.vstack((image, img_trueZ))

            image = image[:, :maxT]
            if len(sequences) == 1 or len(taskids) == 1:
                cur_ax = axes[ii + tt]
            else:
                cur_ax = axes[ii, tt]

            if hasattr(cmap, 'N'):
                vmax = cmap.N
            else:
                vmax = Kmax

            cur_ax.imshow(Kignore + image + .0001,
                          interpolation='nearest',
                          vmin=vmin,
                          vmax=vmax,
                          cmap=cmap)
            if tt == 0:
                if seqNames is not None:
                    h = cur_ax.set_ylabel('%s' % (seqNames[ii]), fontsize=13)
                    h.set_rotation(0)

                elif len(sequences) > 4:
                    cur_ax.set_ylabel('%d' % (seqNum + 1), fontsize=13)
                else:
                    cur_ax.set_ylabel('Seq. %d' % (seqNum + 1), fontsize=13)

            if ii == 0:
                if showELBOInTitle:
                    fmtSpec = "ELBO: %.3f  K=%d Keff=%d  "
                    if hdist > 0.01:
                        fmtSpec += "dist=%.2f"
                    elif hdist > 0.001:
                        fmtSpec += "dist=%.3f"
                    else:
                        fmtSpec += "dist=%.4f"
                    title = fmtSpec % (ELBO, Kfinal, Kefffinal, hdist)
                    cur_ax.set_title(title)

            cur_ax.set_xlim([0, maxT])
            cur_ax.set_ylim([0, image.shape[0]])
            cur_ax.set_yticks([])
            # ... end loop over sequences
    return axes, zHatBySeq
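Each row of the figure is built by tiling the estimated label sequence NUM_STACK times and, when ground truth exists, stacking the true labels underneath. A standalone sketch of that image assembly with toy data:

import numpy as np

zHat = np.array([0, 0, 1, 1, 2, 2])   # estimated states for one sequence
zTrue = np.array([0, 0, 1, 1, 1, 2])  # ground-truth states
NUM_STACK = 3                         # pixel height of each label strip

image = np.tile(zHat, (NUM_STACK, 1))
image = np.vstack((image, np.tile(zTrue, (NUM_STACK, 1))))
print(image.shape)  # (6, 6): estimated strip on top, true strip below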
Example #8
def plotSingleJob(dataName, jobname, taskids='1', lap=None,
                  showELBOInTitle=True, cmap='gray', title='', mixZs=False):
    ''' Visualize results of single run
    '''

    # Parse the jobpath, and create example task paths
    jobpath = os.path.join(os.path.expandvars('$BNPYOUTDIR'),
                           dataName, jobname)
    if isinstance(taskids, str):
        taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)
    elif isinstance(taskids, int):
        taskids = [str(taskids)]
    taskpath = os.path.join(jobpath, taskids[0])    

    # Load data, with same dataset size prefs as specified at inference time.
    dataKwargs = bnpy.ioutil.DataReader.loadDataKwargsFromDisk(taskpath)
    Data = bnpy.ioutil.DataReader.loadDataFromSavedTask(taskpath)
    AdjMat = np.squeeze(Data.toAdjacencyMatrix())
    if hasattr(Data, 'TrueParams'):
        if 'nodeZ' in Data.TrueParams:
            sortids = np.argsort(Data.TrueParams['nodeZ'])
            print('Sorting nodes by true labels...')
        elif 'pi' in Data.TrueParams:
            sortids = np.argsort(Data.TrueParams['pi'].argmax(axis=1))
    else:
        sortids = np.arange(AdjMat.shape[0])
    # Rearrange the rows/cols of AdjMat
    AdjMat = AdjMat[sortids, :]
    AdjMat = AdjMat[:, sortids]
    if hasattr(Data, 'nodeNames'):
        nodeNames = [Data.nodeNames[s] for s in sortids]
    else:
        nodeNames = None
    # Show the true adj mat and the estimated side-by-side
    # First, the true adjacency matrix
    ncols = len(taskids)+1
    pylab.subplots(nrows=1, ncols=ncols, figsize=(3*ncols, 3))
    pylab.subplot(1, ncols, 1)
    pylab.imshow(AdjMat, cmap='Greys', interpolation='nearest', vmin=0, vmax=1)
    
    if nodeNames is not None and len(nodeNames) < 25:
        pylab.gca().set_yticks(np.arange(len(nodeNames)))
        pylab.gca().set_yticklabels(nodeNames)

    for tt, taskid in enumerate(taskids):
        taskoutpath = os.path.join(jobpath, taskid) + os.path.sep
        # Load the model for the current task at specified lap
        hmodel, curLap = bnpy.ioutil.ModelReader.load_model_at_lap(
            taskoutpath, lap)
        # Compute expected state-state edge prob matrix Ew
        Ew = hmodel.obsModel.Post.lam1 / \
            (hmodel.obsModel.Post.lam1 + hmodel.obsModel.Post.lam0)
        isAssortative = str(type(hmodel.allocModel)).count('Assort')
        if isAssortative:
            K = hmodel.allocModel.K
            Ew_tmp = hmodel.allocModel.epsilon * np.ones((K, K, Ew.shape[-1]))
            for k in range(K):
                Ew_tmp[k, k] = Ew[k]
            Ew = Ew_tmp
        taskAdjMat = np.zeros((Data.nNodes, Data.nNodes, Data.dim))
        useLP = 0
        if useLP:
            LP = hmodel.calc_local_params(Data)
            for eid, (s,t) in enumerate(Data.edges):
                resp_st = LP['resp'][eid]
                if isAssortative:
                    taskAdjMat[s,t] = np.sum(
                        resp_st[:,np.newaxis] * Ew, axis=0)
                else:
                    assert np.allclose(resp_st.sum(), 1.0)
                    taskAdjMat[s,t] = np.sum(
                        resp_st[:,:,np.newaxis] * Ew, axis=(0,1))

        else:
            Epi = np.exp(hmodel.allocModel.E_logPi())
            for eid, (s,t) in enumerate(Data.edges):
                for d in range(Data.dim):
                    taskAdjMat[s,t,d] = np.inner(Epi[s,:], 
                        np.dot(Ew[:,:,d], Epi[t,:]))
        assert taskAdjMat.min() >= 0
        assert taskAdjMat.max() <= 1.0
        taskAdjMat = np.squeeze(taskAdjMat)
        taskAdjMat = taskAdjMat[sortids,:]
        taskAdjMat = taskAdjMat[:, sortids]
        pylab.subplot(1, ncols, 2+tt)
        pylab.imshow(taskAdjMat,
                   cmap='Greys', interpolation='nearest', vmin=0, vmax=1)
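In the non-local-params branch, each expected edge probability is the bilinear form Epi[s, :] · Ew[:, :, d] · Epi[t, :]. A standalone sketch with random toy parameters (sizes and values are made up) shows the computation and that the result stays in [0, 1]:

import numpy as np

rng = np.random.RandomState(0)
K, nNodes, dim = 3, 4, 1
Epi = rng.dirichlet(np.ones(K), size=nNodes)  # per-node community weights
Ew = rng.uniform(size=(K, K, dim))            # community-pair edge probabilities
edges = [(0, 1), (1, 2), (2, 3)]

taskAdjMat = np.zeros((nNodes, nNodes, dim))
for s, t in edges:
    for d in range(dim):
        taskAdjMat[s, t, d] = np.inner(Epi[s, :], np.dot(Ew[:, :, d], Epi[t, :]))
assert taskAdjMat.min() >= 0 and taskAdjMat.max() <= 1.0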
Example #9
def plot_all_tasks_for_job(jobpath, label, taskids=None,
                           color=None,
                           colorID=0,
                           density=2,
                           yvar='evidence',
                           markersize=10,
                           linewidth=2,
                           linestyle='-',
                           drawLineToXMax=None,
                           showOnlyAfterLap=0,
                           xvar='laps',
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath
    '''
    if not os.path.exists(jobpath):
        if not jobpath.startswith(os.path.sep):
            jobpath_tmp = os.path.join(os.environ['BNPYOUTDIR'], jobpath)
            if not os.path.exists(jobpath_tmp):
                raise ValueError("PATH NOT FOUND: %s" % (jobpath))
            jobpath = jobpath_tmp
    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    if yvar == 'hamming-distance':
        yspfile = os.path.join(jobpath, taskids[0], yvar + '-saved-params.txt')
        if xvar == 'laps' and os.path.isfile(yspfile):
            xvar = 'laps-saved-params'

    for tt, taskid in enumerate(taskids):
        xs = None
        ys = None
        laps = None

        try:
            var_ext = ''
            ytxtfile = os.path.join(jobpath, taskid, yvar + '.txt')
            if not os.path.isfile(ytxtfile):
                var_ext = '-saved-params'
                ytxtfile = os.path.join(
                    jobpath, taskid, yvar + var_ext + '.txt')
            ys = np.loadtxt(ytxtfile)

            if ytxtfile.count('saved-params'):
                laptxtfile = os.path.join(jobpath, taskid, 'laps-saved-params.txt')
            else:
                laptxtfile = os.path.join(jobpath, taskid, 'laps.txt')
        except IOError as e:
            # TODO: when is this code needed?
            # xs, ys = loadXYFromTopicModelFiles(jobpath, taskid)
            try:
                if isinstance(xs, np.ndarray) and yvar.count('Keff'):
                    ys = loadKeffForTask(
                        os.path.join(jobpath, taskid), **kwargs)
                    assert xs.size == ys.size
                else:
                    # Heldout metrics
                    xs, ys = loadXYFromTopicModelSummaryFiles(
                        jobpath, taskid, xvar=xvar, yvar=yvar)
                    if showOnlyAfterLap and showOnlyAfterLap > 0:
                        laps, _ = loadXYFromTopicModelSummaryFiles(
                            jobpath, taskid, xvar='laps', yvar=yvar)
            except ValueError:
                try:
                    xs, ys = loadXYFromTopicModelSummaryFiles(jobpath, taskid)
                except ValueError:
                    raise e
        if yvar == 'hamming-distance' or yvar == 'Keff':
            if xvar == 'laps-saved-params':
                # fix off-by-one error, if we save an extra dist on final lap
                if xs.size == ys.size - 1:
                    ys = ys[:-1]
                elif ys.size == xs.size - 1:
                    xs = xs[:-1]  # fix off-by-one error, if we quit early
            elif xs.size != ys.size:
                # Try to subsample both time series at laps where they
                # intersect
                laps_x = np.loadtxt(os.path.join(jobpath, taskid, 'laps.txt'))
                laps_y = np.loadtxt(os.path.join(jobpath, taskid,
                                                 'laps-saved-params.txt'))
                assert xs.size == laps_x.size
                if ys.size == laps_y.size - 1:
                    laps_y = laps_y[:-1]
                xs = xs[np.in1d(laps_x, laps_y)]
                ys = ys[np.in1d(laps_y, laps_x)]

        if xs.size != ys.size:
            raise ValueError('Dimension mismatch. len(xs)=%d, len(ys)=%d'
                             % (xs.size, ys.size))

        # Cleanup laps data. Verify that it is sorted, with no collisions.
        if xvar == 'laps':
            diff = xs[1:] - xs[:-1]
            goodIDs = np.flatnonzero(diff >= 0)
            if len(goodIDs) < xs.size - 1:
                print('WARNING: looks like multiple runs writing to this file!')
                print(jobpath)
                print('Task: ', taskid)
                print(len(goodIDs), xs.size - 1)
                xs = np.hstack([xs[goodIDs], xs[-1]])
                ys = np.hstack([ys[goodIDs], ys[-1]])

        if xvar == 'laps' and yvar == 'evidence':
            mask = xs >= 1.0
            xs = xs[mask]
            ys = ys[mask]
        elif showOnlyAfterLap:
            # print "Filtering for data recorded at lap >= %s" % (
            #    showOnlyAfterLap)
            if laps is None:
                laps = np.loadtxt(laptxtfile)
            mask = laps >= showOnlyAfterLap
            xs = xs[mask]
            ys = ys[mask]
            
        # Force plot density (data points per lap) to desired specification
        # This avoids making plots that have huge file sizes,
        # due to too much content in the given display space
        if xvar == 'laps' and xs.size > 20 and np.sum(xs > 5) > 10:
            if (xs[-1] - xs[9]) != 0:
                curDensity = (xs.size - 10) / (xs[-1] - xs[9])
            else:
                curDensity = density
            while curDensity > density and xs.size > 11:
                # Thin xs and ys data by a factor of 2
                # while preserving the first 10 data points
                xs = np.hstack([xs[:10], xs[10::2]])
                ys = np.hstack([ys[:10], ys[10::2]])
                curDensity = (xs.size - 10) / (xs[-1] - xs[9])

        plotargs = dict(
            markersize=markersize,
            linewidth=linewidth,
            linestyle=linestyle,
            label=None,
            color=color, markeredgecolor=color)
        for key in kwargs:
            if key in plotargs:
                plotargs[key] = kwargs[key]
        if tt == 0:
            plotargs['label'] = label

        pylab.plot(xs, ys, **plotargs)
        if drawLineToXMax:
            xs_dashed = np.asarray([xs[-1], drawLineToXMax])
            ys_dashed = np.asarray([ys[-1], ys[-1]])
            plotargs['label'] = None
            pylab.plot(xs_dashed, ys_dashed, '--', **plotargs)


    pylab.xlabel(LabelMap[xvar])
    if yvar in LabelMap:
        yLabelStr = LabelMap[yvar]
        if yvar == 'Keff' and 'effCountThr' in kwargs:
            effCountThr = float(kwargs['effCountThr'])
            yLabelStr = yLabelStr + ' > %s' % (str(effCountThr))
        pylab.ylabel(yLabelStr)
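The density-control block keeps the first 10 points and repeatedly drops every other later point until the points-per-lap ratio falls under density. A standalone sketch with synthetic data:

import numpy as np

density = 2
xs = np.linspace(0, 50, 501)  # toy laps, 10 recorded points per lap
ys = np.sin(xs)

curDensity = (xs.size - 10) / (xs[-1] - xs[9])
while curDensity > density and xs.size > 11:
    xs = np.hstack([xs[:10], xs[10::2]])
    ys = np.hstack([ys[:10], ys[10::2]])
    curDensity = (xs.size - 10) / (xs[-1] - xs[9])
print(xs.size, round(curDensity, 2))  # far fewer points, density now <= 2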
Example #10
def plot_all_tasks_for_job(jobpath,
                           label,
                           taskids=None,
                           lineType='.-',
                           spreadLineType='--',
                           color=None,
                           yvar='avgLikScore',
                           xvar='laps',
                           markersize=10,
                           linewidth=2,
                           minLap=0,
                           showFinalPt=0,
                           fileSuffix='PredLik.mat',
                           xjitter=None,
                           prefix='predlik',
                           colorID=0,
                           **kwargs):
    ''' Create line plot in current figure for each task/run of jobpath
    '''
    if not os.path.exists(jobpath):
        print('PATH NOT FOUND', jobpath)
        return None
    if not yvar.startswith('avg') and yvar.count('Kactive') == 0:
        yvar = 'avg' + yvar
    if not yvar.endswith('Score') and yvar.count('Kactive') == 0:
        yvar = yvar + 'Score'

    if color is None:
        color = Colors[colorID % len(Colors)]
    taskids = BNPYArgParser.parse_task_ids(jobpath, taskids)

    for tt, taskid in enumerate(taskids):
        taskoutpath = os.path.join(jobpath, taskid)
        hpaths = glob.glob(os.path.join(taskoutpath, '*' + fileSuffix))
        txtpaths = glob.glob(os.path.join(taskoutpath, 'predlik-*.txt'))
        ys_hi = None
        ys_lo = None
        if len(txtpaths) > 0:
            if fileSuffix.endswith('.txt'):
                suffix = '-' + fileSuffix
            else:
                suffix = '.txt'
            if xvar.count('lap'):
                xs = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-lapTrain.txt'))
            elif xvar.count('K'):
                xs = np.loadtxt(os.path.join(taskoutpath, prefix + '-K.txt'))
            elif xvar.count('time'):
                xs = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-timeTrain.txt'))
            else:
                raise ValueError("Unrecognized xvar: " + xvar)
            if yvar.count('Kactive') and not yvar.count('Percentile'):
                ys = np.loadtxt(
                    os.path.join(taskoutpath,
                                 prefix + '-' + yvar + 'Percentile50.txt'))
                ys_lo = np.loadtxt(
                    os.path.join(taskoutpath,
                                 prefix + '-' + yvar + 'Percentile10.txt'))
                ys_hi = np.loadtxt(
                    os.path.join(taskoutpath,
                                 prefix + '-' + yvar + 'Percentile90.txt'))
            else:
                ys = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-' + yvar + suffix))

            if minLap > 0 and taskoutpath.count('fix'):
                laps = np.loadtxt(
                    os.path.join(taskoutpath, prefix + '-lapTrain.txt'))
                mask = laps > minLap
                xs = xs[mask]
                ys = ys[mask]
        elif len(hpaths) > 0:
            hpaths.sort()
            basenames = [x.split(os.path.sep)[-1] for x in hpaths]
            xs = np.asarray([float(x[3:11]) for x in basenames])
            ys = np.zeros_like(xs)
            for ii, hpath in enumerate(hpaths):
                MatVars = scipy.io.loadmat(hpath)
                ys[ii] = float(MatVars['avgPredLL'])
        else:
            raise ValueError('Pred Lik data unavailable for job\n' +
                             taskoutpath)

        plotargs = dict(
            markersize=markersize,
            linewidth=linewidth,
            label=None,
            color=color,
            markeredgecolor=color,
        )
        plotargs.update(kwargs)

        if tt == 0:
            plotargs['label'] = label
        if xjitter is not None:
            xs = xs + xjitter
        pylab.plot(xs, ys, lineType, **plotargs)
        if ys_lo is not None:
            del plotargs['label']
            pylab.plot(xs, ys_lo, spreadLineType, **plotargs)
            pylab.plot(xs, ys_hi, spreadLineType, **plotargs)

        if showFinalPt:
            pylab.plot(xs[-1], ys[-1], '.', **plotargs)
    pylab.xlabel(XLabelMap[xvar])
    pylab.ylabel(YLabelMap[yvar])
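The hpaths branch recovers the lap value from each .mat filename via float(x[3:11]), which assumes the 'Lap%08.3f' prefix seen in Example #7. A minimal sketch of that parsing with made-up filenames:

basenames = ['Lap0001.000PredLik.mat', 'Lap0012.500PredLik.mat']
xs = [float(name[3:11]) for name in basenames]
print(xs)  # [1.0, 12.5]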