コード例 #1
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def random_draw():
    '''Draw one model (a set of column indices) at random.

    Takes no arguments; the model space is obtained from
    ``modelcontext()``.  A model size ``k`` is first picked from
    ``model_space.k``, then ``k - len(keep) + 2`` distinct entries of
    ``model_space.choices`` are sampled without replacement and merged
    with the always-included columns in ``model_space.keep``.

    Returns
    -------
    draw : list
        sorted column indices (kept columns plus sampled columns)
    '''

    space = modelcontext()
    fixed = space.keep

    # Model size first, then the optional columns that fill it.
    size = np.random.choice(space.k)

    # NOTE(review): the "+ 2" offset is taken as-is from the sizing
    # convention of the model space; its origin is not visible here.
    n_optional = size - len(fixed) + 2
    picked = np.random.choice(space.choices, size=n_optional, replace=False)

    return sorted(fixed + tuple(picked))
コード例 #2
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def pSampleAll(filename, tablename, groupname='', threads=2):
    '''Fit every model in the model space using a process pool.

    For each size ``k`` in ``model_space.k`` every combination of
    ``k - len(keep) + 2`` entries of ``model_space.choices`` is fitted
    in a worker via ``_get_result``; the fits are then written to a
    results table in the parent process.

    Parameters
    ----------
    filename : str
        filepath to save results
    tablename : str
        name of table to save
    groupname : str
        the group node for result storage
    threads : int
        number of processes to spawn

    Returns
    -------
    hfile : pytables file
        reference to on-disk storage (flushed, left open)
    '''

    model_space = modelcontext()
    allowed = model_space.k
    choices = model_space.choices
    keep = model_space.keep

    # Lazily enumerate every admissible combination of optional columns.
    cols = itertools.chain.from_iterable(
        itertools.combinations(choices, k - len(keep) + 2) for k in allowed)

    # Pooling Results
    p = mp.Pool(threads)
    try:
        # map() blocks until every model has been fitted and returns a
        # list of (columns, fit) pairs in submission order.
        mapped = p.map(_get_result, cols)
    finally:
        # Always release the worker processes, even if a fit failed.
        p.close()
        p.join()

    # Saving Results
    # NOTE(review): `num` is forwarded to the table helpers; presumably
    # the number of regressor slots per row -- confirm against
    # _create_table.
    num = len(keep) - 1 + len(choices)
    hfile = _create_table(filename, tablename, groupname, num)
    resultTable = getattr(hfile.get_node('/{}'.format(groupname)), tablename)

    for c, fit in mapped:
        fitcols = sorted(keep + c)
        _append_result(resultTable, num, fit, fitcols)

    _process_results(resultTable)

    hfile.flush()

    return hfile
コード例 #3
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def _get_result(cols):
    '''Fit the model made of the kept columns plus `cols`.

    Defined at module level so it can be handed to a process pool.

    Parameters
    ----------
    cols : tuple
        optional columns to add to ``model_space.keep``; must support
        concatenation with ``keep``

    Returns
    -------
    rslt : tuple
        ``(cols, fit)`` pair, where ``fit`` is whatever
        ``model_space.fit`` returns for the sorted column set
    '''

    space = modelcontext()
    fitcols = sorted(space.keep + cols)

    return (cols, space.fit(fitcols))
コード例 #4
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def SampleAll(filename, tablename, groupname=''):
    '''Fit every model in the model space sequentially.

    For each size ``k`` in ``model_space.k`` every combination of
    ``k - len(keep) + 2`` entries of ``model_space.choices`` is merged
    with ``model_space.keep``, fitted, and appended to the results
    table.

    Parameters
    ----------
    filename : str
        filepath to save results
    tablename : str
        name of table to save
    groupname : str
        the group node for result storage

    Returns
    -------
    hfile : pytables file
        reference to on-disk storage (flushed, left open)
    '''

    model_space = modelcontext()
    allowed = model_space.k
    choices = model_space.choices
    keep = model_space.keep

    # Lazily enumerate every admissible combination of optional columns.
    cols = itertools.chain.from_iterable(
        itertools.combinations(choices, k - len(keep) + 2) for k in allowed)

    # NOTE(review): `num` is forwarded to the table helpers; presumably
    # the number of regressor slots per row -- confirm against
    # _create_table.
    num = len(keep) - 1 + len(choices)
    hfile = _create_table(filename, tablename, groupname, num)
    resultTable = getattr(hfile.get_node('/{}'.format(groupname)), tablename)

    for c in cols:
        fitcols = sorted(keep + c)
        fit = model_space.fit(fitcols)
        _append_result(resultTable, num, fit, fitcols)

    _process_results(resultTable)

    hfile.flush()

    return hfile
コード例 #5
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def mcmc_draw(last_draw):
    '''Propose the next model for the Markov chain sampler.

    The proposal is drawn from the "neighbors" of `last_draw`: models
    obtained by toggling (adding or removing) exactly one column listed
    in ``model_space.choices``, restricted to those whose total number
    of selected columns equals ``k + 1`` for some ``k`` in
    ``model_space.k``.

    Parameters
    ----------
    last_draw : list
        set of regressors (column indices) from previous draw

    Returns
    -------
    draw : list
        sorted column indices of the proposed model

    Notes
    -----
    If no neighbor has an admissible size, the random pick below is
    made from an empty sequence and raises.
    '''

    model_space = modelcontext()
    allowed = model_space.k
    choices = model_space.choices
    keep = model_space.keep

    width = len(keep) + len(choices)

    # Indicator column vector of the previous model.
    prev = np.zeros(width)
    prev[last_draw] = 1
    prev = prev.reshape((-1, 1))

    # Column j of |I - prev| is the previous model with entry j toggled;
    # only columns belonging to `choices` may be toggled.
    neighbors = abs(np.eye(width) - prev)[:, choices]

    # Keep only neighbors with an admissible number of selected columns.
    # NOTE(review): the size test here is k + 1 selected columns, while
    # the other samplers in this file build models from
    # k - len(keep) + 2 optional columns (k + 2 in total) -- confirm
    # which convention is intended.
    sizes = neighbors.sum(axis=0)
    admissible = np.any([sizes == k + 1 for k in allowed], axis=0)
    neighbors = neighbors[:, admissible]

    # NOTE(review): this uses `random`, not `np.random`; if that is the
    # stdlib module, seeding numpy does not make this pick reproducible
    # -- confirm against the file's imports.
    draw = random.choice(xrange(neighbors.shape[1]))

    proposal = sorted(np.arange(neighbors.shape[0])[neighbors[:, draw] == 1])

    return proposal
コード例 #6
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def pMCMC(visits, filename, tablename, groupname='', threads=2, **mcargs):
    '''parallel MCMC sampler for model space

    Parameters
    ----------
    visits : int
        number of total visits (split across the worker processes)
    filename : str
        filepath to save results
    tablename : str
        name of table to save
    groupname : str
        the group node for result storage
    threads : int
        number of processes to spawn for sampling
    burn : int, optional keyword
        number of total visits to burn (split across the workers);
        default 0
    thin : int, optional keyword
        related to fraction of visits kept in chain; default 1
    kick : float, optional keyword
        minimum value for transition probability; default 0.
    seed : int, optional keyword
        base seed; worker ``i`` is started with ``seed + i``;
        default 1234

    Returns
    -------
    hfile : pytables file
        reference to on-disk storage holding the merged chains

    Notes
    -----
    `visits` and `burn` are divided as evenly as possible among the
    workers; each worker runs its own chain via ``MCMC`` and writes to
    a temporary file ``t<i>_<basename>`` next to `filename`.  The
    temporary files are merged into the first one, which is then
    renamed to `filename`.

    If `visits` is at least ``model_space.maxm`` the whole model space
    is fitted with ``pSampleAll`` instead.
    '''

    burn = mcargs.get('burn', 0)
    thin = mcargs.get('thin', 1)
    kick = mcargs.get('kick', 0.)
    seed = mcargs.get('seed', 1234)

    model_space = modelcontext()
    maxm = model_space.maxm

    if visits >= maxm:
        return pSampleAll(filename, tablename, groupname, threads)

    # Spread visits/burn evenly; the first `remainder` workers get one
    # extra each.
    v_each, v_extra = int(visits // threads), visits % threads
    b_each, b_extra = int(burn // threads), burn % threads
    d_visits = [v_each + (i < v_extra) for i in xrange(threads)]
    d_burn = [b_each + (i < b_extra) for i in xrange(threads)]

    # Prefix only the basename so that a `filename` containing a
    # directory still yields a valid temporary path in that directory.
    dirname, basename = os.path.split(filename)
    d_filename = [os.path.join(dirname, 't{}_{}'.format(i, basename))
                  for i in xrange(threads)]

    d_tablename = [tablename]*threads
    d_groupname = [groupname]*threads
    d_thin = [thin]*threads
    d_kick = [kick]*threads
    d_seed = [seed+i for i in xrange(threads)]

    argset = zip(d_visits, d_filename, d_tablename, d_groupname,
        d_burn, d_thin, d_kick, d_seed)

    p = mp.Pool(threads)
    try:
        jobs = [p.apply_async(MCMC, args) for args in argset]
        # NOTE(review): wait() does not re-raise worker errors; a failed
        # chain only shows up when its file is opened below.  get() is
        # not used because MCMC returns an open file handle, which may
        # not survive being sent back to the parent -- confirm before
        # changing.
        for j in jobs:
            j.wait()
    finally:
        p.close()
        p.join()

    # Pooling Results
    hfile = tables.open_file(d_filename[0], 'a')
    try:
        resultTable = getattr(hfile.get_node('/{}'.format(groupname)),
                              tablename)
        for name in d_filename[1:]:
            h = tables.open_file(name, 'a')
            try:
                t = getattr(h.get_node('/{}'.format(groupname)), tablename)
                resultTable.append(t[:])
            finally:
                h.close()
            os.remove(name)

        _process_results(resultTable)
    finally:
        hfile.close()

    os.rename(d_filename[0], filename)

    return tables.open_file(filename, 'a')
コード例 #7
0
ファイル: Sampling.py プロジェクト: jtorcasso/modelavg
def MCMC(visits, filename, tablename, groupname='', 
    burn=0, thin=1, kick=0., seed=1234):
    '''markov chain monte carlo sampler for model space

    Starts from a random model and repeatedly proposes a neighboring
    model (``mcmc_draw``), accepting it with probability
    ``min(1, max(kick, fit[0] / last_prob))``, where ``last_prob`` is
    the stored posterior of the current model.  Every accepted model is
    appended to the results table; rejected proposals are redrawn, so
    the chain holds exactly `visits` rows before burning and thinning.

    Parameters
    ----------
    visits : int
        number of visits in chain
    filename : str
        filepath to save results
    tablename : str
        name of table to save
    groupname : str
        the group node for result storage
    burn : int
        number of visits to burn from beginning of chain
    thin : int
        related to fraction of visits kept in chain (every `thin`-th
        row after the burn-in is kept)
    kick : float
        minimum value for transition probability, in [0, 1]
    seed : int
        seed for numpy's global random number generator

    Returns
    -------
    hfile : pytables file
        reference to on-disk storage (flushed, left open)

    Raises
    ------
    ValueError
        if `kick` is outside [0, 1], if `burn` is not smaller than
        `visits`, or if `thin` is below 1

    Notes
    -----
    If `visits` is at least ``model_space.maxm`` every model is fitted
    with ``SampleAll`` instead; `burn` and `thin` are neither checked
    nor applied in that case.
    '''

    # Explicit check rather than `assert`, which is stripped under -O.
    if not 0 <= kick <= 1:
        raise ValueError('kick must be between 0 and 1')

    model_space = modelcontext()
    choices = model_space.choices
    keep = model_space.keep
    maxm = model_space.maxm

    if visits >= maxm:
        return SampleAll(filename, tablename, groupname)

    # NOTE(review): mcmc_draw picks its neighbor via `random.choice`; if
    # that is the stdlib module rather than numpy's, this seed does not
    # cover it -- confirm against the file's imports.
    np.random.seed(seed)

    if burn >= visits:
        raise ValueError('burn must be fewer than total visits')
    if thin < 1:
        raise ValueError('thin must be an integer 1 or greater')

    # Saving Results
    num = len(keep) - 1 + len(choices)
    hfile = _create_table(filename, tablename, groupname, num)
    resultTable = getattr(hfile.get_node('/{}'.format(groupname)), tablename)

    # Obtaining first draw at random
    last_draw = random_draw()
    fit = model_space.fit(last_draw)
    _append_result(resultTable, num, fit, last_draw)
    last_prob = resultTable.cols.posterior[-1]

    # The first visit is the random start above.
    for _ in xrange(1, visits):

        # Keep proposing until one proposal is accepted.
        while True:

            proposal = mcmc_draw(last_draw)
            fit = model_space.fit(proposal)

            if last_prob == 0:
                # Avoid dividing by zero: always leave a zero-posterior
                # model.
                prob = 1
            else:
                prob = min(1, max(kick, fit[0]/last_prob))

            if np.random.choice([True, False], p=[prob, 1 - prob]):
                last_draw = proposal
                _append_result(resultTable, num, fit, last_draw)
                last_prob = resultTable.cols.posterior[-1]
                break

    # Burning and thinning out visits in the chain
    if (burn > 0) or (thin > 1):
        # Keep the selection under the requested name; the full chain is
        # renamed out of the way and then dropped.
        resultTable.rename('{}Full'.format(tablename))

        selection = resultTable.copy(newname=tablename, start=burn, 
            stop=resultTable.shape[0], step=thin)

        resultTable.remove()
        resultTable = selection

    _process_results(resultTable)

    hfile.flush()

    return hfile