Beispiel #1
0
def test_create_flat_names_2d():
    """Check flat-name generation and shape recovery for a 2-d variable."""
    dims = (2, 3)
    names = ttab.create_flat_names('x', dims)
    # The last index varies fastest in the expected ordering.
    wanted = [
        'x__0_0',
        'x__0_1',
        'x__0_2',
        'x__1_0',
        'x__1_1',
        'x__1_2',
    ]
    assert names == wanted
    # Round-trip: the shape reconstructed from the names equals the input.
    assert ttab._create_shape(names) == dims
Beispiel #2
0
def extract_bounds_from_summary(summary, varname, shape, roundto=None):
    """
    Extract lower and upper bound of random variable.

    Parameters
    ----------
    summary : mapping
        Looked up as ``summary[quantile][flat_name]`` for the quantile
        columns 'hpd_2.5' and 'hpd_97.5'.
        NOTE(review): presumably the summary data frame of a trace;
        confirm against callers.
    varname : str
        Name of the variable; its flat names are generated from `varname`
        and `shape` through ``ttab.create_flat_names``.
    shape : tuple of int
        Shape of the variable and of each returned array.
    roundto : int, optional
        Number of decimals the bounds are rounded to. The lower bound is
        rounded down (floor), the upper bound is rounded up (ceil).
        If None (default) the values are taken over unrounded.

    Returns
    -------
    list of ndarray
        ``[lower, upper]``, each a float64 array of shape `shape`.
    """
    def do_nothing(value):
        return value

    lower_quant = 'hpd_2.5'
    upper_quant = 'hpd_97.5'

    flat_names = ttab.create_flat_names(varname, shape)

    # The None check has to precede the power: ``10. ** None`` raises a
    # TypeError, which made the unrounded default unusable.
    if roundto is None:
        adjust = 1.
        operations = {lower_quant: do_nothing, upper_quant: do_nothing}
    else:
        adjust = 10. ** roundto
        # Round outwards so the rounded interval still contains the
        # original one.
        operations = {lower_quant: num.floor, upper_quant: num.ceil}

    bounds = []
    for quant in (lower_quant, upper_quant):
        operation = operations[quant]
        column = summary[quant]
        values = num.empty(shape, 'float64')
        for i, flat_name in enumerate(flat_names):
            # ``.flat`` addresses the i-th element in row-major order, so
            # the flat position is valid for 0-d and n-d shapes as well
            # (plain ``values[i]`` would index the first axis only).
            values.flat[i] = operation(column[flat_name] * adjust) / adjust

        bounds.append(values)

    return bounds
Beispiel #3
0
def load(con_str, model=None):
    """Load ODBC database.

    Parameters
    ----------
    con_str : str
        ODBC Connection string including database
    model : Model
        If None, the model is taken from the `with` context.

    Returns
    -------
    A MultiTrace instance

    Raises
    ------
    ValueError
        If the table list obtained from the database is empty.
    """
    db = _ODBCDB(con_str)
    db.connect()
    name = _get_db_name(con_str)
    varnames = _get_table_list(db.cursor)
    if len(varnames) == 0:
        raise ValueError(
            'Can not get variable list for database `{}`'.format(name))
    # The chain list is read from the table of the first variable only.
    chains = _get_chain_list(db.cursor, varnames[0])

    straces = []
    for chain in chains:
        strace = ODBC(con_str, model=model)
        strace.chain = chain
        # Column names use the fixed prefix 'v', not the variable name.
        strace._var_cols = {varname: ttab.create_flat_names('v', shape)
                            for varname, shape in strace.var_shapes.items()}
        # Mark as set up so the trace does not run its table-creation path.
        strace._is_setup = True
        strace.db = db  # Share the db with all traces.
        straces.append(strace)
    return base.MultiTrace(straces)
Beispiel #4
0
def dump(name, trace, chains=None):
    """
    Write the values of an NDArray trace to CSV files, one per chain.

    Each chain is written to ``<name>/chain-<chain>.csv``. The directory
    `name` is created if it does not exist yet.

    Parameters
    ----------
    name : str
        Name of directory to store CSV files in
    trace : :class:`pymc3.backend.base.MultiTrace` of NDArray traces
        Result of MCMC run with default NDArray backend
    chains : list
        Chains to dump. If None, all chains are dumped.
    """
    if not os.path.exists(name):
        os.mkdir(name)

    selected = trace.chains if chains is None else chains

    # Variable shapes are taken from the first selected chain and reused
    # for all chains.
    shapes = trace._straces[selected[0]].var_shapes
    flat_names = {}
    for varname, shape in shapes.items():
        flat_names[varname] = ttab.create_flat_names(varname, shape)

    for chain in selected:
        target = os.path.join(name, 'chain-{}.csv'.format(chain))
        frame = ttab.trace_to_dataframe(
            trace, chains=chain, flat_names=flat_names)
        frame.to_csv(target, index=False)
Beispiel #5
0
    def __init__(self, name, model=None, vars=None):
        """Create the directory `name` if missing and initialise the chain.

        `name`, `model` and `vars` are forwarded to the base class
        initialiser. Afterwards the flat column names of every variable
        are derived from ``self.var_shapes``.
        """
        if not os.path.exists(name):
            os.mkdir(name)
        super(TextChain, self).__init__(name, model, vars)

        # Map each variable name to the list of its flat names.
        names_by_var = {}
        for varname, shape in self.var_shapes.items():
            names_by_var[varname] = ttab.create_flat_names(varname, shape)
        self.flat_names = names_by_var

        self.filename = None
        self.df = None
        self.corrupted_flag = False
Beispiel #6
0
    def __init__(self,
                 dir_path='',
                 model=None,
                 vars=None,
                 buffer_size=5000,
                 buffer_thinning=1,
                 progressbar=False,
                 k=None):
        """Initialise the chain and create the directory `dir_path` if missing.

        `model`, `vars`, `buffer_size` and `buffer_thinning` are forwarded
        to the base class initialiser; `progressbar` and `k` are stored on
        the instance.

        If `k` is not None, every variable listed in `transd_vars_dist`
        gets its flat names generated for shape ``(k, )`` instead of the
        shape found in ``self.var_shapes``.
        """
        super(FileChain, self).__init__(
            model=model,
            vars=vars,
            buffer_size=buffer_size,
            buffer_thinning=buffer_thinning)

        if not os.path.exists(dir_path):
            os.mkdir(dir_path)

        self.dir_path = dir_path

        self.flat_names = OrderedDict()
        if self.var_shapes is not None:
            override_shape = k is not None
            for varname, shape in self.var_shapes.items():
                if override_shape and varname in transd_vars_dist:
                    shape = (k, )

                self.flat_names[varname] = ttab.create_flat_names(
                    varname, shape)

        self.k = k

        self.corrupted_flag = False
        self.progressbar = progressbar

        self.stored_samples = 0
        self.draws = 0
        self._df = None
        self.filename = None
def dict2pd(statdict, labelname):
    """Turn a diagnostics output dict into a single pandas Series.

    Every value of `statdict` is flattened and indexed by the flat names
    generated from its key and shape. The pieces are concatenated and the
    resulting Series is named `labelname`.
    """
    pieces = []
    for varname, stat in statdict.items():
        piece = pd.Series(
            stat.flatten(),
            index=ttab.create_flat_names(varname, stat.shape))
        pieces.append(piece)
    return pd.concat(pieces, axis=0).rename(labelname)
Beispiel #8
0
    def __init__(self,
                 name,
                 model=None,
                 vars=None,
                 buffer_size=5000,
                 progressbar=False,
                 k=None):
        """Create the directory `name` if missing and initialise the chain.

        `name`, `model` and `vars` are forwarded to the base class
        initialiser; `buffer_size`, `progressbar` and `k` are stored on
        the instance.

        ``self.flat_names`` stays None when ``self.var_shapes`` is None.
        Otherwise it maps every variable to its flat names. If `k` is not
        None, variables listed in `transd_vars_dist` use shape ``(k, )``
        instead of their own shape.
        """
        if not os.path.exists(name):
            os.mkdir(name)

        super(TextChain, self).__init__(name, model, vars)

        self.flat_names = None
        if self.var_shapes is not None:
            names_by_var = {}
            for varname, shape in self.var_shapes.items():
                if k is not None and varname in transd_vars_dist:
                    shape = (k, )

                names_by_var[varname] = ttab.create_flat_names(
                    varname, shape)
            self.flat_names = names_by_var

        self.k = k
        self.filename = None
        self.df = None
        self.corrupted_flag = False
        self.progressbar = progressbar
        self.buffer_size = buffer_size
        self.stored_samples = 0
        self.buffer = []
Beispiel #9
0
    def add_derived_variables(self, source_type, n_sources=1):
        """Register the derived variables of `source_type` on this trace.

        The variable names are looked up in `derived_variables_mapping`.
        When the lookup raises a KeyError nothing is added. Each derived
        variable is registered with shape ``(n_sources, )`` and dtype
        'float64'.
        """
        try:
            derived = derived_variables_mapping[source_type]
            message = 'Adding derived variables %s to trace.' % list2string(
                derived)
            logger.info(message)
        except KeyError:
            logger.info('No derived variables for %s' % source_type)
            derived = []

        # The same shape applies to every derived variable.
        shape = (n_sources, )
        for name in derived:
            self.flat_names[name] = ttab.create_flat_names(name, shape)
            self.var_shapes[name] = shape
            self.var_dtypes[name] = 'float64'
            self.varnames.append(name)
Beispiel #10
0
    def setup(self, draws, chain):
        """Prepare the trace for sampling the given chain.

        Connects to the database. If the trace has been set up before,
        drawing continues after the highest stored draw of `chain`.
        Otherwise the column names are generated and the table is created.
        The insert queries are (re)built in both cases.

        Parameters
        ----------
        draws : int
            Expected number of draws
        chain : int
            Chain number
        """
        self.db.connect()
        self.chain = chain

        if not self._is_setup:
            # Table has not been created.
            columns = {}
            for varname, shape in self.var_shapes.items():
                # Column names use the fixed prefix 'v'.
                columns[varname] = ttab.create_flat_names('v', shape)
            self._var_cols = columns
            self._create_table()
            self._is_setup = True
        else:
            self.draw_idx = self._get_max_draw(chain) + 1
            self._len = None
        self._create_insert_queries()
Beispiel #11
0
def test_create_flat_names_1d():
    """Check flat-name generation and shape recovery for a 1-d variable."""
    dims = (2, )
    names = ttab.create_flat_names("x", dims)
    assert names == ["x__0", "x__1"]
    # Round-trip: the shape reconstructed from the names equals the input.
    assert ttab._create_shape(names) == dims
Beispiel #12
0
def summary(trace,
            varnames=None,
            transform=lambda x: x,
            stat_funcs=None,
            extend=False,
            include_transformed=False,
            alpha=0.05,
            start=0,
            batches=None):
    r"""Build a data frame of summary statistics for the variables of a trace.

    Parameters
    ----------
    trace : MultiTrace instance
    varnames : list
        Names of variables to include in summary. If None, the names are
        obtained from `get_default_varnames`.
    transform : callable
        Function applied to the sampled values before the statistics are
        computed (defaults to identity).
    stat_funcs : None or list
        A list of functions used to calculate statistics. By default,
        the mean, standard deviation, simulation standard error, and
        highest posterior density intervals are included.

        Each function receives one argument: the samples of a variable
        as a 2 dimensional array, where the first axis corresponds to
        sampling iterations and the second axis represents the flattened
        variable (e.g., x__0, x__1,...). Each function should return
        either

        1) A `pandas.Series` instance containing the result of
           calculating the statistic along the first axis. The name
           attribute will be taken as the name of the statistic.
        2) A `pandas.DataFrame` where each column contains the
           result of calculating the statistic along the first axis.
           The column names will be taken as the names of the
           statistics.
    extend : boolean
        If True, the statistics returned by `stat_funcs` are appended to
        the default statistics instead of replacing them. This is only
        meaningful when `stat_funcs` is not None.
    include_transformed : bool
        Flag for reporting automatically transformed variables in addition
        to original variables (defaults to False).
    alpha : float
        The alpha level for generating posterior intervals. Defaults
        to 0.05. This is only meaningful when `stat_funcs` is None.
    start : int
        The starting index from which to summarize (each) chain. Defaults
        to zero.
    batches : None or int
        Batch size for calculating standard deviation for non-independent
        samples. Defaults to ``min(100, len(trace))``.
        This is only meaningful when `stat_funcs` is None.

    Returns
    -------
    `pandas.DataFrame` with summary statistics for each variable. The
    default ones are: `mean`, `sd`, `mc_error`, `hpd_2.5`, `hpd_97.5`,
    `n_eff` and `Rhat`. The last two are only computed for traces with 2 or
    more chains, and only when the default statistics are part of the
    result.

    Examples
    --------
    .. code:: ipython

        >>> import pymc3 as pm
        >>> trace.mu.shape
        (1000, 2)
        >>> pm.summary(trace, ['mu'])
                   mean        sd  mc_error     hpd_5    hpd_95
        mu__0  0.106897  0.066473  0.001818 -0.020612  0.231626
        mu__1 -0.046597  0.067513  0.002048 -0.174753  0.081924

                  n_eff      Rhat
        mu__0     487.0   1.00001
        mu__1     379.0   1.00203

    Other statistics can be calculated by passing a list of functions.

    .. code:: ipython

        >>> import pandas as pd
        >>> def trace_sd(x):
        ...     return pd.Series(np.std(x, 0), name='sd')
        ...
        >>> def trace_quantiles(x):
        ...     return pd.DataFrame(pm.quantiles(x, [5, 50, 95]))
        ...
        >>> pm.summary(trace, ['mu'], stat_funcs=[trace_sd, trace_quantiles])
                     sd         5        50        95
        mu__0  0.066473  0.000312  0.105039  0.214242
        mu__1  0.067513 -0.159097 -0.045637  0.062912
    """
    if varnames is None:
        varnames = get_default_varnames(
            trace.varnames, include_transformed=include_transformed)

    if batches is None:
        batches = min(100, len(trace))

    # Default statistics, each computed along the first (sample) axis.
    def _mean(x):
        return pd.Series(np.mean(x, 0), name='mean')

    def _sd(x):
        return pd.Series(np.std(x, 0), name='sd')

    def _mc_error(x):
        return pd.Series(mc_error(x, batches), name='mc_error')

    def _hpd(x):
        return _hpd_df(x, alpha)

    default_funcs = [_mean, _sd, _mc_error, _hpd]

    if stat_funcs is None:
        funcs = default_funcs
    elif extend:
        funcs = default_funcs + stat_funcs
    else:
        funcs = stat_funcs

    frames = []
    for var in varnames:
        samples = transform(trace.get_values(var, burn=start, combine=True))
        # Collapse all variable dimensions into one column axis.
        flat_samples = samples.reshape(samples.shape[0], -1)
        stats = pd.concat([func(flat_samples) for func in funcs], axis=1)
        stats.index = ttab.create_flat_names(var, samples.shape[1:])
        frames.append(stats)
    dforg = pd.concat(frames, axis=0)

    # Convergence diagnostics are skipped when only user statistics were
    # requested or when fewer than two chains are available.
    only_custom_stats = stat_funcs is not None and not extend
    if only_custom_stats or trace.nchains < 2:
        return dforg

    n_eff = pm.effective_n(
        trace, varnames=varnames, include_transformed=include_transformed)
    n_eff_pd = dict2pd(n_eff, 'n_eff')
    rhat = pm.gelman_rubin(
        trace, varnames=varnames, include_transformed=include_transformed)
    rhat_pd = dict2pd(rhat, 'Rhat')

    # Reindex so the rows follow the index of the statistics frame.
    combined = pd.concat([dforg, n_eff_pd, rhat_pd], axis=1)
    return combined.reindex(dforg.index)
Beispiel #13
0
def test_create_flat_names_3d():
    """Check that a 3-d shape survives the flat-name round trip."""
    dims = (2, 3, 4)
    names = ttab.create_flat_names('x', dims)
    recovered = ttab._create_shape(names)
    assert recovered == dims
Beispiel #14
0
def test_create_flat_names_2d():
    """Check flat names and recovered shape of a 2 x 3 variable."""
    dims = (2, 3)
    names = ttab.create_flat_names('x', dims)
    first_row = ['x__0_0', 'x__0_1', 'x__0_2']
    second_row = ['x__1_0', 'x__1_1', 'x__1_2']
    # Names are expected row by row.
    assert names == first_row + second_row
    assert ttab._create_shape(names) == dims
Beispiel #15
0
def test_create_flat_names_0d():
    """Check that a scalar variable keeps its plain name as only flat name."""
    dims = ()
    names = ttab.create_flat_names('x', dims)
    assert names == ['x']
    # Round-trip: the empty shape is recovered from the single name.
    assert ttab._create_shape(names) == dims
Beispiel #16
0
def test_create_flat_names_0d():
    """Check flat-name generation and shape recovery for shape ``()``."""
    scalar_shape = ()
    flat = ttab.create_flat_names('x', scalar_shape)
    wanted = ['x']
    assert flat == wanted
    recovered = ttab._create_shape(flat)
    assert recovered == scalar_shape
Beispiel #17
0
def test_create_flat_names_3d():
    """Check the names-to-shape round trip for shape ``(2, 3, 4)``."""
    cube = (2, 3, 4)
    flat = ttab.create_flat_names('x', cube)
    assert ttab._create_shape(flat) == cube
Beispiel #18
0
def test_create_flat_names_2d():
    """Check flat-name generation and shape recovery for shape ``(2, 3)``."""
    grid = (2, 3)
    flat = ttab.create_flat_names("x", grid)
    wanted = [
        "x__0_0", "x__0_1", "x__0_2",
        "x__1_0", "x__1_1", "x__1_2",
    ]
    assert flat == wanted
    recovered = ttab._create_shape(flat)
    assert recovered == grid