Example 1
def sample_emcee(model=None, nwalkers=500, samples=1000, burn=500, thin=10):
    import emcee
    import pymc as mc
    import pymc.progressbar as pbar
    from numpy import empty, inf, mean, random

    # This is the likelihood function for emcee
    def lnprob(vals):
        try:
            for val, var in zip(vals, model.stochastics):
                var.value = val
            return model.logp
        except mc.ZeroProbability:
            return -1 * inf

    # emcee parameters
    ndim = len(model.stochastics)

    # Find MAP
    mc.MAP(model).fit()
    start = empty(ndim)
    for i, var in enumerate(model.stochastics):
        start[i] = var.value

    # sample starting points for walkers around the MAP
    p0 = random.randn(ndim * nwalkers).reshape((nwalkers, ndim)) + start

    # instantiate sampler passing in the pymc likelihood function
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)

    bar = pbar.progress_bar(burn + samples)
    i = 0

    # burn-in
    for pos, prob, state in sampler.sample(p0, iterations=burn):
        i += 1
        bar.update(i)
    sampler.reset()

    # sample
    try:
        for p, lnprob, lnlike in sampler.sample(pos,
                                                iterations=samples,
                                                thin=thin):
            i += 1
            bar.update(i)
    except KeyboardInterrupt:
        pass
    finally:
        print("\nMean acceptance fraction during sampling: {}".format(
            mean(sampler.acceptance_fraction)))
        mcmc = mc.MCMC(model)  # MCMC instance for model
        mcmc.sample(1, progress_bar=False)  # This call is to set up the chains

        for i, var in enumerate(model.stochastics):
            var.trace._trace[0] = sampler.flatchain[:, i]

        return mcmc
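
A minimal usage sketch for the function above, assuming PyMC 2.x and emcee 2.x (where EnsembleSampler.sample yields (pos, lnprob, rstate) tuples). The toy model and variable names below are illustrative and not part of the original source:

import numpy as np
import pymc as mc

# Toy data and a simple PyMC 2 model (illustrative only)
data = np.random.normal(1.0, 2.0, size=100)
mu = mc.Normal('mu', mu=0.0, tau=1e-4)
tau = mc.Gamma('tau', alpha=1.0, beta=1.0)
y = mc.Normal('y', mu=mu, tau=tau, value=data, observed=True)
model = mc.Model([mu, tau, y])

# Run the ensemble sampler; the walker samples are written back into the
# PyMC traces, so the returned MCMC object can be queried as usual.
mcmc = sample_emcee(model=model, nwalkers=100, samples=500, burn=200, thin=5)
print(mcmc.trace('mu')[:].mean())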
Example 2
    def sample_emcee(
        self,
        nwalkers=500,
        samples=10,
        dispersion=0.1,
        burn=5,
        thin=1,
        stretch_width=2.0,
        anneal_stretch=True,
        pool=None,
    ):
        import sys

        import emcee
        import numpy as np
        import pymc as pm
        import pymc.progressbar as pbar

        # This is the likelihood function for emcee
        lnprob = LnProb(self)

        # init
        self.mcmc()

        # get current values
        stochs = self.get_stochastics()
        start = [node_descr["node"].value for name, node_descr in stochs.iterrows()]
        ndim = len(start)

        def init_from_priors():
            p0 = np.empty((nwalkers, ndim))
            i = 0
            while i != nwalkers:
                self.mc.draw_from_prior()
                try:
                    self.mc.logp
                    p0[i, :] = [node_descr["node"].value for name, node_descr in stochs.iterrows()]
                    i += 1
                except pm.ZeroProbability:
                    continue
            return p0

        if hasattr(self, "emcee_dispersions"):
            scale = np.empty_like(start)
            for i, (name, node_descr) in enumerate(stochs.iterrows()):
                knode_name = node_descr["knode_name"].replace("_subj", "")
                scale[i] = self.emcee_dispersions.get(knode_name, 0.1)
        else:
            scale = 0.1

        p0 = np.random.randn(ndim * nwalkers).reshape((nwalkers, ndim)) * scale * dispersion + start
        # p0 = init_from_priors()

        # instantiate sampler passing in the pymc likelihood function
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=stretch_width, pool=pool)

        bar = pbar.progress_bar(burn + samples)
        i = 0

        annealing = np.linspace(stretch_width, 2, burn)
        sys.stdout.flush()

        for pos, prob, state in sampler.sample(p0, iterations=burn):
            if anneal_stretch:
                sampler.a = annealing[i]
            i += 1
            bar.update(i)

        # print("\nMean acceptance fraction during burn-in: {}".format(np.mean(sampler.acceptance_fraction)))
        sampler.reset()

        # sample
        try:
            for p, lnprob, lnlike in sampler.sample(pos, iterations=samples, thin=thin):
                i += 1
                bar.update(i)
        except KeyboardInterrupt:
            pass
        finally:
            print(("\nMean acceptance fraction during sampling: {}".format(np.mean(sampler.acceptance_fraction))))
            # restore state
            for val, (name, node_descr) in zip(start, stochs.iterrows()):
                node_descr["node"].set_value(val)

            # Save samples back to pymc model
            self.mc.sample(1, progress_bar=False)  # This call is to set up the chains
            for pos, (name, node) in enumerate(stochs.iterrows()):
                node["node"].trace._trace[0] = sampler.flatchain[:, pos]

            return sampler
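
LnProb is used above as the emcee log-probability callable but is not defined in this snippet. A minimal sketch of such a wrapper, assuming a kabuki-style model that exposes get_stochastics() and a PyMC 2 backend on self.mc (an illustration, not the original implementation):

import numpy as np
import pymc as pm


class LnProb(object):
    """Map a flat parameter vector onto the model's stochastics and return logp."""

    def __init__(self, hmodel):
        self.hmodel = hmodel

    def __call__(self, vals):
        stochs = self.hmodel.get_stochastics()
        try:
            # write the proposed values into the PyMC stochastic nodes
            for val, (name, node_descr) in zip(vals, stochs.iterrows()):
                node_descr['node'].set_value(val)
            # joint log-probability of the current parameter state
            return self.hmodel.mc.logp
        except pm.ZeroProbability:
            return -np.inf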
Example 3
import pandas as pd
import pymc.progressbar as pbar  # pbar as used in the sample_emcee examples above


def post_pred_gen(model,
                  groupby=None,
                  samples=500,
                  append_data=False,
                  add_model_parameters=False,
                  progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model on which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        iter_data = ((name, model.data.iloc[obs['node'].value.index])
                     for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)

        if progress_bar:
            bar.update(bar_iter)
            bar_iter += 1

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        # If we used data grouping --> name is a tuple which doesn't play well with pd.concat later on
        # We exchange the name for the name of the observed node we currently process
        if groupby is not None:
            new_name = node.__str__()
        else:  # if groupby was None --> keep name as is
            new_name = name

        # Sample and generate stats
        datasets = _post_pred_generate(
            node,
            samples=samples,
            data=data,
            append_data=append_data,
            add_model_parameters=add_model_parameters)
        results[new_name] = pd.concat(datasets,
                                      names=['sample'],
                                      keys=list(range(len(datasets))))
    return pd.concat(results, names=['node'])
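
A short usage sketch of post_pred_gen, assuming a fitted HDDM/kabuki model m; the node name 'wfpt.0' and the number of samples are illustrative:

ppc = post_pred_gen(m, samples=200, append_data=True)

# The result is a MultiIndex DataFrame indexed by (node, sample, original data index)
ppc.loc['wfpt.0']                  # all replications for one observed node
ppc.loc[('wfpt.0', 0)]             # the first simulated data set for that node
ppc.groupby(level='node').mean()   # per-node summary across replications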
Example 4
    def sample_emcee(self,
                     nwalkers=500,
                     samples=10,
                     dispersion=.1,
                     burn=5,
                     thin=1,
                     stretch_width=2.,
                     anneal_stretch=True,
                     pool=None):
        import sys

        import emcee
        import numpy as np
        import pymc as pm
        import pymc.progressbar as pbar

        # This is the likelihood function for emcee
        lnprob = LnProb(self)

        # init
        self.mcmc()

        # get current values
        stochs = self.get_stochastics()
        start = [
            node_descr['node'].value for name, node_descr in stochs.iterrows()
        ]
        ndim = len(start)

        def init_from_priors():
            p0 = np.empty((nwalkers, ndim))
            i = 0
            while i != nwalkers:
                self.mc.draw_from_prior()
                try:
                    self.mc.logp
                    p0[i, :] = [
                        node_descr['node'].value
                        for name, node_descr in stochs.iterrows()
                    ]
                    i += 1
                except pm.ZeroProbability:
                    continue
            return p0

        if hasattr(self, 'emcee_dispersions'):
            scale = np.empty_like(start)
            for i, (name, node_descr) in enumerate(stochs.iterrows()):
                knode_name = node_descr['knode_name'].replace('_subj', '')
                scale[i] = self.emcee_dispersions.get(knode_name, 0.1)
        else:
            scale = 0.1

        p0 = np.random.randn(ndim * nwalkers).reshape(
            (nwalkers, ndim)) * scale * dispersion + start
        #p0 = init_from_priors()

        # instantiate sampler passing in the pymc likelihood function
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnprob,
                                        a=stretch_width,
                                        pool=pool)

        bar = pbar.progress_bar(burn + samples)
        i = 0

        annealing = np.linspace(stretch_width, 2, burn)
        sys.stdout.flush()

        for pos, prob, state in sampler.sample(p0, iterations=burn):
            if anneal_stretch:
                sampler.a = annealing[i]
            i += 1
            bar.update(i)

        #print("\nMean acceptance fraction during burn-in: {}".format(np.mean(sampler.acceptance_fraction)))
        sampler.reset()

        # sample
        try:
            for p, lnprob, lnlike in sampler.sample(pos,
                                                    iterations=samples,
                                                    thin=thin):
                i += 1
                bar.update(i)
        except KeyboardInterrupt:
            pass
        finally:
            print("\nMean acceptance fraction during sampling: {}".format(
                np.mean(sampler.acceptance_fraction)))
            # restore state
            for val, (name, node_descr) in zip(start, stochs.iterrows()):
                node_descr['node'].set_value(val)

            # Save samples back to pymc model
            self.mc.sample(
                1, progress_bar=False)  # This call is to set up the chains
            for pos, (name, node) in enumerate(stochs.iterrows()):
                node['node'].trace._trace[0] = sampler.flatchain[:, pos]

            return sampler
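
A hypothetical call for the method above, assuming an HDDM model instance m built with hddm.HDDM(data) and that sample_emcee is available on it; the parameter values are illustrative only:

import hddm

m = hddm.HDDM(data)   # `data` is a user-supplied DataFrame with rt/response columns
sampler = m.sample_emcee(nwalkers=200, samples=50, burn=20, thin=2)

# The walker samples are copied back into the PyMC traces, so the usual
# kabuki/HDDM accessors work afterwards:
print(m.get_traces().head())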
Example 5
def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model on which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print "Sampling..."

    if groupby is None:
        iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(node, samples=samples, data=data, append_data=append_data)
        results[name] = pd.concat(datasets, names=['sample'], keys=range(len(datasets)))

    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])
def post_pred_gen(model, groupby=None, samples=500, append_data=False, progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model on which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())
        bar = pbar.progress_bar(n_iter)
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        iter_data = ((name, model.data.iloc[obs['node'].value.index]) for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)

    for name, data in iter_data:
        node = model.get_data_nodes(data.index)
        
        # New addition: Reset index for non-regression models
        if str(type(model)) == "<class 'hddm.models.hddm_info.HDDM'>":
            data = data.reset_index()

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(node, samples=samples, data=data, append_data=append_data)
        results[name] = pd.concat(datasets, names=['sample'], keys=list(range(len(datasets))))
        
    # New addition: Convert results dict keys to single items for regression models with different conditions
    if list(results.keys())[0] != 'wfpt':
        if not isinstance(list(results.keys())[0], str):
            results = {'(' + ",".join(map(str, x)) + ')': results[x] for x in results.keys()}
            results = {key.replace('.0', ''): value for key, value in results.items()}
        else:
            results = {'(' + x + ')': results[x] for x in results.keys()}
            
    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])
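
The key-normalization block near the end of the function above converts tuple group keys (as produced by a multi-column groupby) into plain strings so that pd.concat can build the 'node' level of the index. A small standalone illustration of that mapping, with made-up keys and values:

results = {(1.0, 'easy'): 'df_a', (2.0, 'hard'): 'df_b'}   # placeholder values

results = {'(' + ','.join(map(str, k)) + ')': v for k, v in results.items()}
results = {k.replace('.0', ''): v for k, v in results.items()}

print(results)   # {'(1,easy)': 'df_a', '(2,hard)': 'df_b'}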
Example 8
def post_pred_gen(model,
                  groupby=None,
                  samples=500,
                  append_data=False,
                  progress_bar=True):
    """Run posterior predictive check on a model.

    :Arguments:
        model : kabuki.Hierarchical
            Kabuki model on which to compute the ppc.

    :Optional:
        samples : int
            How many samples to generate for each node.
        groupby : list
            Alternative grouping of the data. If not supplied, uses splitting
            of the model (as provided by depends_on).
        append_data : bool (default=False)
            Whether to append the observed data of each node to the replications.
        progress_bar : bool (default=True)
            Display progress bar

    :Returns:
        Hierarchical pandas.DataFrame with multiple sampled RT data sets.
        1st level: wfpt node
        2nd level: posterior predictive sample
        3rd level: original data index

    :See also:
        post_pred_stats
    """
    results = {}

    # Progress bar
    if progress_bar:
        n_iter = len(model.get_observeds())

        print('printing observeds')
        print(model.get_observeds())
        print('printing len of observeds')
        print(len(model.get_observeds()))
        print('printing dir of observeds')
        print(dir(model.get_observeds()))

        bar = pbar.progress_bar(
            n_iter)  # Why would n_iter be related to the number of observeds ?
        bar_iter = 0
    else:
        print("Sampling...")

    if groupby is None:
        # Generates a sequence of (node label, dataframe) tuples, each containing the name of a node and its
        # subject-specific data (or, more generally, the observeds by unit of decomposition, usually subjects
        # and/or conditions)
        iter_data = ((name, model.data.iloc[obs['node'].value.index])
                     for name, obs in model.iter_observeds())
    else:
        iter_data = model.data.groupby(groupby)
        print('Print iter_data')
        print(iter_data)

    for name, data in iter_data:
        print('PRINTING data, implies one pass through the for loop')
        print(data)
        print('printing name for the data ')
        print(name)
        node = model.get_data_nodes(data.index)  # CHECK WHAT THIS DOES !

        if progress_bar:
            bar_iter += 1
            bar.update(bar_iter)

        if node is None or not hasattr(node, 'random'):
            continue  # Skip

        ##############################
        # Sample and generate stats
        datasets = _post_pred_generate(
            node, samples=samples, data=data,
            append_data=append_data)  # CHECK WHAT THIS DOES !
        results[name] = pd.concat(datasets,
                                  names=['sample'],
                                  keys=list(range(len(datasets))))

    if progress_bar:
        bar_iter += 1
        bar.update(bar_iter)

    return pd.concat(results, names=['node'])