Exemple #1
0
def _eval_block(iblock, start, stop, nstates, total_fluxes, cond_fluxes, rates,
                mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    results = [[], [], []]
    # results are target fluxes, conditional fluxes, rates
    for istate in xrange(nstates):
        ci_res = mcbs_ci_correl(total_fluxes[:, istate],
                                estimator=numpy.mean,
                                alpha=mcbs_alpha,
                                n_sets=mcbs_nsets,
                                autocorrel_alpha=mcbs_acalpha,
                                subsample=numpy.mean)
        results[0].append((iblock, istate, (start, stop) + ci_res))

        for jstate in xrange(nstates):
            if istate == jstate: continue
            ci_res = mcbs_ci_correl(cond_fluxes[:, istate, jstate],
                                    estimator=numpy.mean,
                                    alpha=mcbs_alpha,
                                    n_sets=mcbs_nsets,
                                    autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean)
            results[1].append((iblock, istate, jstate, (start, stop) + ci_res))

            ci_res = mcbs_ci_correl(rates[:, istate, jstate],
                                    estimator=numpy.mean,
                                    alpha=mcbs_alpha,
                                    n_sets=mcbs_nsets,
                                    autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean)
            results[2].append((iblock, istate, jstate, (start, stop) + ci_res))
    return results
Exemple #2
0
def _2D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha,
                   mcbs_nsets, mcbs_acalpha, do_correl, mcbs_enable,
                   estimator_kwargs):
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    for istate in range(nstates):
        for jstate in range(nstates):
            if istate == jstate: continue
            estimator_kwargs.update(
                dict(istate=istate, jstate=jstate, nstates=nstates))

            dataset = {
                'indices':
                np.array(list(range(start - 1, stop - 1)), dtype=np.uint16)
            }

            ci_res = mcbs_ci_correl(dataset,
                                    estimator=reweight_for_c,
                                    alpha=mcbs_alpha,
                                    n_sets=mcbs_nsets,
                                    autocorrel_alpha=mcbs_acalpha,
                                    subsample=(lambda x: x[0]),
                                    do_correl=do_correl,
                                    mcbs_enable=mcbs_enable,
                                    estimator_kwargs=estimator_kwargs)
            results.append(
                (name, iblock, istate, jstate, (start, stop) + ci_res))

    return results
Exemple #3
0
def _rate_eval_block(iblock, start, stop, nstates, data_input, name,
                     mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl,
                     mcbs_enable):
    # Our rate estimator is a little more complex, so we've defined a custom evaluation block for it,
    # instead of just using the block evalutors that we've imported.
    results = []
    for istate in range(nstates):
        for jstate in range(nstates):
            if istate == jstate: continue
            kwargs = {'istate': istate, 'jstate': jstate}
            # Why are we sending in the total population dataset, instead of a sliced one?
            # It's a requirement of our estimator; we need to pull from any given i to j state in order to properly normalize
            # and avoid i to j rate constants which are affected by a third state k.
            # That is, we need the populations for both i and j, and it's easier to just send in the entire dataset.
            dataset = {
                'dataset': data_input['dataset'][:, istate, jstate],
                'pops': data_input['pops']
            }
            ci_res = mcbs_ci_correl(dataset,
                                    estimator=sequence_macro_flux_to_rate,
                                    alpha=mcbs_alpha,
                                    n_sets=mcbs_nsets,
                                    autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean,
                                    do_correl=do_correl,
                                    mcbs_enable=mcbs_enable,
                                    estimator_kwargs=kwargs)
            results.append(
                (name, iblock, istate, jstate, (start, stop) + ci_res))

    return results
Exemple #4
0
def _1D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha,
                   mcbs_nsets, mcbs_acalpha, do_correl, mcbs_enable,
                   estimator_kwargs):
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    for istate in range(nstates):
        # A little hack to make our estimator play nice, as jstate must be there.
        # For 1D datasets (state probabilities, etc), the argument isn't used in our estimator,
        # and so any variable which has the proper type is fine.
        estimator_kwargs.update(
            dict(istate=istate, jstate=istate, nstates=nstates))

        dataset = {
            'indices': np.array(list(range(start - 1, stop - 1)),
                                dtype=np.uint16)
        }

        ci_res = mcbs_ci_correl(dataset,
                                estimator=reweight_for_c,
                                alpha=mcbs_alpha,
                                n_sets=mcbs_nsets,
                                autocorrel_alpha=mcbs_acalpha,
                                subsample=(lambda x: x[0]),
                                do_correl=do_correl,
                                mcbs_enable=mcbs_enable,
                                estimator_kwargs=estimator_kwargs)
        results.append((name, iblock, istate, (start, stop) + ci_res))

    return results
Exemple #5
0
    def calc_evol_flux(self):
        """Compute and store the cumulative evolution of flux confidence intervals.

        For every target in ``self.fluxdata`` the flux series is evaluated on
        growing prefixes (``flux[:istop]``, with ``istop`` advancing by
        ``self.evol_step`` entries per block) using ``mclib.mcbs_ci_correl``
        with ``numpy.mean`` as estimator and subsampling function.  The
        per-block results are written to a ``flux_evolution`` dataset in the
        target's group from ``self.target_groups``, and the MCBS parameters
        are recorded as attributes on that dataset.
        """
        westpa.rc.pstatus('Calculating cumulative evolution of flux confidence intervals every {} iteration(s)'
                        .format(self.evol_step))

        # ``items()`` (not ``iteritems()``) so this runs on Python 2 and 3.
        for target_label, target_fluxdata in self.fluxdata.items():
            fluxes = target_fluxdata['flux']
            target_group = self.target_groups[target_label]
            iter_start = target_group['n_iter'][0]
            iter_stop = target_group['n_iter'][-1]
            # NOTE(review): this is last-minus-first of ``n_iter``; if
            # ``n_iter`` lists every iteration this is one less than the
            # number of flux entries -- confirm that is intended.
            iter_count = iter_stop - iter_start
            n_blocks = iter_count // self.evol_step
            if iter_count % self.evol_step > 0:
                n_blocks += 1

            cis = numpy.empty((n_blocks,), dtype=ci_dtype)

            for iblock in range(n_blocks):
                block_iter_stop = min(iter_start + (iblock+1)*self.evol_step, iter_stop)
                # Cumulative window: always starts at the first flux entry.
                istop = min((iblock+1)*self.evol_step, len(fluxes))

                avg, ci_lb, ci_ub, correl_len = mclib.mcbs_ci_correl(fluxes[:istop], numpy.mean,
                                                                     self.alpha, self.n_sets,
                                                                     autocorrel_alpha=self.autocorrel_alpha,
                                                                     subsample=numpy.mean)
                cis[iblock]['iter_start'] = iter_start
                cis[iblock]['iter_stop'] = block_iter_stop
                cis[iblock]['expected'], cis[iblock]['ci_lbound'], cis[iblock]['ci_ubound'] = avg, ci_lb, ci_ub
                cis[iblock]['corr_len'] = correl_len

            cis_ds = target_group.create_dataset('flux_evolution', data=cis)
            cis_ds.attrs['iter_step'] = self.evol_step
            cis_ds.attrs['mcbs_alpha'] = self.alpha
            cis_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
            cis_ds.attrs['mcbs_n_sets'] = self.n_sets
def _eval_block(iblock, istate, start, stop, state_pops, mcbs_alpha,
                mcbs_nsets, mcbs_acalpha):
    """Evaluate ``mcbs_ci_correl`` on one state's population series.

    ``numpy.mean`` is used as both the estimator and the subsampling
    function.  Returns ``(iblock, istate, ci)`` where ``ci`` is
    ``(start, stop)`` followed by the tuple returned by ``mcbs_ci_correl``.
    """
    mcbs_options = {
        'estimator': numpy.mean,
        'alpha': mcbs_alpha,
        'n_sets': mcbs_nsets,
        'autocorrel_alpha': mcbs_acalpha,
        'subsample': numpy.mean,
    }
    interval = mcbs_ci_correl(state_pops, **mcbs_options)
    block_bounds = (start, stop)
    return (iblock, istate, block_bounds + interval)
Exemple #7
0
def _eval_block(iblock, start, stop, nstates, total_fluxes, cond_fluxes, rates, mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    results = [[],[],[]]
    # results are target fluxes, conditional fluxes, rates
    for istate in xrange(nstates):
        ci_res = mcbs_ci_correl(total_fluxes[:,istate],estimator=numpy.mean,
                                    alpha=mcbs_alpha,n_sets=mcbs_nsets,autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean)
        results[0].append((iblock,istate,(start,stop)+ci_res))
        
        for jstate in xrange(nstates):
            if istate == jstate: continue
            ci_res = mcbs_ci_correl(cond_fluxes[:,istate,jstate],estimator=numpy.mean,
                                    alpha=mcbs_alpha,n_sets=mcbs_nsets,autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean)
            results[1].append((iblock, istate, jstate, (start,stop) + ci_res))
            
            ci_res = mcbs_ci_correl(rates[:,istate,jstate],estimator=numpy.mean,
                                    alpha=mcbs_alpha,n_sets=mcbs_nsets,autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean)
            results[2].append((iblock, istate, jstate, (start,stop) + ci_res))
    return results
Exemple #8
0
    def calc_evol_flux(self):
        """Compute and store the cumulative evolution of flux confidence intervals.

        For each target in ``self.fluxdata`` the flux series is evaluated on
        growing prefixes (``flux[:istop]``) with ``mclib.mcbs_ci_correl``,
        using an estimator that returns ``numpy.mean`` of the dataset.  The
        per-block results are written to a ``flux_evolution`` dataset in the
        target's group, and the MCBS parameters are stored as attributes on
        that dataset.
        """
        westpa.rc.pstatus(
            'Calculating cumulative evolution of flux confidence intervals every {} iteration(s)'
            .format(self.evol_step))

        def _mean_of_dataset(stride, dataset):
            # ``stride`` is part of the estimator's keyword interface here
            # but does not influence the result.
            return numpy.mean(dataset)

        for target_label, target_fluxdata in self.fluxdata.items():
            flux_series = target_fluxdata['flux']
            target_group = self.target_groups[target_label]
            first_iter = target_group['n_iter'][0]
            last_iter = target_group['n_iter'][-1]

            # Number of blocks: span divided by the step, rounded up.
            n_blocks, leftover = divmod(last_iter - first_iter, self.evol_step)
            if leftover > 0:
                n_blocks += 1

            cis = numpy.empty((n_blocks, ), dtype=ci_dtype)

            for iblock in range(n_blocks):
                covered = (iblock + 1) * self.evol_step
                block_iter_stop = min(first_iter + covered, last_iter)
                # Cumulative window: always starts at the first flux entry.
                prefix = flux_series[:min(covered, len(flux_series))]

                avg, ci_lb, ci_ub, sterr, correl_len = mclib.mcbs_ci_correl(
                    {'dataset': prefix},
                    estimator=_mean_of_dataset,
                    alpha=self.alpha,
                    n_sets=self.n_sets,
                    autocorrel_alpha=self.autocorrel_alpha,
                    subsample=numpy.mean,
                    do_correl=self.do_correl,
                    mcbs_enable=self.mcbs_enable)

                record = cis[iblock]
                record['iter_start'] = first_iter
                record['iter_stop'] = block_iter_stop
                record['expected'] = avg
                record['ci_lbound'] = ci_lb
                record['ci_ubound'] = ci_ub
                record['corr_len'] = correl_len
                record['sterr'] = sterr

            cis_ds = target_group.create_dataset('flux_evolution', data=cis)
            for attr_name, attr_value in (('iter_step', self.evol_step),
                                          ('mcbs_alpha', self.alpha),
                                          ('mcbs_autocorrel_alpha', self.autocorrel_alpha),
                                          ('mcbs_n_sets', self.n_sets)):
                cis_ds.attrs[attr_name] = attr_value
Exemple #9
0
def _2D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl, mcbs_enable, estimator_kwargs):
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    for istate in xrange(nstates):
        for jstate in xrange(nstates):
            if istate == jstate: continue
            estimator_kwargs.update(dict(istate=istate, jstate=jstate, nstates=nstates))

            dataset = { 'indices' : np.array(range(start-1, stop-1), dtype=np.uint16) }
            
            ci_res = mcbs_ci_correl(dataset,estimator=reweight_for_c,
                                    alpha=mcbs_alpha,n_sets=mcbs_nsets,autocorrel_alpha=mcbs_acalpha,
                                    subsample=(lambda x: x[0]), do_correl=do_correl, mcbs_enable=mcbs_enable, estimator_kwargs=estimator_kwargs)
            results.append((name, iblock, istate, jstate, (start,stop) + ci_res))

    return results
Exemple #10
0
def _1D_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl, mcbs_enable, estimator_kwargs):
    # As our reweighting estimator is a weird function, we can't use the general mclib block.
    results = []
    for istate in xrange(nstates):
        # A little hack to make our estimator play nice, as jstate must be there.
        # For 1D datasets (state probabilities, etc), the argument isn't used in our estimator,
        # and so any variable which has the proper type is fine.
        estimator_kwargs.update(dict(istate=istate, jstate=istate, nstates=nstates))

        dataset = { 'indices' : np.array(range(start-1, stop-1), dtype=np.uint16) }
        
        ci_res = mcbs_ci_correl(dataset,estimator=reweight_for_c,
                                alpha=mcbs_alpha,n_sets=mcbs_nsets,autocorrel_alpha=mcbs_acalpha,
                                subsample=(lambda x: x[0]), do_correl=do_correl, mcbs_enable=mcbs_enable, estimator_kwargs=estimator_kwargs)
        results.append((name, iblock, istate, (start,stop) + ci_res))

    return results
Exemple #11
0
def _rate_eval_block(iblock, start, stop, nstates, data_input, name, mcbs_alpha, mcbs_nsets, mcbs_acalpha, do_correl, mcbs_enable):
    # Our rate estimator is a little more complex, so we've defined a custom evaluation block for it,
    # instead of just using the block evalutors that we've imported.
    results = []
    for istate in xrange(nstates):
        for jstate in xrange(nstates):
            if istate == jstate: continue
            kwargs = { 'istate' : istate, 'jstate': jstate }
            # Why are we sending in the total population dataset, instead of a sliced one?
            # It's a requirement of our estimator; we need to pull from any given i to j state in order to properly normalize
            # and avoid i to j rate constants which are affected by a third state k.
            # That is, we need the populations for both i and j, and it's easier to just send in the entire dataset.
            dataset = {'dataset': data_input['dataset'][:, istate, jstate], 'pops': data_input['pops'] }
            ci_res = mcbs_ci_correl(dataset,estimator=sequence_macro_flux_to_rate,
                                    alpha=mcbs_alpha,n_sets=mcbs_nsets,autocorrel_alpha=mcbs_acalpha,
                                    subsample=numpy.mean, do_correl=do_correl, mcbs_enable=mcbs_enable, estimator_kwargs=kwargs)
            results.append((name, iblock, istate, jstate, (start,stop) + ci_res))

    return results
Exemple #12
0
    def go(self):
        """Average fluxes and rates, store and print them, then compute their evolution.

        Everything runs inside the progress indicator context:

        1. Open the files and read ``nstates``, ``nbins`` and ``state_labels``
           from the assignments file.
        2. Load conditional fluxes, total fluxes and labeled populations, and
           derive rates with ``sequence_macro_flux_to_rate``.
        3. Evaluate ``mcbs_ci_correl`` (``numpy.mean`` as estimator and
           subsampler) on ``iter_slice(start_iter, stop_iter)`` of each
           series; store the results as ``avg_total_fluxes``,
           ``avg_conditional_fluxes`` and ``avg_rates`` in the output file and
           print them.
        4. Unless ``self.evolution_mode`` is ``'none'`` or the iteration step
           is falsy, submit one ``_eval_block`` task per block to the work
           manager and store the gathered results as
           ``target_flux_evolution``, ``conditional_flux_evolution`` and
           ``rate_evolution``.
        """
        pi = self.progress.indicator
        with pi:
            pi.new_operation('Initializing')
            self.open_files()
            nstates = self.assignments_file.attrs['nstates']
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            assert nstates == len(state_labels)
            start_iter, stop_iter, step_iter = self.iter_range.iter_start, self.iter_range.iter_stop, self.iter_range.iter_step

            pi.new_operation('Reading data')
            cond_fluxes = h5io.IterBlockedDataset(self.kinetics_file['conditional_fluxes'])
            cond_fluxes.cache_data()
            total_fluxes = h5io.IterBlockedDataset(self.kinetics_file['total_fluxes'])
            pops = h5io.IterBlockedDataset(self.assignments_file['labeled_populations'])
            pops.cache_data()
            pops.data = pops.data.sum(axis=2)

            rates = h5io.IterBlockedDataset.empty_like(cond_fluxes)
            rates.data = sequence_macro_flux_to_rate(cond_fluxes.data, pops.data[:nstates,:nbins])

            avg_total_fluxes = numpy.zeros((nstates,), dtype=ci_dtype)
            avg_conditional_fluxes = numpy.zeros((nstates,nstates), dtype=ci_dtype)
            avg_rates = numpy.zeros((nstates,nstates), dtype=ci_dtype)

            # Keyword set shared by every overall-average evaluation below.
            mcbs_kwargs = dict(estimator=numpy.mean, alpha=self.mcbs_alpha, n_sets=self.mcbs_nsets,
                               autocorrel_alpha=self.mcbs_acalpha, subsample=numpy.mean)
            # All ordered pairs of distinct states, in row-major order.
            # ``range`` (not ``xrange``) keeps this working on Python 2 and 3.
            state_pairs = [(istate, jstate)
                           for istate in range(nstates)
                           for jstate in range(nstates)
                           if istate != jstate]

            # Calculate overall average rates
            pi.new_operation('Averaging overall fluxes into states', nstates)
            for istate in range(nstates):
                ci_res = mcbs_ci_correl(total_fluxes.iter_slice(start_iter,stop_iter)[:,istate], **mcbs_kwargs)
                avg_total_fluxes[istate] = (start_iter, stop_iter) + ci_res
                pi.progress += 1

            pi.new_operation('Averaging state-to-state fluxes and rates', nstates*(nstates-1))
            for istate, jstate in state_pairs:
                flux_ci_res = mcbs_ci_correl(cond_fluxes.iter_slice(start_iter,stop_iter)[:,istate,jstate],
                                             **mcbs_kwargs)
                rate_ci_res = mcbs_ci_correl(rates.iter_slice(start_iter,stop_iter)[:,istate,jstate],
                                             **mcbs_kwargs)

                avg_conditional_fluxes[istate, jstate] = (start_iter, stop_iter) + flux_ci_res
                avg_rates[istate, jstate] = (start_iter, stop_iter) + rate_ci_res
                pi.progress += 1

            pi.new_operation('Saving averages')
            self.output_file['avg_rates'] = avg_rates
            self.output_file['avg_conditional_fluxes'] = avg_conditional_fluxes
            self.output_file['avg_total_fluxes'] = avg_total_fluxes
            for ds in ('avg_rates', 'avg_conditional_fluxes', 'avg_total_fluxes'):
                self.stamp_mcbs_info(self.output_file[ds])

            self.output_file['state_labels'] = state_labels
            maxlabellen = max(map(len,state_labels))
            pi.clear()

            print('fluxes into macrostates:')
            for istate in range(nstates):
                print('{:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                      .format(state_labels[istate],
                              avg_total_fluxes['expected'][istate],
                              avg_total_fluxes['ci_lbound'][istate],
                              avg_total_fluxes['ci_ubound'][istate],
                              maxlabellen=maxlabellen))

            # The two pairwise reports share one format; only header and data differ.
            pairwise_reports = (('\nfluxes from state to state:', avg_conditional_fluxes),
                                ('\nrates from state to state:', avg_rates))
            for header, averages in pairwise_reports:
                print(header)
                for istate, jstate in state_pairs:
                    print('{:{maxlabellen}s} -> {:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                          .format(state_labels[istate], state_labels[jstate],
                                  averages['expected'][istate,jstate],
                                  averages['ci_lbound'][istate,jstate],
                                  averages['ci_ubound'][istate,jstate],
                                  maxlabellen=maxlabellen))

            # skip evolution if not requested
            if self.evolution_mode == 'none' or not step_iter:
                return

            start_pts = range(start_iter, stop_iter, step_iter)
            target_evol = numpy.zeros((len(start_pts), nstates), dtype=ci_dtype)
            flux_evol = numpy.zeros((len(start_pts), nstates, nstates), dtype=ci_dtype)
            rate_evol = numpy.zeros((len(start_pts), nstates, nstates), dtype=ci_dtype)
            pi.new_operation('Calculating flux/rate evolution', len(start_pts))
            futures = []
            for iblock, start in enumerate(start_pts):
                stop = min(start+step_iter, stop_iter)
                if self.evolution_mode == 'cumulative':
                    # Window ends at ``stop`` and covers a fraction of the
                    # iterations since ``start_iter``.
                    windowsize = int(self.evol_window_frac * (stop - start_iter))
                    block_start = max(start_iter, stop - windowsize)
                else: # self.evolution_mode == 'blocked'
                    block_start = start

                future = self.work_manager.submit(_eval_block, kwargs=dict(iblock=iblock, start=block_start, stop=stop,
                                                                           nstates=nstates,
                                                                           total_fluxes=total_fluxes.iter_slice(block_start,stop),
                                                                           cond_fluxes=cond_fluxes.iter_slice(block_start,stop),
                                                                           rates=rates.iter_slice(block_start,stop),
                                                                           mcbs_alpha=self.mcbs_alpha, mcbs_nsets=self.mcbs_nsets,
                                                                           mcbs_acalpha=self.mcbs_acalpha))
                futures.append(future)

            # Each result carries its own block index, so completion order
            # does not matter.
            for future in self.work_manager.as_completed(futures):
                pi.progress += 1
                target_results, condflux_results, rate_results = future.get_result(discard=True)
                for iblock, istate, ci_result in target_results:
                    target_evol[iblock, istate] = ci_result

                for iblock, istate, jstate, ci_result in condflux_results:
                    flux_evol[iblock, istate, jstate] = ci_result

                for iblock, istate, jstate, ci_result in rate_results:
                    rate_evol[iblock, istate, jstate] = ci_result

            df_ds = self.output_file.create_dataset('conditional_flux_evolution', data=flux_evol, shuffle=True, compression=9)
            tf_ds = self.output_file.create_dataset('target_flux_evolution', data=target_evol, shuffle=True, compression=9)
            rate_ds = self.output_file.create_dataset('rate_evolution', data=rate_evol, shuffle=True, compression=9)

            for ds in (df_ds, tf_ds, rate_ds):
                self.stamp_mcbs_info(ds)
Exemple #13
0
    def calc_store_flux_data(self):
        """Extract per-target fluxes, store them, and compute overall averages.

        Fluxes are obtained from ``extract_fluxes`` for the configured
        iteration range.  For every target a ``target_<n>`` group is created
        under ``target_flux`` holding the per-iteration ``n_iter``, ``count``
        and ``flux`` values and the normalized flux autocorrelation
        (``flux_autocorrel``).  The mean flux and its confidence interval come
        from ``mclib.mcbs_ci_correl``; they are reported through
        ``westpa.rc.pstatus``, written to the ``index`` dataset and to
        ``avg_flux`` in the output file.  The extracted flux data is kept on
        ``self.fluxdata``.
        """
        westpa.rc.pstatus(
            'Calculating mean flux and confidence intervals for iterations [{},{})'
            .format(self.iter_range.iter_start, self.iter_range.iter_stop))

        fluxdata = extract_fluxes(self.iter_range.iter_start,
                                  self.iter_range.iter_stop, self.data_reader)

        # Create a group to store data in
        output_group = h5io.create_hdf5_group(self.output_h5file,
                                              'target_flux',
                                              replace=False,
                                              creating_program=self.prog)
        self.output_group = output_group
        output_group.attrs['version_code'] = self.output_format_version
        self.iter_range.record_data_iter_range(output_group)

        n_targets = len(fluxdata)
        index = numpy.empty((n_targets, ), dtype=target_index_dtype)
        avg_fluxdata = numpy.empty((n_targets, ), dtype=ci_dtype)

        def _mean_of_dataset(stride, dataset):
            # ``stride`` is part of the estimator's keyword interface here
            # but does not influence the result.
            return numpy.mean(dataset)

        for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
            # Create group and index entry
            index_entry = index[itarget]
            index_entry['target_label'] = str(target_label)
            target_group = output_group.create_group('target_{}'.format(itarget))
            self.target_groups[target_label] = target_group

            # Store per-iteration values
            for field in ('n_iter', 'count', 'flux'):
                target_group[field] = target_fluxdata[field]
            h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

            # Calculate flux autocorrelation: correlate the mean-subtracted
            # series with itself, keep the second half of the result, and
            # normalize by its first element.
            fluxes = target_fluxdata['flux']
            deviations = fluxes - fluxes.mean()
            full_corr = fftconvolve(deviations, deviations[::-1])
            acorr = full_corr[len(full_corr) // 2:]
            acorr /= acorr[0]
            acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
            h5io.label_axes(acorr_ds, ['lag'], ['tau'])

            # Calculate overall averages and CIs
            avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
                {'dataset': fluxes},
                estimator=_mean_of_dataset,
                alpha=self.alpha,
                n_sets=self.n_sets,
                autocorrel_alpha=self.autocorrel_alpha,
                subsample=numpy.mean,
                do_correl=self.do_correl,
                mcbs_enable=self.mcbs_enable)
            avg_fluxdata[itarget] = (self.iter_range.iter_start,
                                     self.iter_range.iter_stop,
                                     avg, lb_ci, ub_ci, sterr, correl_len)

            westpa.rc.pstatus('target {!r}:'.format(target_label))
            westpa.rc.pstatus('  correlation length = {} tau'.format(correl_len))
            westpa.rc.pstatus('  mean flux and CI   = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))

            index_entry['mean_flux'] = avg
            index_entry['mean_flux_ci_lb'] = lb_ci
            index_entry['mean_flux_ci_ub'] = ub_ci
            index_entry['mean_flux_correl_len'] = correl_len

        # Write index and summary
        index_ds = output_group.create_dataset('index', data=index)
        for attr_name, attr_value in (('mcbs_alpha', self.alpha),
                                      ('mcbs_autocorrel_alpha', self.autocorrel_alpha),
                                      ('mcbs_n_sets', self.n_sets)):
            index_ds.attrs[attr_name] = attr_value

        self.fluxdata = fluxdata
        self.output_h5file['avg_flux'] = avg_fluxdata
def _eval_block(iblock, istate, start, stop, state_pops, mcbs_alpha, mcbs_nsets, mcbs_acalpha):
    """Bootstrap the mean of ``state_pops`` for one block and tag the result.

    ``iblock`` and ``istate`` are passed through untouched so the caller can
    place the result; ``start`` and ``stop`` are prepended to whatever
    ``mcbs_ci_correl`` returns (which must therefore be a tuple).

    Returns ``(iblock, istate, (start, stop) + ci_result)``.
    """
    bootstrap_options = {
        'estimator': numpy.mean,
        'alpha': mcbs_alpha,
        'n_sets': mcbs_nsets,
        'autocorrel_alpha': mcbs_acalpha,
        'subsample': numpy.mean,
    }
    block_bounds = (start, stop)
    ci_fields = mcbs_ci_correl(state_pops, **bootstrap_options)
    return (iblock, istate, block_bounds + ci_fields)
Exemple #15
0
    def go(self):
        """Average fluxes and rates with confidence intervals, store and report them.

        Steps, in order:

        1. Open the input/output files and read ``nstates``, ``nbins`` and
           ``state_labels`` from the assignments file.
        2. Load ``conditional_fluxes`` and ``total_fluxes`` from the kinetics
           file and ``labeled_populations`` from the assignments file; the
           populations are summed over axis 2 and passed, together with the
           conditional fluxes, to ``sequence_macro_flux_to_rate``.
        3. For the whole ``[start_iter, stop_iter)`` range, run
           ``mcbs_ci_correl`` on the total flux into each state and on the
           flux and rate for each ordered pair of distinct states, then write
           ``avg_rates``, ``avg_conditional_fluxes``, ``avg_total_fluxes`` and
           ``state_labels`` to the output file and print a summary.
        4. Unless ``self.evolution_mode == 'none'`` or the iteration step is
           falsy, submit one ``_eval_block`` task per block to the work
           manager and store the results as ``conditional_flux_evolution``,
           ``target_flux_evolution`` and ``rate_evolution``.

        Returns ``None``.
        """
        pi = self.progress.indicator
        with pi:
            pi.new_operation('Initializing')
            self.open_files()
            nstates = self.assignments_file.attrs['nstates']
            nbins = self.assignments_file.attrs['nbins']
            state_labels = self.assignments_file['state_labels'][...]
            assert nstates == len(state_labels)
            start_iter = self.iter_range.iter_start
            stop_iter = self.iter_range.iter_stop
            step_iter = self.iter_range.iter_step

            # Bootstrap settings shared by every whole-range CI calculation.
            mcbs_kwargs = dict(estimator=numpy.mean,
                               alpha=self.mcbs_alpha,
                               n_sets=self.mcbs_nsets,
                               autocorrel_alpha=self.mcbs_acalpha,
                               subsample=numpy.mean)

            # Ordered (istate, jstate) pairs with istate != jstate, in the
            # same order a nested loop over both indices would visit them.
            # ``range`` (not ``xrange``) keeps this runnable on Python 3.
            state_pairs = [(istate, jstate)
                           for istate in range(nstates)
                           for jstate in range(nstates)
                           if istate != jstate]

            pi.new_operation('Reading data')
            cond_fluxes = h5io.IterBlockedDataset(
                self.kinetics_file['conditional_fluxes'])
            cond_fluxes.cache_data()
            total_fluxes = h5io.IterBlockedDataset(
                self.kinetics_file['total_fluxes'])
            pops = h5io.IterBlockedDataset(
                self.assignments_file['labeled_populations'])
            pops.cache_data()
            pops.data = pops.data.sum(axis=2)

            rates = h5io.IterBlockedDataset.empty_like(cond_fluxes)
            rates.data = sequence_macro_flux_to_rate(
                cond_fluxes.data, pops.data[:nstates, :nbins])

            avg_total_fluxes = numpy.zeros((nstates, ), dtype=ci_dtype)
            avg_conditional_fluxes = numpy.zeros((nstates, nstates),
                                                 dtype=ci_dtype)
            avg_rates = numpy.zeros((nstates, nstates), dtype=ci_dtype)

            # Calculate overall average rates
            pi.new_operation('Averaging overall fluxes into states', nstates)
            for istate in range(nstates):
                ci_res = mcbs_ci_correl(
                    total_fluxes.iter_slice(start_iter, stop_iter)[:, istate],
                    **mcbs_kwargs)
                avg_total_fluxes[istate] = (start_iter, stop_iter) + ci_res
                pi.progress += 1

            pi.new_operation('Averaging state-to-state fluxes and rates',
                             nstates * (nstates - 1))
            for istate, jstate in state_pairs:
                flux_ci_res = mcbs_ci_correl(
                    cond_fluxes.iter_slice(start_iter,
                                           stop_iter)[:, istate, jstate],
                    **mcbs_kwargs)
                rate_ci_res = mcbs_ci_correl(
                    rates.iter_slice(start_iter, stop_iter)[:, istate, jstate],
                    **mcbs_kwargs)

                avg_conditional_fluxes[istate, jstate] = (
                    (start_iter, stop_iter) + flux_ci_res)
                avg_rates[istate, jstate] = (
                    (start_iter, stop_iter) + rate_ci_res)
                pi.progress += 1

            pi.new_operation('Saving averages')
            self.output_file['avg_rates'] = avg_rates
            self.output_file['avg_conditional_fluxes'] = avg_conditional_fluxes
            self.output_file['avg_total_fluxes'] = avg_total_fluxes
            for ds in ('avg_rates', 'avg_conditional_fluxes',
                       'avg_total_fluxes'):
                self.stamp_mcbs_info(self.output_file[ds])

            self.output_file['state_labels'] = state_labels
            # Width used to left-align every label column in the report.
            maxlabellen = max(map(len, state_labels))
            pi.clear()

            print('fluxes into macrostates:')
            for istate in range(nstates):
                print(
                    '{:{maxlabellen}s}: mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1'
                    .format(state_labels[istate],
                            avg_total_fluxes['expected'][istate],
                            avg_total_fluxes['ci_lbound'][istate],
                            avg_total_fluxes['ci_ubound'][istate],
                            maxlabellen=maxlabellen))

            # The flux and rate tables share one layout; only the heading and
            # the source array differ.
            pair_line = ('{:{maxlabellen}s} -> {:{maxlabellen}s}: '
                         'mean={:21.15e} CI=({:21.15e}, {:21.15e}) * tau^-1')
            pair_tables = (
                ('\nfluxes from state to state:', avg_conditional_fluxes),
                ('\nrates from state to state:', avg_rates),
            )
            for heading, table in pair_tables:
                print(heading)
                for istate, jstate in state_pairs:
                    print(
                        pair_line.format(state_labels[istate],
                                         state_labels[jstate],
                                         table['expected'][istate, jstate],
                                         table['ci_lbound'][istate, jstate],
                                         table['ci_ubound'][istate, jstate],
                                         maxlabellen=maxlabellen))

            # skip evolution if not requested
            if self.evolution_mode == 'none' or not step_iter:
                return

            start_pts = range(start_iter, stop_iter, step_iter)
            n_blocks = len(start_pts)
            target_evol = numpy.zeros((n_blocks, nstates), dtype=ci_dtype)
            flux_evol = numpy.zeros((n_blocks, nstates, nstates),
                                    dtype=ci_dtype)
            rate_evol = numpy.zeros((n_blocks, nstates, nstates),
                                    dtype=ci_dtype)
            pi.new_operation('Calculating flux/rate evolution', n_blocks)
            futures = []
            for iblock, start in enumerate(start_pts):
                stop = min(start + step_iter, stop_iter)
                if self.evolution_mode == 'cumulative':
                    # Window ends at ``stop`` and covers a fixed fraction of
                    # everything seen so far, never reaching before start_iter.
                    windowsize = int(self.evol_window_frac *
                                     (stop - start_iter))
                    block_start = max(start_iter, stop - windowsize)
                else:  # self.evolution_mode == 'blocked'
                    block_start = start

                future = self.work_manager.submit(
                    _eval_block,
                    kwargs=dict(iblock=iblock,
                                start=block_start,
                                stop=stop,
                                nstates=nstates,
                                total_fluxes=total_fluxes.iter_slice(
                                    block_start, stop),
                                cond_fluxes=cond_fluxes.iter_slice(
                                    block_start, stop),
                                rates=rates.iter_slice(block_start, stop),
                                mcbs_alpha=self.mcbs_alpha,
                                mcbs_nsets=self.mcbs_nsets,
                                mcbs_acalpha=self.mcbs_acalpha))
                futures.append(future)

            # Results arrive in completion order; each carries its own block
            # and state indices, so arrival order does not matter.
            for future in self.work_manager.as_completed(futures):
                pi.progress += 1
                target_results, condflux_results, rate_results = future.get_result(
                    discard=True)
                for iblock, istate, ci_result in target_results:
                    target_evol[iblock, istate] = ci_result

                for iblock, istate, jstate, ci_result in condflux_results:
                    flux_evol[iblock, istate, jstate] = ci_result

                for iblock, istate, jstate, ci_result in rate_results:
                    rate_evol[iblock, istate, jstate] = ci_result

            df_ds = self.output_file.create_dataset(
                'conditional_flux_evolution',
                data=flux_evol,
                shuffle=True,
                compression=9)
            tf_ds = self.output_file.create_dataset('target_flux_evolution',
                                                    data=target_evol,
                                                    shuffle=True,
                                                    compression=9)
            rate_ds = self.output_file.create_dataset('rate_evolution',
                                                      data=rate_evol,
                                                      shuffle=True,
                                                      compression=9)

            for ds in (df_ds, tf_ds, rate_ds):
                self.stamp_mcbs_info(ds)
Exemple #16
0
                                            numpy.mean,
                                            0.05,
                                            n_sets=n_sets,
                                            sort=numpy.msort)
# Confidence-interval widths (upper bound minus lower bound) for the estimates
# computed just above this point. NOTE(review): the ``n_`` prefix presumably
# stands for "naive" (cf. the 'blocked/naive' heading below) -- confirm against
# the start of the script, which is not visible here.
n_uncorrel_width = uncorrel_ub - uncorrel_lb
n_correl_width = correl_ub - correl_lb
print('  uncorrelated:    {} ({},{})'.format(uncorrel_mean, uncorrel_lb,
                                             uncorrel_ub))
print('  correlated:      {} ({},{})'.format(correl_mean, correl_lb,
                                             correl_ub))
# Ratio of the correlated data set's CI width to the uncorrelated one's.
print('  width ratio c/u: {}'.format((n_correl_width / n_uncorrel_width)))

print('blocked MCBS:')
# Earlier variant kept for reference: plain mcbs_ci on data strided by the
# correlation length instead of mcbs_ci_correl.
#uncorrel_mean, uncorrel_lb, uncorrel_ub = mcbs_ci(ds_uncorrel[::k_uncorrel+1], numpy.mean, 0.05, n_sets=1000, sort=numpy.msort)
#correl_mean, correl_lb, correl_ub = mcbs_ci(ds_correl[::k_correl+1], numpy.mean, 0.05, n_sets=1000, sort=numpy.msort)
# Recompute both estimates with mcbs_ci_correl. This REBINDS the
# *_mean / *_lb / *_ub names used above, so the n_*_width values had to be
# captured first. The fourth returned value (k_) is not used in the lines
# visible here.
uncorrel_mean, uncorrel_lb, uncorrel_ub, k_ = mcbs_ci_correl(
    ds_uncorrel, numpy.mean, 0.05, n_sets=n_sets, subsample=numpy.mean)
correl_mean, correl_lb, correl_ub, k_ = mcbs_ci_correl(ds_correl,
                                                       numpy.mean,
                                                       0.05,
                                                       n_sets=n_sets,
                                                       subsample=numpy.mean)
# CI widths from the mcbs_ci_correl ("blocked") results.
b_uncorrel_width = uncorrel_ub - uncorrel_lb
b_correl_width = correl_ub - correl_lb
print('  uncorrelated:    {} ({},{})'.format(uncorrel_mean, uncorrel_lb,
                                             uncorrel_ub))
print('  correlated:      {} ({},{})'.format(correl_mean, correl_lb,
                                             correl_ub))
print('  width ratio c/u: {}'.format((b_correl_width / b_uncorrel_width)))

# Compare the two methods on the same data set: blocked width / earlier width.
print('width ratio blocked/naive:')
print('  uncorrelated:    {}'.format(b_uncorrel_width / n_uncorrel_width))
Exemple #17
0
    def calc_store_flux_data(self):
        """Compute per-target mean flux with confidence intervals and store it.

        Fluxes for ``[iter_start, iter_stop)`` are obtained from
        ``extract_fluxes``. A ``target_flux`` group is created in
        ``self.output_h5file``; for every target a ``target_<i>`` subgroup
        receives the per-iteration ``n_iter``, ``count`` and ``flux`` fields
        plus the normalised flux autocorrelation (``flux_autocorrel``).
        The mean flux, its confidence bounds, standard error and correlation
        length come from ``mclib.mcbs_ci_correl`` and are reported through
        ``westpa.rc.pstatus``, written to the ``index`` dataset and collected
        in ``avg_flux``.

        Side effects: sets ``self.output_group`` and ``self.fluxdata``, adds
        one entry per target to ``self.target_groups``, and writes to
        ``self.output_h5file``.

        Returns ``None``.
        """
        iter_start = self.iter_range.iter_start
        iter_stop = self.iter_range.iter_stop
        westpa.rc.pstatus('Calculating mean flux and confidence intervals for iterations [{},{})'
                          .format(iter_start, iter_stop))

        fluxdata = extract_fluxes(iter_start, iter_stop, self.data_reader)

        # Create a group to store data in
        output_group = h5io.create_hdf5_group(self.output_h5file, 'target_flux',
                                              replace=False, creating_program=self.prog)
        self.output_group = output_group
        output_group.attrs['version_code'] = self.output_format_version
        self.iter_range.record_data_iter_range(output_group)

        n_targets = len(fluxdata)
        index = numpy.empty((n_targets,), dtype=target_index_dtype)
        avg_fluxdata = numpy.empty((n_targets,), dtype=ci_dtype)

        # ``items()`` rather than the Python-2-only ``iteritems()`` so the
        # loop runs unchanged on Python 2 and Python 3.
        for itarget, (target_label, target_fluxdata) in enumerate(fluxdata.items()):
            # Create group and index entry
            index[itarget]['target_label'] = str(target_label)
            target_group = output_group.create_group('target_{}'.format(itarget))

            self.target_groups[target_label] = target_group

            # Store per-iteration values
            target_group['n_iter'] = target_fluxdata['n_iter']
            target_group['count'] = target_fluxdata['count']
            target_group['flux'] = target_fluxdata['flux']
            h5io.label_axes(target_group['flux'], ['n_iter'], units=['tau^-1'])

            # Calculate flux autocorrelation: convolve the mean-subtracted
            # series with its own reverse, keep the half starting at the
            # centre element, and normalise by that first element.
            fluxes = target_fluxdata['flux']
            mean_flux = fluxes.mean()
            fmm = fluxes - mean_flux
            acorr = fftconvolve(fmm, fmm[::-1])
            acorr = acorr[len(acorr) // 2:]
            acorr /= acorr[0]
            acorr_ds = target_group.create_dataset('flux_autocorrel', data=acorr)
            h5io.label_axes(acorr_ds, ['lag'], ['tau'])

            # Calculate overall averages and CIs. The estimator is called
            # with ``stride`` and ``dataset`` keywords and ignores ``stride``.
            avg, lb_ci, ub_ci, sterr, correl_len = mclib.mcbs_ci_correl(
                {'dataset': fluxes},
                estimator=(lambda stride, dataset: numpy.mean(dataset)),
                alpha=self.alpha,
                n_sets=self.n_sets,
                autocorrel_alpha=self.autocorrel_alpha,
                subsample=numpy.mean,
                do_correl=self.do_correl,
                mcbs_enable=self.mcbs_enable)
            avg_fluxdata[itarget] = (iter_start, iter_stop, avg, lb_ci, ub_ci, sterr, correl_len)
            westpa.rc.pstatus('target {!r}:'.format(target_label))
            westpa.rc.pstatus('  correlation length = {} tau'.format(correl_len))
            westpa.rc.pstatus('  mean flux and CI   = {:e} ({:e},{:e}) tau^(-1)'.format(avg, lb_ci, ub_ci))
            index[itarget]['mean_flux'] = avg
            index[itarget]['mean_flux_ci_lb'] = lb_ci
            index[itarget]['mean_flux_ci_ub'] = ub_ci
            index[itarget]['mean_flux_correl_len'] = correl_len

        # Write index and summary
        index_ds = output_group.create_dataset('index', data=index)
        index_ds.attrs['mcbs_alpha'] = self.alpha
        index_ds.attrs['mcbs_autocorrel_alpha'] = self.autocorrel_alpha
        index_ds.attrs['mcbs_n_sets'] = self.n_sets

        self.fluxdata = fluxdata
        self.output_h5file['avg_flux'] = avg_fluxdata